diff --git a/.clang-format b/.clang-format index 443f90b774d..84552f330bb 100755 --- a/.clang-format +++ b/.clang-format @@ -1,3 +1,6 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + # The style used for all options not specifically set in the configuration. BasedOnStyle: LLVM diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000000..31fd1149349 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,70 @@ +# Copyright 2020-2021 The Khronos Group, Inc. +# SPDX-License-Identifier: Apache-2.0 + +name: CI + +on: + push: + branches: [ main ] + pull_request: + types: [ opened, synchronize, reopened ] + +jobs: + build: + name: "Build ${{ matrix.platform }}" + strategy: + matrix: + platform: [windows-latest, ubuntu-18.04, ubuntu-20.04, macos-latest] + env: + PARALLEL: -j 2 + + runs-on: "${{ matrix.platform }}" + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: '3.x' + + - name: Pull glslang / SPIRV-Tools + shell: bash + working-directory: ${{github.workspace}} + run: ./checkout_glslang_spirv_tools.sh + + - name: Build glslang / SPIRV-Tools + shell: bash + working-directory: ${{github.workspace}} + run: ./build_glslang_spirv_tools.sh Release + + - name: Configure SPIRV-Cross + shell: bash + run: | + mkdir build + cd build + cmake .. -DSPIRV_CROSS_WERROR=ON -DSPIRV_CROSS_MISC_WARNINGS=ON -DSPIRV_CROSS_SHARED=ON -DCMAKE_INSTALL_PREFIX=output -DCMAKE_BUILD_TYPE=Release -DSPIRV_CROSS_ENABLE_TESTS=ON + + - name: Build SPIRV-Cross + shell: bash + working-directory: ${{github.workspace}}/build + run: | + cmake --build . --config Release + cmake --build . 
--config Release --target install + + - name: Test SPIRV-Cross + shell: bash + working-directory: ${{github.workspace}}/build + run: ctest --verbose -C Release + reuse: + name: "REUSE license check" + runs-on: ubuntu-latest + container: khronosgroup/docker-images:asciidoctor-spec + + steps: + - uses: actions/checkout@v2 + + # REUSE license checker + - name: license-check + run: | + reuse lint + + diff --git a/.gitignore b/.gitignore index abd71895838..8e91b32a386 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + *.o *.d *.txt @@ -18,3 +21,4 @@ *.vcxproj.user !CMakeLists.txt +!LICENSES/*.txt diff --git a/.reuse/dep5 b/.reuse/dep5 new file mode 100644 index 00000000000..9ed4191305b --- /dev/null +++ b/.reuse/dep5 @@ -0,0 +1,11 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: SPIRV-Cross +Source: https://github.com/KhronosGroup/SPIRV-Cross + +Files: shaders*/* reference/* tests-other/* +Copyright: 2016-2021 The Khronos Group, Inc. +License: Apache-2.0 + +Files: spirv.h spirv.hpp GLSL.std.450.h +Copyright: 2016-2021 The Khronos Group, Inc. 
+License: MIT diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 46b92c8fc8e..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,72 +0,0 @@ -language: - - cpp - - python - -python: 3.7 - -matrix: - include: - - os: linux - dist: trusty - compiler: gcc - env: - - GENERATOR="Unix Makefiles" - - ARTIFACT=gcc-trusty-64bit - - os: linux - dist: trusty - compiler: clang - env: - - GENERATOR="Unix Makefiles" - - ARTIFACT=clang-trusty-64bit - - os: osx - compiler: clang - osx_image: xcode10 - env: - - GENERATOR="Unix Makefiles" - - ARTIFACT=clang-macos-64bit - - os: windows - before_install: - - choco install python3 - - export PATH="/c/Python37:/c/Python37/Scripts:$PATH" - env: - - GENERATOR="Visual Studio 15 2017" - - ARTIFACT=vs2017-32bit - - os: windows - before_install: - - choco install python3 - - export PATH="/c/Python37:/c/Python37/Scripts:$PATH" - env: - - GENERATOR="Visual Studio 15 2017 Win64" - - ARTIFACT=vs2017-64bit - -before_script: - - "./checkout_glslang_spirv_tools.sh" - -script: - - if [[ "$TRAVIS_OS_NAME" == "windows" ]]; then PYTHON3=$(which python); fi - - if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then PYTHON3=$(which python3); fi - - "./build_glslang_spirv_tools.sh Release" - - mkdir build - - cd build - - cmake .. -DSPIRV_CROSS_SHARED=ON -DCMAKE_INSTALL_PREFIX=output -DCMAKE_BUILD_TYPE=Release -G "${GENERATOR}" -DPYTHON_EXECUTABLE:FILEPATH="${PYTHON3}" -DSPIRV_CROSS_ENABLE_TESTS=ON - - cmake --build . --config Release - - cmake --build . --config Release --target install - - ctest --verbose -C Release - - cd .. - -before_deploy: - - REV=${ARTIFACT}-$(git rev-parse --short=10 HEAD) - - cd build/output - - tar cf spirv-cross-${REV}.tar * - - gzip spirv-cross-${REV}.tar - - cd ../.. 
- - export FILE_TO_UPLOAD=build/output/spirv-cross-${REV}.tar.gz - -deploy: - provider: releases - api_key: - secure: c7YEOyzhE19TFo76UnbLWk/kikRQxsHsOxzkOqN6Q2aL8joNRw5kmcG84rGd+Rf6isX62cykCzA6qHkyJCv9QTIzcyXnLju17rLvgib7cXDcseaq8x4mFvet2yUxCglthDpFY2M2LB0Aqws71lPeYIrKXa6hCFEh8jO3AWxnaor7O3RYfNZylM9d33HgH6KLT3sDx/cukwBstmKeg7EG9OUnrSvairkPW0W2+jlq3SXPlq/WeVhf8hQs3Yg0BluExGbmLOwe9EaeUpeGuJMyHRxXypnToQv1/KwoScKpap5tYxdNWiwRGZ4lYcmKrjAYVvilTioh654oX5LQpn34mE/oe8Ko9AaATkSaoiisRFp6meWtnB39oFBoL5Yn15DqLQpRXPr1AJsnBXSGAac3aDBO1j4MIqTHmYlYlfRw3n2ZsBaFaTZnv++438SNQ54nkivyoDTIWjoOmYa9+K4mQc3415RDdQmjZTJM+lu+GAlMmNBTVbfNvrbU55Usu9Lo6BZJKKdUMvdBB78kJ5FHvcBlL+eMgmk1pABQY0IZROCt7NztHcv1UmAxoWNxveSFs5glydPNNjNS8bogc4dzBGYG0KMmILbBHihVbY2toA1M9CMdDHdp+LucfDMmzECmYSEmlx0h8win+Jjb74/qpOhaXuUZ0NnzVgCOyeUYuMQ= - file: "${FILE_TO_UPLOAD}" - skip_cleanup: true - on: - tags: true diff --git a/CMakeLists.txt b/CMakeLists.txt index aa33262ac8c..ccaedc90c8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ -# Copyright 2016 Google Inc. +# Copyright 2016-2021 Google Inc. +# SPDX-License-Identifier: Apache-2.0 OR MIT # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,11 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 2.8) +# +# At your option, you may choose to accept this material under either: +# 1. The Apache License, Version 2.0, found at , or +# 2. The MIT License, found at . +# + +cmake_minimum_required(VERSION 3.0) set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_EXTENSIONS OFF) + +# Avoid a warning if parent project sets VERSION in project(). 
+if (${CMAKE_VERSION} VERSION_GREATER "3.0.1") + cmake_policy(SET CMP0048 NEW) +endif() + project(SPIRV-Cross LANGUAGES CXX C) enable_testing() +include(GNUInstallDirs) + option(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS "Instead of throwing exceptions assert" OFF) option(SPIRV_CROSS_SHARED "Build the C API as a single shared library." OFF) option(SPIRV_CROSS_STATIC "Build the C and C++ API as static libraries." ON) @@ -39,6 +55,13 @@ option(SPIRV_CROSS_SANITIZE_UNDEFINED "Sanitize undefined" OFF) option(SPIRV_CROSS_NAMESPACE_OVERRIDE "" "Override the namespace used in the C++ API.") option(SPIRV_CROSS_FORCE_STL_TYPES "Force use of STL types instead of STL replacements in certain places. Might reduce performance." OFF) +option(SPIRV_CROSS_SKIP_INSTALL "Skips installation targets." OFF) + +option(SPIRV_CROSS_WERROR "Fail build on warnings." OFF) +option(SPIRV_CROSS_MISC_WARNINGS "Misc warnings useful for Travis runs." OFF) + +option(SPIRV_CROSS_FORCE_PIC "Force position-independent code for all targets." 
OFF) + if(${CMAKE_GENERATOR} MATCHES "Makefile") if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR}) message(FATAL_ERROR "Build out of tree to avoid overwriting Makefile") @@ -49,7 +72,7 @@ set(spirv-compiler-options "") set(spirv-compiler-defines "") set(spirv-cross-link-flags "") -message(STATUS "Finding Git version for SPIRV-Cross.") +message(STATUS "SPIRV-Cross: Finding Git version for SPIRV-Cross.") set(spirv-cross-build-version "unknown") find_package(Git) if (GIT_FOUND) @@ -60,24 +83,39 @@ if (GIT_FOUND) ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE ) - message(STATUS "Git hash: ${spirv-cross-build-version}") + message(STATUS "SPIRV-Cross: Git hash: ${spirv-cross-build-version}") else() - message(STATUS "Git not found, using unknown build version.") + message(STATUS "SPIRV-Cross: Git not found, using unknown build version.") endif() string(TIMESTAMP spirv-cross-timestamp) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/gitversion.in.h ${CMAKE_CURRENT_BINARY_DIR}/gitversion.h @ONLY) -if(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) +if (SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) set(spirv-compiler-defines ${spirv-compiler-defines} SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) + if (NOT MSVC) + set(spirv-compiler-options ${spirv-compiler-options} -fno-exceptions) + endif() endif() -if(SPIRV_CROSS_FORCE_STL_TYPES) +if (SPIRV_CROSS_FORCE_STL_TYPES) set(spirv-compiler-defines ${spirv-compiler-defines} SPIRV_CROSS_FORCE_STL_TYPES) endif() -if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) - set(spirv-compiler-options ${spirv-compiler-options} -Wall -Wextra -Werror -Wshadow) +if (WIN32) + set(CMAKE_DEBUG_POSTFIX "d") +endif() + +if (CMAKE_COMPILER_IS_GNUCXX OR ((${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") AND NOT MSVC)) + set(spirv-compiler-options ${spirv-compiler-options} -Wall -Wextra -Wshadow -Wno-deprecated-declarations) + if (SPIRV_CROSS_MISC_WARNINGS) + if (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") + set(spirv-compiler-options 
${spirv-compiler-options} -Wshorten-64-to-32) + endif() + endif() + if (SPIRV_CROSS_WERROR) + set(spirv-compiler-options ${spirv-compiler-options} -Werror) + endif() if (SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS) set(spirv-compiler-options ${spirv-compiler-options} -fno-exceptions) @@ -103,7 +141,8 @@ if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) set(spirv-cross-link-flags "${spirv-cross-link-flags} -fsanitize=thread") endif() elseif (MSVC) - set(spirv-compiler-options ${spirv-compiler-options} /wd4267) + # AppVeyor spuriously fails in debug build on older MSVC without /bigobj. + set(spirv-compiler-options ${spirv-compiler-options} /wd4267 /wd4996 $<$:/bigobj>) endif() macro(extract_headers out_abs file_list) @@ -130,9 +169,12 @@ macro(spirv_cross_add_library name config_name library_type) extract_headers(hdrs "${ARGN}") target_include_directories(${name} PUBLIC $ - $) + $) set_target_properties(${name} PROPERTIES PUBLIC_HEADERS "${hdrs}") + if (SPIRV_CROSS_FORCE_PIC) + set_target_properties(${name} PROPERTIES POSITION_INDEPENDENT_CODE ON) + endif() target_compile_options(${name} PRIVATE ${spirv-compiler-options}) target_compile_definitions(${name} PRIVATE ${spirv-compiler-defines}) if (SPIRV_CROSS_NAMESPACE_OVERRIDE) @@ -142,15 +184,18 @@ macro(spirv_cross_add_library name config_name library_type) target_compile_definitions(${name} PRIVATE SPIRV_CROSS_NAMESPACE_OVERRIDE=${SPIRV_CROSS_NAMESPACE_OVERRIDE}) endif() endif() - install(TARGETS ${name} + + if (NOT SPIRV_CROSS_SKIP_INSTALL) + install(TARGETS ${name} EXPORT ${config_name}Config - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - PUBLIC_HEADER DESTINATION include/spirv_cross) - install(FILES ${hdrs} DESTINATION include/spirv_cross) - install(EXPORT ${config_name}Config DESTINATION share/${config_name}/cmake) - export(TARGETS ${name} FILE ${config_name}Config.cmake) + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION 
${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/spirv_cross) + install(FILES ${hdrs} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/spirv_cross) + install(EXPORT ${config_name}Config DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${config_name}/cmake) + export(TARGETS ${name} FILE ${config_name}Config.cmake) + endif() endmacro() set(spirv-cross-core-sources @@ -287,17 +332,18 @@ if (SPIRV_CROSS_STATIC) endif() set(spirv-cross-abi-major 0) -set(spirv-cross-abi-minor 16) +set(spirv-cross-abi-minor 54) set(spirv-cross-abi-patch 0) if (SPIRV_CROSS_SHARED) set(SPIRV_CROSS_VERSION ${spirv-cross-abi-major}.${spirv-cross-abi-minor}.${spirv-cross-abi-patch}) - set(SPIRV_CROSS_INSTALL_LIB_DIR ${CMAKE_INSTALL_PREFIX}/lib) - set(SPIRV_CROSS_INSTALL_INC_DIR ${CMAKE_INSTALL_PREFIX}/include/spirv_cross) - configure_file( + + if (NOT SPIRV_CROSS_SKIP_INSTALL) + configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/pkg-config/spirv-cross-c-shared.pc.in ${CMAKE_CURRENT_BINARY_DIR}/spirv-cross-c-shared.pc @ONLY) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/spirv-cross-c-shared.pc DESTINATION ${CMAKE_INSTALL_PREFIX}/share/pkgconfig) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/spirv-cross-c-shared.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + endif() spirv_cross_add_library(spirv-cross-c-shared spirv_cross_c_shared SHARED ${spirv-cross-core-sources} @@ -395,7 +441,9 @@ if (SPIRV_CROSS_CLI) target_include_directories(spirv-cross PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_compile_definitions(spirv-cross PRIVATE ${spirv-compiler-defines} HAVE_SPIRV_CROSS_GIT_VERSION) set_target_properties(spirv-cross PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") - install(TARGETS spirv-cross RUNTIME DESTINATION bin) + if (NOT SPIRV_CROSS_SKIP_INSTALL) + install(TARGETS spirv-cross DESTINATION ${CMAKE_INSTALL_BINDIR}) + endif() target_link_libraries(spirv-cross PRIVATE spirv-cross-glsl spirv-cross-hlsl @@ -427,14 +475,14 @@ if 
(SPIRV_CROSS_CLI) if ((${spirv-cross-glslang} MATCHES "NOTFOUND") OR (${spirv-cross-spirv-as} MATCHES "NOTFOUND") OR (${spirv-cross-spirv-val} MATCHES "NOTFOUND") OR (${spirv-cross-spirv-opt} MATCHES "NOTFOUND")) set(SPIRV_CROSS_ENABLE_TESTS OFF) - message("Could not find glslang or SPIRV-Tools build under external/. Run ./checkout_glslang_spirv_tools.sh and ./build_glslang_spirv_tools.sh. Testing will be disabled.") + message("SPIRV-Cross: Testing will be disabled for SPIRV-Cross. Could not find glslang or SPIRV-Tools build under external/. To enable testing, run ./checkout_glslang_spirv_tools.sh and ./build_glslang_spirv_tools.sh first.") else() set(SPIRV_CROSS_ENABLE_TESTS ON) - message("Found glslang and SPIRV-Tools. Enabling test suite.") - message("Found glslangValidator in: ${spirv-cross-glslang}.") - message("Found spirv-as in: ${spirv-cross-spirv-as}.") - message("Found spirv-val in: ${spirv-cross-spirv-val}.") - message("Found spirv-opt in: ${spirv-cross-spirv-opt}.") + message("SPIRV-Cross: Found glslang and SPIRV-Tools. 
Enabling test suite.") + message("SPIRV-Cross: Found glslangValidator in: ${spirv-cross-glslang}.") + message("SPIRV-Cross: Found spirv-as in: ${spirv-cross-spirv-as}.") + message("SPIRV-Cross: Found spirv-val in: ${spirv-cross-spirv-val}.") + message("SPIRV-Cross: Found spirv-opt in: ${spirv-cross-spirv-opt}.") endif() set(spirv-cross-externals @@ -461,6 +509,18 @@ if (SPIRV_CROSS_CLI) target_link_libraries(spirv-cross-msl-resource-binding-test spirv-cross-c) set_target_properties(spirv-cross-msl-resource-binding-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + add_executable(spirv-cross-hlsl-resource-binding-test tests-other/hlsl_resource_bindings.cpp) + target_link_libraries(spirv-cross-hlsl-resource-binding-test spirv-cross-c) + set_target_properties(spirv-cross-hlsl-resource-binding-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + + add_executable(spirv-cross-msl-ycbcr-conversion-test tests-other/msl_ycbcr_conversion_test.cpp) + target_link_libraries(spirv-cross-msl-ycbcr-conversion-test spirv-cross-c) + set_target_properties(spirv-cross-msl-ycbcr-conversion-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + + add_executable(spirv-cross-typed-id-test tests-other/typed_id_test.cpp) + target_link_libraries(spirv-cross-typed-id-test spirv-cross-core) + set_target_properties(spirv-cross-typed-id-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) target_compile_options(spirv-cross-c-api-test PRIVATE -std=c89 -Wall -Wextra) endif() @@ -475,6 +535,14 @@ if (SPIRV_CROSS_CLI) COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_constexpr_test.spv) add_test(NAME spirv-cross-msl-resource-binding-test COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_resource_binding.spv) + add_test(NAME spirv-cross-hlsl-resource-binding-test + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/hlsl_resource_binding.spv) + add_test(NAME spirv-cross-msl-ycbcr-conversion-test + 
COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_ycbcr_conversion_test.spv) + add_test(NAME spirv-cross-msl-ycbcr-conversion-test-2 + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_ycbcr_conversion_test_2.spv) + add_test(NAME spirv-cross-typed-id-test + COMMAND $) add_test(NAME spirv-cross-test COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --parallel ${spirv-cross-externals} @@ -525,9 +593,24 @@ if (SPIRV_CROSS_CLI) ${spirv-cross-externals} ${CMAKE_CURRENT_SOURCE_DIR}/shaders-reflection WORKING_DIRECTORY $) + add_test(NAME spirv-cross-test-ue4 + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --msl --parallel + ${spirv-cross-externals} + ${CMAKE_CURRENT_SOURCE_DIR}/shaders-ue4 + WORKING_DIRECTORY $) + add_test(NAME spirv-cross-test-ue4-opt + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --msl --opt --parallel + ${spirv-cross-externals} + ${CMAKE_CURRENT_SOURCE_DIR}/shaders-ue4 + WORKING_DIRECTORY $) + add_test(NAME spirv-cross-test-ue4-no-opt + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --msl --parallel + ${spirv-cross-externals} + ${CMAKE_CURRENT_SOURCE_DIR}/shaders-ue4-no-opt + WORKING_DIRECTORY $) endif() elseif(NOT ${PYTHONINTERP_FOUND}) - message(WARNING "Testing disabled. Could not find python3. If you have python3 installed try running " + message(WARNING "SPIRV-Cross: Testing disabled. Could not find python3. If you have python3 installed try running " "cmake with -DPYTHON_EXECUTABLE:FILEPATH=/path/to/python3 to help it find the executable") endif() endif() diff --git a/CODE_OF_CONDUCT.adoc b/CODE_OF_CONDUCT.adoc new file mode 100644 index 00000000000..ca5e08b930a --- /dev/null +++ b/CODE_OF_CONDUCT.adoc @@ -0,0 +1,11 @@ +// Copyright (c) 2016-2020 The Khronos Group Inc. +// +// SPDX-License-Identifier: CC-BY-4.0 + += Code of Conduct + +A reminder that this issue tracker is managed by the Khronos Group. 
+Interactions here should follow the +https://www.khronos.org/developers/code-of-conduct[Khronos Code of Conduct], +which prohibits aggressive or derogatory language. Please keep the +discussion friendly and civil. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index a11610bd300..00000000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1 +0,0 @@ -A reminder that this issue tracker is managed by the Khronos Group. Interactions here should follow the Khronos Code of Conduct (https://www.khronos.org/developers/code-of-conduct), which prohibits aggressive or derogatory language. Please keep the discussion friendly and civil. diff --git a/GLSL.std.450.h b/GLSL.std.450.h index 54cc00e9a88..2686fc4ea7e 100644 --- a/GLSL.std.450.h +++ b/GLSL.std.450.h @@ -1,27 +1,10 @@ /* -** Copyright (c) 2014-2016 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. + * Copyright 2014-2016,2021 The Khronos Group, Inc. + * SPDX-License-Identifier: MIT + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS + * STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND + * HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ */ #ifndef GLSLstd450_H diff --git a/LICENSES/Apache-2.0.txt b/LICENSES/Apache-2.0.txt new file mode 100644 index 00000000000..4ed90b95224 --- /dev/null +++ b/LICENSES/Apache-2.0.txt @@ -0,0 +1,208 @@ +Apache License + +Version 2.0, January 2004 + +http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, +AND DISTRIBUTION + + 1. Definitions. + + + +"License" shall mean the terms and conditions for use, reproduction, and distribution +as defined by Sections 1 through 9 of this document. + + + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + + + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct +or indirect, to cause the direction or management of such entity, whether +by contract or otherwise, or (ii) ownership of fifty percent (50%) or more +of the outstanding shares, or (iii) beneficial ownership of such entity. + + + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions +granted by this License. + + + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. 
+ + + +"Object" form shall mean any form resulting from mechanical transformation +or translation of a Source form, including but not limited to compiled object +code, generated documentation, and conversions to other media types. + + + +"Work" shall mean the work of authorship, whether in Source or Object form, +made available under the License, as indicated by a copyright notice that +is included in or attached to the work (an example is provided in the Appendix +below). + + + +"Derivative Works" shall mean any work, whether in Source or Object form, +that is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative +Works shall not include works that remain separable from, or merely link (or +bind by name) to the interfaces of, the Work and Derivative Works thereof. + + + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative +Works thereof, that is intentionally submitted to Licensor for inclusion in +the Work by the copyright owner or by an individual or Legal Entity authorized +to submit on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication +sent to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor +for the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." 
+ + + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently incorporated +within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this +License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable copyright license to reproduce, prepare +Derivative Works of, publicly display, publicly perform, sublicense, and distribute +the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, +each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) patent +license to make, have made, use, offer to sell, sell, import, and otherwise +transfer the Work, where such license applies only to those patent claims +licensable by such Contributor that are necessarily infringed by their Contribution(s) +alone or by combination of their Contribution(s) with the Work to which such +Contribution(s) was submitted. If You institute patent litigation against +any entity (including a cross-claim or counterclaim in a lawsuit) alleging +that the Work or a Contribution incorporated within the Work constitutes direct +or contributory patent infringement, then any patent licenses granted to You +under this License for that Work shall terminate as of the date such litigation +is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or +Derivative Works thereof in any medium, with or without modifications, and +in Source or Object form, provided that You meet the following conditions: + +(a) You must give any other recipients of the Work or Derivative Works a copy +of this License; and + +(b) You must cause any modified files to carry prominent notices stating that +You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source +form of the Work, excluding those notices that do not pertain to any part +of the Derivative Works; and + +(d) If the Work includes a "NOTICE" text file as part of its distribution, +then any Derivative Works that You distribute must include a readable copy +of the attribution notices contained within such NOTICE file, excluding those +notices that do not pertain to any part of the Derivative Works, in at least +one of the following places: within a NOTICE text file distributed as part +of the Derivative Works; within the Source form or documentation, if provided +along with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents +of the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works +that You distribute, alongside or as an addendum to the NOTICE text from the +Work, provided that such additional attribution notices cannot be construed +as modifying the License. 
+ +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, +or distribution of Your modifications, or for any such Derivative Works as +a whole, provided Your use, reproduction, and distribution of the Work otherwise +complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any +Contribution intentionally submitted for inclusion in the Work by You to the +Licensor shall be under the terms and conditions of this License, without +any additional terms or conditions. Notwithstanding the above, nothing herein +shall supersede or modify the terms of any separate license agreement you +may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, +trademarks, service marks, or product names of the Licensor, except as required +for reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to +in writing, Licensor provides the Work (and each Contributor provides its +Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied, including, without limitation, any warranties +or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR +A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness +of using or redistributing the Work and assume any risks associated with Your +exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether +in tort (including negligence), contract, or otherwise, unless required by +applicable law (such as deliberate and grossly negligent acts) or agreed to +in writing, shall any Contributor be liable to You for damages, including +any direct, indirect, special, incidental, or consequential damages of any +character arising as a result of this License or out of the use or inability +to use the Work (including but not limited to damages for loss of goodwill, +work stoppage, computer failure or malfunction, or any and all other commercial +damages or losses), even if such Contributor has been advised of the possibility +of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work +or Derivative Works thereof, You may choose to offer, and charge a fee for, +acceptance of support, warranty, indemnity, or other liability obligations +and/or rights consistent with this License. However, in accepting such obligations, +You may act only on Your own behalf and on Your sole responsibility, not on +behalf of any other Contributor, and only if You agree to indemnify, defend, +and hold each Contributor harmless for any liability incurred by, or claims +asserted against, such Contributor by reason of your accepting any such warranty +or additional liability. END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own identifying +information. (Don't include the brackets!) The text should be enclosed in +the appropriate comment syntax for the file format. We also recommend that +a file or class name and description of purpose be included on the same "printed +page" as the copyright notice for easier identification within third-party +archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); + +you may not use this file except in compliance with the License. + +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software + +distributed under the License is distributed on an "AS IS" BASIS, + +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +See the License for the specific language governing permissions and + +limitations under the License. diff --git a/LICENSES/CC-BY-4.0.txt b/LICENSES/CC-BY-4.0.txt new file mode 100644 index 00000000000..3f92dfc5fdd --- /dev/null +++ b/LICENSES/CC-BY-4.0.txt @@ -0,0 +1,324 @@ +Creative Commons Attribution 4.0 International Creative Commons Corporation +("Creative Commons") is not a law firm and does not provide legal services +or legal advice. Distribution of Creative Commons public licenses does not +create a lawyer-client or other relationship. Creative Commons makes its licenses +and related information available on an "as-is" basis. Creative Commons gives +no warranties regarding its licenses, any material licensed under their terms +and conditions, or any related information. Creative Commons disclaims all +liability for damages resulting from their use to the fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and conditions +that creators and other rights holders may use to share original works of +authorship and other material subject to copyright and certain other rights +specified in the public license below. The following considerations are for +informational purposes only, are not exhaustive, and do not form part of our +licenses. 
+ +Considerations for licensors: Our public licenses are intended for use by +those authorized to give the public permission to use material in ways otherwise +restricted by copyright and certain other rights. Our licenses are irrevocable. +Licensors should read and understand the terms and conditions of the license +they choose before applying it. Licensors should also secure all rights necessary +before applying our licenses so that the public can reuse the material as +expected. Licensors should clearly mark any material not subject to the license. +This includes other CC-licensed material, or material used under an exception +or limitation to copyright. More considerations for licensors : wiki.creativecommons.org/Considerations_for_licensors + +Considerations for the public: By using one of our public licenses, a licensor +grants the public permission to use the licensed material under specified +terms and conditions. If the licensor's permission is not necessary for any +reason–for example, because of any applicable exception or limitation to copyright–then +that use is not regulated by the license. Our licenses grant only permissions +under copyright and certain other rights that a licensor has authority to +grant. Use of the licensed material may still be restricted for other reasons, +including because others have copyright or other rights in the material. A +licensor may make special requests, such as asking that all changes be marked +or described. Although not required by our licenses, you are encouraged to +respect those requests where reasonable. More considerations for the public +: wiki.creativecommons.org/Considerations_for_licensees Creative Commons Attribution +4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree to +be bound by the terms and conditions of this Creative Commons Attribution +4.0 International Public License ("Public License"). 
To the extent this Public +License may be interpreted as a contract, You are granted the Licensed Rights +in consideration of Your acceptance of these terms and conditions, and the +Licensor grants You such rights in consideration of benefits the Licensor +receives from making the Licensed Material available under these terms and +conditions. + +Section 1 – Definitions. + +a. Adapted Material means material subject to Copyright and Similar Rights +that is derived from or based upon the Licensed Material and in which the +Licensed Material is translated, altered, arranged, transformed, or otherwise +modified in a manner requiring permission under the Copyright and Similar +Rights held by the Licensor. For purposes of this Public License, where the +Licensed Material is a musical work, performance, or sound recording, Adapted +Material is always produced where the Licensed Material is synched in timed +relation with a moving image. + +b. Adapter's License means the license You apply to Your Copyright and Similar +Rights in Your contributions to Adapted Material in accordance with the terms +and conditions of this Public License. + +c. Copyright and Similar Rights means copyright and/or similar rights closely +related to copyright including, without limitation, performance, broadcast, +sound recording, and Sui Generis Database Rights, without regard to how the +rights are labeled or categorized. For purposes of this Public License, the +rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. + +d. Effective Technological Measures means those measures that, in the absence +of proper authority, may not be circumvented under laws fulfilling obligations +under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, +and/or similar international agreements. + +e. 
Exceptions and Limitations means fair use, fair dealing, and/or any other +exception or limitation to Copyright and Similar Rights that applies to Your +use of the Licensed Material. + +f. Licensed Material means the artistic or literary work, database, or other +material to which the Licensor applied this Public License. + +g. Licensed Rights means the rights granted to You subject to the terms and +conditions of this Public License, which are limited to all Copyright and +Similar Rights that apply to Your use of the Licensed Material and that the +Licensor has authority to license. + +h. Licensor means the individual(s) or entity(ies) granting rights under this +Public License. + +i. Share means to provide material to the public by any means or process that +requires permission under the Licensed Rights, such as reproduction, public +display, public performance, distribution, dissemination, communication, or +importation, and to make material available to the public including in ways +that members of the public may access the material from a place and at a time +individually chosen by them. + +j. Sui Generis Database Rights means rights other than copyright resulting +from Directive 96/9/EC of the European Parliament and of the Council of 11 +March 1996 on the legal protection of databases, as amended and/or succeeded, +as well as other essentially equivalent rights anywhere in the world. + +k. You means the individual or entity exercising the Licensed Rights under +this Public License. Your has a corresponding meaning. + +Section 2 – Scope. + + a. License grant. + +1. Subject to the terms and conditions of this Public License, the Licensor +hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, +irrevocable license to exercise the Licensed Rights in the Licensed Material +to: + + A. reproduce and Share the Licensed Material, in whole or in part; and + + B. produce, reproduce, and Share Adapted Material. + +2. Exceptions and Limitations. 
For the avoidance of doubt, where Exceptions +and Limitations apply to Your use, this Public License does not apply, and +You do not need to comply with its terms and conditions. + + 3. Term. The term of this Public License is specified in Section 6(a). + +4. Media and formats; technical modifications allowed. The Licensor authorizes +You to exercise the Licensed Rights in all media and formats whether now known +or hereafter created, and to make technical modifications necessary to do +so. The Licensor waives and/or agrees not to assert any right or authority +to forbid You from making technical modifications necessary to exercise the +Licensed Rights, including technical modifications necessary to circumvent +Effective Technological Measures. For purposes of this Public License, simply +making modifications authorized by this Section 2(a)(4) never produces Adapted +Material. + + 5. Downstream recipients. + +A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed +Material automatically receives an offer from the Licensor to exercise the +Licensed Rights under the terms and conditions of this Public License. + +B. No downstream restrictions. You may not offer or impose any additional +or different terms or conditions on, or apply any Effective Technological +Measures to, the Licensed Material if doing so restricts exercise of the Licensed +Rights by any recipient of the Licensed Material. + +6. No endorsement. Nothing in this Public License constitutes or may be construed +as permission to assert or imply that You are, or that Your use of the Licensed +Material is, connected with, or sponsored, endorsed, or granted official status +by, the Licensor or others designated to receive attribution as provided in +Section 3(a)(1)(A)(i). + + b. Other rights. + +1. 
Moral rights, such as the right of integrity, are not licensed under this +Public License, nor are publicity, privacy, and/or other similar personality +rights; however, to the extent possible, the Licensor waives and/or agrees +not to assert any such rights held by the Licensor to the limited extent necessary +to allow You to exercise the Licensed Rights, but not otherwise. + +2. Patent and trademark rights are not licensed under this Public License. + +3. To the extent possible, the Licensor waives any right to collect royalties +from You for the exercise of the Licensed Rights, whether directly or through +a collecting society under any voluntary or waivable statutory or compulsory +licensing scheme. In all other cases the Licensor expressly reserves any right +to collect such royalties. + +Section 3 – License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the following +conditions. + + a. Attribution. + +1. If You Share the Licensed Material (including in modified form), You must: + +A. retain the following if it is supplied by the Licensor with the Licensed +Material: + +i. identification of the creator(s) of the Licensed Material and any others +designated to receive attribution, in any reasonable manner requested by the +Licensor (including by pseudonym if designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of warranties; + +v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; + +B. indicate if You modified the Licensed Material and retain an indication +of any previous modifications; and + +C. indicate the Licensed Material is licensed under this Public License, and +include the text of, or the URI or hyperlink to, this Public License. + +2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner +based on the medium, means, and context in which You Share the Licensed Material. 
+For example, it may be reasonable to satisfy the conditions by providing a +URI or hyperlink to a resource that includes the required information. + +3. If requested by the Licensor, You must remove any of the information required +by Section 3(a)(1)(A) to the extent reasonably practicable. + +4. If You Share Adapted Material You produce, the Adapter's License You apply +must not prevent recipients of the Adapted Material from complying with this +Public License. + +Section 4 – Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that apply to +Your use of the Licensed Material: + +a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, +reuse, reproduce, and Share all or a substantial portion of the contents of +the database; + +b. if You include all or a substantial portion of the database contents in +a database in which You have Sui Generis Database Rights, then the database +in which You have Sui Generis Database Rights (but not its individual contents) +is Adapted Material; and + +c. You must comply with the conditions in Section 3(a) if You Share all or +a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not replace +Your obligations under this Public License where the Licensed Rights include +other Copyright and Similar Rights. + +Section 5 – Disclaimer of Warranties and Limitation of Liability. + +a. Unless otherwise separately undertaken by the Licensor, to the extent possible, +the Licensor offers the Licensed Material as-is and as-available, and makes +no representations or warranties of any kind concerning the Licensed Material, +whether express, implied, statutory, or other. 
This includes, without limitation, +warranties of title, merchantability, fitness for a particular purpose, non-infringement, +absence of latent or other defects, accuracy, or the presence or absence of +errors, whether or not known or discoverable. Where disclaimers of warranties +are not allowed in full or in part, this disclaimer may not apply to You. + +b. To the extent possible, in no event will the Licensor be liable to You +on any legal theory (including, without limitation, negligence) or otherwise +for any direct, special, indirect, incidental, consequential, punitive, exemplary, +or other losses, costs, expenses, or damages arising out of this Public License +or use of the Licensed Material, even if the Licensor has been advised of +the possibility of such losses, costs, expenses, or damages. Where a limitation +of liability is not allowed in full or in part, this limitation may not apply +to You. + +c. The disclaimer of warranties and limitation of liability provided above +shall be interpreted in a manner that, to the extent possible, most closely +approximates an absolute disclaimer and waiver of all liability. + +Section 6 – Term and Termination. + +a. This Public License applies for the term of the Copyright and Similar Rights +licensed here. However, if You fail to comply with this Public License, then +Your rights under this Public License terminate automatically. + +b. Where Your right to use the Licensed Material has terminated under Section +6(a), it reinstates: + +1. automatically as of the date the violation is cured, provided it is cured +within 30 days of Your discovery of the violation; or + + 2. upon express reinstatement by the Licensor. + +c. For the avoidance of doubt, this Section 6(b) does not affect any right +the Licensor may have to seek remedies for Your violations of this Public +License. + +d. 
For the avoidance of doubt, the Licensor may also offer the Licensed Material +under separate terms or conditions or stop distributing the Licensed Material +at any time; however, doing so will not terminate this Public License. + + e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. + +Section 7 – Other Terms and Conditions. + +a. The Licensor shall not be bound by any additional or different terms or +conditions communicated by You unless expressly agreed. + +b. Any arrangements, understandings, or agreements regarding the Licensed +Material not stated herein are separate from and independent of the terms +and conditions of this Public License. + +Section 8 – Interpretation. + +a. For the avoidance of doubt, this Public License does not, and shall not +be interpreted to, reduce, limit, restrict, or impose conditions on any use +of the Licensed Material that could lawfully be made without permission under +this Public License. + +b. To the extent possible, if any provision of this Public License is deemed +unenforceable, it shall be automatically reformed to the minimum extent necessary +to make it enforceable. If the provision cannot be reformed, it shall be severed +from this Public License without affecting the enforceability of the remaining +terms and conditions. + +c. No term or condition of this Public License will be waived and no failure +to comply consented to unless expressly agreed to by the Licensor. + +d. Nothing in this Public License constitutes or may be interpreted as a limitation +upon, or waiver of, any privileges and immunities that apply to the Licensor +or You, including from the legal processes of any jurisdiction or authority. + +Creative Commons is not a party to its public licenses. Notwithstanding, Creative +Commons may elect to apply one of its public licenses to material it publishes +and in those instances will be considered the "Licensor." 
The text of the +Creative Commons public licenses is dedicated to the public domain under the +CC0 Public Domain Dedication. Except for the limited purpose of indicating +that material is shared under a Creative Commons public license or as otherwise +permitted by the Creative Commons policies published at creativecommons.org/policies, +Creative Commons does not authorize the use of the trademark "Creative Commons" +or any other trademark or logo of Creative Commons without its prior written +consent including, without limitation, in connection with any unauthorized +modifications to any of its public licenses or any other arrangements, understandings, +or agreements concerning use of licensed material. For the avoidance of doubt, +this paragraph does not form part of the public licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt new file mode 100644 index 00000000000..204b93da48d --- /dev/null +++ b/LICENSES/MIT.txt @@ -0,0 +1,19 @@ +MIT License Copyright (c) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS +OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile index a006e81faa7..b44eb5e8c97 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + TARGET := spirv-cross SOURCES := $(wildcard spirv_*.cpp) @@ -10,7 +13,7 @@ STATIC_LIB := lib$(TARGET).a DEPS := $(OBJECTS:.o=.d) $(CLI_OBJECTS:.o=.d) -CXXFLAGS += -std=c++11 -Wall -Wextra -Wshadow +CXXFLAGS += -std=c++11 -Wall -Wextra -Wshadow -Wno-deprecated-declarations ifeq ($(DEBUG), 1) CXXFLAGS += -O0 -g diff --git a/Package.swift b/Package.swift new file mode 100644 index 00000000000..99e601936a4 --- /dev/null +++ b/Package.swift @@ -0,0 +1,72 @@ +// swift-tools-version:5.5 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +// Copyright 2016-2021 The Khronos Group Inc. +// SPDX-License-Identifier: Apache-2.0 + +import PackageDescription + +let package = Package( + name: "SPIRV-Cross", + products: [ + // Products define the executables and libraries a package produces, and make them visible to other packages. + .library( + name: "SPIRV-Cross", + targets: ["SPIRV-Cross"]), + ], + dependencies: [ + // Dependencies declare other packages that this package depends on. + // .package(url: /* package url */, from: "1.0.0"), + ], + targets: [ + // Targets are the basic building blocks of a package. A target can define a module or a test suite. + // Targets can depend on other targets in this package, and on products in packages this package depends on. 
+ .target( + name: "SPIRV-Cross", + dependencies: [], + path: ".", + exclude: ["CMakeLists.txt", + "CODE_OF_CONDUCT.adoc", + "LICENSE", + "LICENSES", + "Makefile", + "README.md", + "appveyor.yml", + "build_glslang_spirv_tools.sh", + "checkout_glslang_spirv_tools.sh", + "cmake", + "format_all.sh", + "gn", + "main.cpp", + "pkg-config", + "reference", + "samples", + "shaders", + "shaders-hlsl", + "shaders-hlsl-no-opt", + "shaders-msl", + "shaders-msl-no-opt", + "shaders-no-opt", + "shaders-other", + "shaders-reflection", + "shaders-ue4", + "shaders-ue4-no-opt", + "test_shaders.py", + "test_shaders.sh", + "tests-other", + "update_test_shaders.sh"], + sources: ["spirv_cfg.cpp", + "spirv_cpp.cpp", + "spirv_cross.cpp", + "spirv_cross_c.cpp", + "spirv_cross_parsed_ir.cpp", + "spirv_cross_util.cpp", + "spirv_glsl.cpp", + "spirv_hlsl.cpp", + "spirv_msl.cpp", + "spirv_parser.cpp", + "spirv_reflect.cpp"], + publicHeadersPath: "."), + ], + cxxLanguageStandard: .cxx14 +) diff --git a/README.md b/README.md index 831c6ff00e2..3a89e6c2f8c 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,22 @@ + + # SPIRV-Cross SPIRV-Cross is a tool designed for parsing and converting SPIR-V to other shader languages. 
-[![Build Status](https://travis-ci.org/KhronosGroup/SPIRV-Cross.svg?branch=master)](https://travis-ci.org/KhronosGroup/SPIRV-Cross) -[![Build Status](https://ci.appveyor.com/api/projects/status/github/KhronosGroup/SPIRV-Cross?svg=true&branch=master)](https://ci.appveyor.com/project/HansKristian-Work/SPIRV-Cross) +[![CI](https://github.com/KhronosGroup/SPIRV-Cross/actions/workflows/main.yml/badge.svg)](https://github.com/KhronosGroup/SPIRV-Cross/actions/workflows/main.yml) +[![Build Status](https://ci.appveyor.com/api/projects/status/github/KhronosGroup/SPIRV-Cross?svg=true&branch=main)](https://ci.appveyor.com/project/HansKristian-Work/SPIRV-Cross) ## Features - Convert SPIR-V to readable, usable and efficient GLSL - Convert SPIR-V to readable, usable and efficient Metal Shading Language (MSL) - Convert SPIR-V to readable, usable and efficient HLSL + - Convert SPIR-V to a JSON reflection format - Convert SPIR-V to debuggable C++ [DEPRECATED] - - Convert SPIR-V to a JSON reflection format [EXPERIMENTAL] - Reflection API to simplify the creation of Vulkan pipeline layouts - Reflection API to modify and tweak OpDecorations - Supports "all" of vertex, fragment, tessellation, geometry and compute shaders. @@ -26,6 +31,10 @@ However, most missing features are expected to be "trivial" improvements at this SPIRV-Cross has been tested on Linux, iOS/OSX, Windows and Android. CMake is the main build system. +### NOTE: main branch rename + +On 2023-01-12, `master` was renamed to `main` as per Khronos policy. + ### Linux and macOS Building with CMake is recommended, as it is the only build system which is tested in continuous integration. @@ -52,6 +61,20 @@ The make and CMake build flavors offer the option to treat exceptions as asserti You can use `-DSPIRV_CROSS_STATIC=ON/OFF` `-DSPIRV_CROSS_SHARED=ON/OFF` `-DSPIRV_CROSS_CLI=ON/OFF` to control which modules are built (and installed). 
+### Installing SPIRV-Cross (vcpkg) + +Alternatively, you can build and install SPIRV-Cross using [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager: + +``` +git clone https://github.com/Microsoft/vcpkg.git +cd vcpkg +./bootstrap-vcpkg.sh +./vcpkg integrate install +./vcpkg install spirv-cross +``` + +The SPIRV-Cross port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. + ## Usage ### Using the C++ API @@ -99,7 +122,7 @@ int main() spirv_cross::CompilerGLSL::Options options; options.version = 310; options.es = true; - glsl.set_options(options); + glsl.set_common_options(options); // Compile to GLSL, ready to give to GL driver. std::string source = glsl.compile(); @@ -161,12 +184,12 @@ for (i = 0; i < count; i++) } // Modify options. -spvc_compiler_create_compiler_options(context, &options); +spvc_compiler_create_compiler_options(compiler_glsl, &options); spvc_compiler_options_set_uint(options, SPVC_COMPILER_OPTION_GLSL_VERSION, 330); spvc_compiler_options_set_bool(options, SPVC_COMPILER_OPTION_GLSL_ES, SPVC_FALSE); spvc_compiler_install_compiler_options(compiler_glsl, options); -spvc_compiler_compile(compiler, &result); +spvc_compiler_compile(compiler_glsl, &result); printf("Cross-compiled source: %s\n", result); // Frees all memory we allocated so far. @@ -321,7 +344,7 @@ compiler.set_name(varying_resource.base_type_id, "VertexFragmentLinkage"); ``` Some platform may require identical variable name for both vertex outputs and fragment inputs. (for example MacOSX) -to rename varaible base on location, please add +to rename variable base on location, please add ``` --rename-interface-variable ``` @@ -376,10 +399,28 @@ for (auto &remap : compiler->get_combined_image_samplers()) If your target is Vulkan GLSL, `--vulkan-semantics` will emit separate image samplers as you'd expect. 
The command line client calls `Compiler::build_combined_image_samplers` automatically, but if you're calling the library, you'll need to do this yourself. -#### Descriptor sets (Vulkan GLSL) for backends which do not support them (HLSL/GLSL/Metal) +#### Descriptor sets (Vulkan GLSL) for backends which do not support them (pre HLSL 5.1 / GLSL) Descriptor sets are unique to Vulkan, so make sure that descriptor set + binding is remapped to a flat binding scheme (set always 0), so that other APIs can make sense of the bindings. -This can be done with `Compiler::set_decoration(id, spv::DecorationDescriptorSet)`. +This can be done with `Compiler::set_decoration(id, spv::DecorationDescriptorSet)`. For other backends like MSL and HLSL, descriptor sets +can be used, with some minor caveats, see below. + +##### MSL 2.0+ + +Metal supports indirect argument buffers (--msl-argument-buffers). In this case, descriptor sets become argument buffers, +and bindings are mapped to [[id(N)]] within the argument buffer. One quirk is that arrays of resources consume multiple ids, +where Vulkan does not. This can be worked around either from shader authoring stage +or remapping bindings as needed to avoid the overlap. +There is also a rich API to declare remapping schemes which is intended to work like +the pipeline layout in Vulkan. See `CompilerMSL::add_msl_resource_binding`. Remapping combined image samplers for example +must be split into two bindings in MSL, so it's possible to declare an id for the texture and sampler binding separately. + +##### HLSL - SM 5.1+ + +In SM 5.1+, descriptor set bindings are interpreted as register spaces directly. In HLSL however, arrays of resources consume +multiple binding slots where Vulkan does not, so there might be overlap if the SPIR-V was not authored with this in mind. +This can be worked around either from shader authoring stage (don't assign overlapping bindings) +or remap bindings in SPIRV-Cross as needed to avoid the overlap. 
#### Linking by name for targets which do not support explicit locations (legacy GLSL/ESSL) @@ -402,6 +443,29 @@ Y-flipping of gl_Position and similar is also supported. The use of this is discouraged, because relying on vertex shader Y-flipping tends to get quite messy. To enable this, set `CompilerGLSL::Options.vertex.flip_vert_y` or `--flip-vert-y` in CLI. +#### Reserved identifiers + +When cross-compiling, certain identifiers are considered to be reserved by the implementation. +Code generated by SPIRV-Cross cannot emit these identifiers as they are reserved and used for various internal purposes, +and such variables will typically show up as `_RESERVED_IDENTIFIER_FIXUP_` +or some similar name to make it more obvious that an identifier has been renamed. + +Reflection output will follow the exact name specified in the SPIR-V module. It might not be a valid identifier in the C sense, +as it may contain non-alphanumeric/non-underscore characters. + +Reserved identifiers currently assumed by the implementation are (in pseudo-regex): + +- _$digit+, e.g. `_100`, `_2` +- _$digit+_.+, e.g. `_100_tmp`, `_2_foobar`. `_2Bar` is **not** reserved. +- gl_- prefix +- spv- prefix +- SPIRV_Cross prefix. This prefix is generally used for interface variables where app needs to provide data for workaround purposes. + This identifier will not be rewritten, but be aware of potential collisions. +- Double underscores (reserved by all target languages). + +Members of structs also have a reserved identifier: +- _m$digit+$END, e.g. `_m20` and `_m40` are reserved, but not `_m40Foobar`. + ## Contributing Contributions to SPIRV-Cross are welcome. See Testing and Licensing sections for details. @@ -416,9 +480,6 @@ All pull requests should ensure that test output does not change unexpectedly. T ``` ./checkout_glslang_spirv_tools.sh # Checks out glslang and SPIRV-Tools at a fixed revision which matches the reference output. 
- # NOTE: Some users have reported problems cloning from git:// paths. To use https:// instead pass in - # $ PROTOCOL=https ./checkout_glslang_spirv_tools.sh - # instead. ./build_glslang_spirv_tools.sh # Builds glslang and SPIRV-Tools. ./test_shaders.sh # Runs over all changes and makes sure that there are no deltas compared to reference files. ``` @@ -442,7 +503,7 @@ to update the reference files and include these changes as part of the pull requ Always make sure you are running the correct version of glslangValidator as well as SPIRV-Tools when updating reference files. See `checkout_glslang_spirv_tools.sh` which revisions are currently expected. The revisions change regularly. -In short, the master branch should always be able to run `./test_shaders.py shaders` and friends without failure. +In short, the main branch should always be able to run `./test_shaders.py shaders` and friends without failure. SPIRV-Cross uses Travis CI to test all pull requests, so it is not strictly needed to perform testing yourself if you have problems running it locally. A pull request which does not pass testing on Travis will not be accepted however. diff --git a/appveyor.yml b/appveyor.yml index 2f427f18044..cc2fade2716 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,3 +1,5 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 environment: matrix: diff --git a/build_glslang_spirv_tools.sh b/build_glslang_spirv_tools.sh index fb4f7de218c..f78640f35b9 100755 --- a/build_glslang_spirv_tools.sh +++ b/build_glslang_spirv_tools.sh @@ -1,4 +1,6 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 PROFILE=Release diff --git a/checkout_glslang_spirv_tools.sh b/checkout_glslang_spirv_tools.sh index 37b99a0e08f..88429395837 100755 --- a/checkout_glslang_spirv_tools.sh +++ b/checkout_glslang_spirv_tools.sh @@ -1,14 +1,11 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. 
+# SPDX-License-Identifier: Apache-2.0 -GLSLANG_REV=e291f7a09f6733f6634fe077a228056fabee881e -SPIRV_TOOLS_REV=89fe836fe22c3e5c2a062ebeade012e2c2f0839b -SPIRV_HEADERS_REV=c4f8f65792d4bf2657ca751904c511bbcf2ac77b - -if [ -z $PROTOCOL ]; then - PROTOCOL=git -fi - -echo "Using protocol \"$PROTOCOL\" for checking out repositories. If this is problematic, try PROTOCOL=https $0." +GLSLANG_REV=06a7078ce74ab5c7801a165b8145859678831fb8 +SPIRV_TOOLS_REV=f62e121b0df5374d1f043d1fbda98467406af0b1 +SPIRV_HEADERS_REV=d13b52222c39a7e9a401b44646f0ca3a640fbd47 +PROTOCOL=https if [ -d external/glslang ]; then echo "Updating glslang to revision $GLSLANG_REV." @@ -41,7 +38,7 @@ fi if [ -d external/spirv-headers ]; then cd external/spirv-headers - git pull origin master + git fetch origin git checkout $SPIRV_HEADERS_REV cd ../.. else diff --git a/cmake/gitversion.in.h b/cmake/gitversion.in.h index 7135e283b23..bff73e964e8 100644 --- a/cmake/gitversion.in.h +++ b/cmake/gitversion.in.h @@ -1,3 +1,6 @@ +// Copyright 2016-2021 The Khronos Group Inc. +// SPDX-License-Identifier: Apache-2.0 + #ifndef SPIRV_CROSS_GIT_VERSION_H_ #define SPIRV_CROSS_GIT_VERSION_H_ diff --git a/format_all.sh b/format_all.sh index fcfffc57f86..001c3c5dd4b 100755 --- a/format_all.sh +++ b/format_all.sh @@ -1,4 +1,6 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 #for file in spirv_*.{cpp,hpp} include/spirv_cross/*.{hpp,h} samples/cpp/*.cpp main.cpp for file in spirv_*.{cpp,hpp} main.cpp diff --git a/gn/BUILD.gn b/gn/BUILD.gn new file mode 100644 index 00000000000..64d019eae50 --- /dev/null +++ b/gn/BUILD.gn @@ -0,0 +1,68 @@ +# Copyright (C) 2019 Google, Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +config("spirv_cross_public") { + include_dirs = [ ".." ] + + defines = [ "SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS" ] +} + +source_set("spirv_cross_sources") { + public_configs = [ ":spirv_cross_public" ] + + sources = [ + "../GLSL.std.450.h", + "../spirv.hpp", + "../spirv_cfg.cpp", + "../spirv_cfg.hpp", + "../spirv_common.hpp", + "../spirv_cross.cpp", + "../spirv_cross.hpp", + "../spirv_cross_containers.hpp", + "../spirv_cross_error_handling.hpp", + "../spirv_cross_parsed_ir.cpp", + "../spirv_cross_parsed_ir.hpp", + "../spirv_cross_util.cpp", + "../spirv_cross_util.hpp", + "../spirv_glsl.cpp", + "../spirv_glsl.hpp", + "../spirv_hlsl.cpp", + "../spirv_hlsl.hpp", + "../spirv_msl.cpp", + "../spirv_msl.hpp", + "../spirv_parser.cpp", + "../spirv_parser.hpp", + "../spirv_reflect.cpp", + "../spirv_reflect.hpp", + ] + + if (!is_win) { + cflags = [ "-fno-exceptions" ] + } + + if (is_clang) { + cflags_cc = [ + "-Wno-extra-semi", + "-Wno-ignored-qualifiers", + "-Wno-implicit-fallthrough", + "-Wno-inconsistent-missing-override", + "-Wno-missing-field-initializers", + "-Wno-newline-eof", + "-Wno-sign-compare", + "-Wno-unused-variable", + ] + } +} diff --git a/include/spirv_cross/barrier.hpp b/include/spirv_cross/barrier.hpp index bfcd2284317..4ca7f4d77cb 100644 --- a/include/spirv_cross/barrier.hpp +++ b/include/spirv_cross/barrier.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/include/spirv_cross/external_interface.h b/include/spirv_cross/external_interface.h index 1d26f1e1e44..949654f5bff 100644 --- a/include/spirv_cross/external_interface.h +++ b/include/spirv_cross/external_interface.h @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/spirv_cross/image.hpp b/include/spirv_cross/image.hpp index 73de894f886..a41ccdfbb40 100644 --- a/include/spirv_cross/image.hpp +++ b/include/spirv_cross/image.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/spirv_cross/internal_interface.hpp b/include/spirv_cross/internal_interface.hpp index e56223dfdbe..3ff7f8e258c 100644 --- a/include/spirv_cross/internal_interface.hpp +++ b/include/spirv_cross/internal_interface.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/spirv_cross/sampler.hpp b/include/spirv_cross/sampler.hpp index a95d489e2dc..02084809514 100644 --- a/include/spirv_cross/sampler.hpp +++ b/include/spirv_cross/sampler.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,7 +86,7 @@ struct sampler2DBase : spirv_cross_sampler_2d std::vector mips; spirv_cross_format format; spirv_cross_wrap wrap_s; - spirv_cross_format wrap_t; + spirv_cross_wrap wrap_t; spirv_cross_filter min_filter; spirv_cross_filter mag_filter; spirv_cross_mipfilter mip_filter; diff --git a/include/spirv_cross/thread_group.hpp b/include/spirv_cross/thread_group.hpp index 377f098b4fb..b2155815625 100644 --- a/include/spirv_cross/thread_group.hpp +++ b/include/spirv_cross/thread_group.hpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/main.cpp b/main.cpp index c441d235815..3605a54a209 100644 --- a/main.cpp +++ b/main.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #include "spirv_cpp.hpp" #include "spirv_cross_util.hpp" #include "spirv_glsl.hpp" @@ -31,12 +38,13 @@ #include #include -#ifdef HAVE_SPIRV_CROSS_GIT_VERSION -#include "gitversion.h" +#ifdef _WIN32 +#include +#include #endif -#ifdef _MSC_VER -#pragma warning(disable : 4996) +#ifdef HAVE_SPIRV_CROSS_GIT_VERSION +#include "gitversion.h" #endif using namespace spv; @@ -69,7 +77,7 @@ struct CLICallbacks struct CLIParser { CLIParser(CLICallbacks cbs_, int argc_, char *argv_[]) - : cbs(move(cbs_)) + : cbs(std::move(cbs_)) , argc(argc_) , argv(argv_) { @@ -140,6 +148,25 @@ struct CLIParser return uint32_t(val); } + uint32_t next_hex_uint() + { + if (!argc) + { + THROW("Tried to parse uint, but nothing left in arguments"); + } + + uint64_t val = stoul(*argv, nullptr, 16); + if (val > numeric_limits::max()) + { + THROW("next_uint() out of range"); + } + + argc--; + argv++; + + return uint32_t(val); + } + double next_double() { if (!argc) @@ -190,8 +217,35 @@ struct CLIParser bool ended_state = false; }; +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#elif defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +static vector read_spirv_file_stdin() +{ +#ifdef _WIN32 + setmode(fileno(stdin), O_BINARY); +#endif + + vector buffer; + uint32_t tmp[256]; + size_t ret; + + while ((ret = fread(tmp, sizeof(uint32_t), 256, stdin))) + buffer.insert(buffer.end(), tmp, tmp + ret); + + return buffer; +} + static vector read_spirv_file(const char *path) { + if (path[0] == '-' && path[1] == '\0') + return read_spirv_file_stdin(); + FILE *file = fopen(path, "rb"); if (!file) { @@ -225,6 +279,67 @@ static bool write_string_to_file(const char *path, const char *string) return true; } +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic pop +#elif defined(_MSC_VER) +#pragma warning(pop) +#endif + +static void print_resources(const Compiler &compiler, 
spv::StorageClass storage, + const SmallVector &resources) +{ + fprintf(stderr, "%s\n", storage == StorageClassInput ? "builtin inputs" : "builtin outputs"); + fprintf(stderr, "=============\n\n"); + for (auto &res : resources) + { + bool active = compiler.has_active_builtin(res.builtin, storage); + const char *basetype = "?"; + auto &type = compiler.get_type(res.value_type_id); + switch (type.basetype) + { + case SPIRType::Float: basetype = "float"; break; + case SPIRType::Int: basetype = "int"; break; + case SPIRType::UInt: basetype = "uint"; break; + default: break; + } + + uint32_t array_size = 0; + bool array_size_literal = false; + if (!type.array.empty()) + { + array_size = type.array.front(); + array_size_literal = type.array_size_literal.front(); + } + + string type_str = basetype; + if (type.vecsize > 1) + type_str += std::to_string(type.vecsize); + + if (array_size) + { + if (array_size_literal) + type_str += join("[", array_size, "]"); + else + type_str += join("[", array_size, " (spec constant ID)]"); + } + + string builtin_str; + switch (res.builtin) + { + case spv::BuiltInPosition: builtin_str = "Position"; break; + case spv::BuiltInPointSize: builtin_str = "PointSize"; break; + case spv::BuiltInCullDistance: builtin_str = "CullDistance"; break; + case spv::BuiltInClipDistance: builtin_str = "ClipDistance"; break; + case spv::BuiltInTessLevelInner: builtin_str = "TessLevelInner"; break; + case spv::BuiltInTessLevelOuter: builtin_str = "TessLevelOuter"; break; + default: builtin_str = string("builtin #") + to_string(res.builtin); + } + + fprintf(stderr, "Builtin %s (%s) (active: %s).\n", builtin_str.c_str(), type_str.c_str(), active ? 
"yes" : "no"); + } + fprintf(stderr, "=============\n\n"); +} + static void print_resources(const Compiler &compiler, const char *tag, const SmallVector &resources) { fprintf(stderr, "%s\n", tag); @@ -246,7 +361,7 @@ static void print_resources(const Compiler &compiler, const char *tag, const Sma compiler.get_decoration_bitset(type.self).get(DecorationBufferBlock); bool is_sized_block = is_block && (compiler.get_storage_class(res.id) == StorageClassUniform || compiler.get_storage_class(res.id) == StorageClassUniformConstant); - uint32_t fallback_id = !is_push_constant && is_block ? res.base_type_id : res.id; + ID fallback_id = !is_push_constant && is_block ? ID(res.base_type_id) : ID(res.id); uint32_t block_size = 0; uint32_t runtime_array_stride = 0; @@ -268,7 +383,7 @@ static void print_resources(const Compiler &compiler, const char *tag, const Sma for (auto arr : type.array) array = join("[", arr ? convert_to_string(arr) : "", "]") + array; - fprintf(stderr, " ID %03u : %s%s", res.id, + fprintf(stderr, " ID %03u : %s%s", uint32_t(res.id), !res.name.empty() ? 
res.name.c_str() : compiler.get_fallback_name(fallback_id).c_str(), array.c_str()); if (mask.get(DecorationLocation)) @@ -277,12 +392,20 @@ static void print_resources(const Compiler &compiler, const char *tag, const Sma fprintf(stderr, " (Set : %u)", compiler.get_decoration(res.id, DecorationDescriptorSet)); if (mask.get(DecorationBinding)) fprintf(stderr, " (Binding : %u)", compiler.get_decoration(res.id, DecorationBinding)); + if (static_cast(compiler).variable_is_depth_or_compare(res.id)) + fprintf(stderr, " (comparison)"); if (mask.get(DecorationInputAttachmentIndex)) fprintf(stderr, " (Attachment : %u)", compiler.get_decoration(res.id, DecorationInputAttachmentIndex)); if (mask.get(DecorationNonReadable)) fprintf(stderr, " writeonly"); if (mask.get(DecorationNonWritable)) fprintf(stderr, " readonly"); + if (mask.get(DecorationRestrict)) + fprintf(stderr, " restrict"); + if (mask.get(DecorationCoherent)) + fprintf(stderr, " coherent"); + if (mask.get(DecorationVolatile)) + fprintf(stderr, " volatile"); if (is_sized_block) { fprintf(stderr, " (BlockSize : %u bytes)", block_size); @@ -413,6 +536,9 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res print_resources(compiler, "push", res.push_constant_buffers); print_resources(compiler, "counters", res.atomic_counters); print_resources(compiler, "acceleration structures", res.acceleration_structures); + print_resources(compiler, "record buffers", res.shader_record_buffers); + print_resources(compiler, spv::StorageClassInput, res.builtin_inputs); + print_resources(compiler, spv::StorageClassOutput, res.builtin_outputs); } static void print_push_constant_resources(const Compiler &compiler, const SmallVector &res) @@ -442,7 +568,7 @@ static void print_spec_constants(const Compiler &compiler) fprintf(stderr, "Specialization constants\n"); fprintf(stderr, "==================\n\n"); for (auto &c : spec_constants) - fprintf(stderr, "ID: %u, Spec ID: %u\n", c.id, c.constant_id); + 
fprintf(stderr, "ID: %u, Spec ID: %u\n", uint32_t(c.id), c.constant_id); fprintf(stderr, "==================\n\n"); } @@ -487,6 +613,12 @@ struct InterfaceVariableRename string variable_name; }; +struct HLSLVertexAttributeRemapNamed +{ + std::string name; + std::string semantic; +}; + struct CLIArguments { const char *input = nullptr; @@ -513,12 +645,55 @@ struct CLIArguments bool msl_pad_fragment_output = false; bool msl_domain_lower_left = false; bool msl_argument_buffers = false; + uint32_t msl_argument_buffers_tier = 0; // Tier 1 bool msl_texture_buffer_native = false; + bool msl_framebuffer_fetch = false; + bool msl_invariant_float_math = false; + bool msl_emulate_cube_array = false; bool msl_multiview = false; + bool msl_multiview_layered_rendering = true; + bool msl_view_index_from_device_index = false; + bool msl_dispatch_base = false; + bool msl_decoration_binding = false; + bool msl_force_active_argument_buffer_resources = false; + bool msl_force_native_arrays = false; + bool msl_enable_frag_depth_builtin = true; + bool msl_enable_frag_stencil_ref_builtin = true; + uint32_t msl_enable_frag_output_mask = 0xffffffff; + bool msl_enable_clip_distance_user_varying = true; + bool msl_raw_buffer_tese_input = false; + bool msl_multi_patch_workgroup = false; + bool msl_vertex_for_tessellation = false; + uint32_t msl_additional_fixed_sample_mask = 0xffffffff; + bool msl_arrayed_subpass_input = false; + uint32_t msl_r32ui_linear_texture_alignment = 4; + uint32_t msl_r32ui_alignment_constant_id = 65535; + bool msl_texture_1d_as_2d = false; + bool msl_ios_use_simdgroup_functions = false; + bool msl_emulate_subgroups = false; + uint32_t msl_fixed_subgroup_size = 0; + bool msl_force_sample_rate_shading = false; + bool msl_manual_helper_invocation_updates = true; + bool msl_check_discarded_frag_stores = false; + const char *msl_combined_sampler_suffix = nullptr; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; + bool 
glsl_force_flattened_io_blocks = false; + uint32_t glsl_ovr_multiview_view_count = 0; + SmallVector> glsl_ext_framebuffer_fetch; + bool glsl_ext_framebuffer_fetch_noncoherent = false; + bool vulkan_glsl_disable_ext_samplerless_texture_functions = false; bool emit_line_directives = false; + bool enable_storage_image_qualifier_deduction = true; + bool force_zero_initialized_variables = false; + bool relax_nan_checks = false; + uint32_t force_recompile_max_debug_iterations = 3; SmallVector msl_discrete_descriptor_sets; + SmallVector msl_device_argument_buffers; + SmallVector> msl_dynamic_buffers; + SmallVector> msl_inline_uniform_blocks; + SmallVector msl_shader_inputs; + SmallVector msl_shader_outputs; SmallVector pls_in; SmallVector pls_out; SmallVector remaps; @@ -526,6 +701,9 @@ struct CLIArguments SmallVector variable_type_remaps; SmallVector interface_variable_renames; SmallVector hlsl_attr_remap; + SmallVector hlsl_attr_remap_named; + SmallVector> masked_stage_outputs; + SmallVector masked_stage_builtins; string entry; string entry_stage; @@ -543,7 +721,17 @@ struct CLIArguments bool msl = false; bool hlsl = false; bool hlsl_compat = false; + bool hlsl_support_nonzero_base = false; + bool hlsl_base_vertex_index_explicit_binding = false; + uint32_t hlsl_base_vertex_index_register_index = 0; + uint32_t hlsl_base_vertex_index_register_space = 0; + + bool hlsl_force_storage_buffer_as_uav = false; + bool hlsl_nonwritable_uav_texture_as_srv = false; + bool hlsl_enable_16bit_types = false; + bool hlsl_flatten_matrix_vertex_input_semantics = false; + HLSLBindingFlags hlsl_binding_flags = 0; bool vulkan_semantics = false; bool flatten_multidimensional_arrays = false; bool use_420pack_extension = true; @@ -560,63 +748,276 @@ static void print_version() #endif } +static void print_help_backend() +{ + // clang-format off + fprintf(stderr, "\nSelect backend:\n" + "\tBy default, OpenGL-style GLSL is the target, with #version and GLSL/ESSL information inherited from the 
SPIR-V module if present.\n" + "\t[--vulkan-semantics] or [-V]:\n\t\tEmit Vulkan GLSL instead of plain GLSL. Makes use of Vulkan-only features to match SPIR-V.\n" + "\t[--msl]:\n\t\tEmit Metal Shading Language (MSL).\n" + "\t[--hlsl]:\n\t\tEmit HLSL.\n" + "\t[--reflect]:\n\t\tEmit JSON reflection.\n" + "\t[--cpp]:\n\t\tDEPRECATED. Emits C++ code.\n" + ); + // clang-format on +} + +static void print_help_glsl() +{ + // clang-format off + fprintf(stderr, "\nGLSL options:\n" + "\t[--es]:\n\t\tForce ESSL.\n" + "\t[--no-es]:\n\t\tForce desktop GLSL.\n" + "\t[--version ]:\n\t\tE.g. --version 450 will emit '#version 450' in shader.\n" + "\t\tCode generation will depend on the version used.\n" + "\t[--flatten-ubo]:\n\t\tEmit UBOs as plain uniform arrays which are suitable for use with glUniform4*v().\n" + "\t\tThis can be an optimization on GL implementations where this is faster or works around buggy driver implementations.\n" + "\t\tE.g.: uniform MyUBO { vec4 a; float b, c, d, e; }; will be emitted as uniform vec4 MyUBO[2];\n" + "\t\tCaveat: You cannot mix and match floating-point and integer in the same UBO with this option.\n" + "\t\tLegacy GLSL/ESSL (where this flattening makes sense) does not support bit-casting, which would have been the obvious workaround.\n" + "\t[--extension ext]:\n\t\tAdd #extension string of your choosing to GLSL output.\n" + "\t\tUseful if you use variable name remapping to something that requires an extension unknown to SPIRV-Cross.\n" + "\t[--remove-unused-variables]:\n\t\tDo not emit interface variables which are not statically accessed by the shader.\n" + "\t[--separate-shader-objects]:\n\t\tRedeclare gl_PerVertex blocks to be suitable for desktop GL separate shader objects.\n" + "\t[--glsl-emit-push-constant-as-ubo]:\n\t\tInstead of a plain uniform of struct for push constants, emit a UBO block instead.\n" + "\t[--glsl-emit-ubo-as-plain-uniforms]:\n\t\tInstead of emitting UBOs, emit them as plain uniform structs.\n" + 
"\t[--glsl-remap-ext-framebuffer-fetch input-attachment color-location]:\n\t\tRemaps an input attachment to use GL_EXT_shader_framebuffer_fetch.\n" + "\t\tgl_LastFragData[location] is read from. The attachment to read from must be declared as an output in the shader.\n" + "\t[--glsl-ext-framebuffer-fetch-noncoherent]:\n\t\tUses noncoherent qualifier for framebuffer fetch.\n" + "\t[--vulkan-glsl-disable-ext-samplerless-texture-functions]:\n\t\tDo not allow use of GL_EXT_samperless_texture_functions, even in Vulkan GLSL.\n" + "\t\tUse of texelFetch and similar might have to create dummy samplers to work around it.\n" + "\t[--combined-samplers-inherit-bindings]:\n\t\tInherit binding information from the textures when building combined image samplers from separate textures and samplers.\n" + "\t[--no-support-nonzero-baseinstance]:\n\t\tWhen using gl_InstanceIndex with desktop GL,\n" + "\t\tassume that base instance is always 0, and do not attempt to fix up gl_InstanceID to match Vulkan semantics.\n" + "\t[--pls-in format input-name]:\n\t\tRemaps a subpass input with name into a GL_EXT_pixel_local_storage input.\n" + "\t\tEntry in PLS block is ordered where first --pls-in marks the first entry. Can be called multiple times.\n" + "\t\tFormats allowed: r11f_g11f_b10f, r32f, rg16f, rg16, rgb10_a2, rgba8, rgba8i, rgba8ui, rg16i, rgb10_a2ui, rg16ui, r32ui.\n" + "\t\tRequires ESSL.\n" + "\t[--pls-out format output-name]:\n\t\tRemaps a color output with name into a GL_EXT_pixel_local_storage output.\n" + "\t\tEntry in PLS block is ordered where first --pls-output marks the first entry. 
Can be called multiple times.\n" + "\t\tFormats allowed: r11f_g11f_b10f, r32f, rg16f, rg16, rgb10_a2, rgba8, rgba8i, rgba8ui, rg16i, rgb10_a2ui, rg16ui, r32ui.\n" + "\t\tRequires ESSL.\n" + "\t[--remap source_name target_name components]:\n\t\tRemaps a variable to a different name with N components.\n" + "\t\tMain use case is to remap a subpass input to gl_LastFragDepthARM.\n" + "\t\tE.g.:\n" + "\t\tuniform subpassInput uDepth;\n" + "\t\t--remap uDepth gl_LastFragDepthARM 1 --extension GL_ARM_shader_framebuffer_fetch_depth_stencil\n" + "\t[--no-420pack-extension]:\n\t\tDo not make use of GL_ARB_shading_language_420pack in older GL targets to support layout(binding).\n" + "\t[--remap-variable-type ]:\n\t\tRemaps a variable type based on name.\n" + "\t\tPrimary use case is supporting external samplers in ESSL for video rendering on Android where you could remap a texture to a YUV one.\n" + "\t[--glsl-force-flattened-io-blocks]:\n\t\tAlways flatten I/O blocks and structs.\n" + "\t[--glsl-ovr-multiview-view-count count]:\n\t\tIn GL_OVR_multiview2, specify layout(num_views).\n" + ); + // clang-format on +} + +static void print_help_hlsl() +{ + // clang-format off + fprintf(stderr, "\nHLSL options:\n" + "\t[--shader-model]:\n\t\tEnables a specific shader model, e.g. 
--shader-model 50 for SM 5.0.\n" + "\t[--flatten-ubo]:\n\t\tEmit UBOs as plain uniform arrays.\n" + "\t\tE.g.: uniform MyUBO { vec4 a; float b, c, d, e; }; will be emitted as uniform float4 MyUBO[2];\n" + "\t\tCaveat: You cannot mix and match floating-point and integer in the same UBO with this option.\n" + "\t[--hlsl-enable-compat]:\n\t\tAllow point size and point coord to be used, even if they won't work as expected.\n" + "\t\tPointSize is ignored, and PointCoord returns (0.5, 0.5).\n" + "\t[--hlsl-support-nonzero-basevertex-baseinstance]:\n\t\tSupport base vertex and base instance by emitting a special cbuffer declared as:\n" + "\t\tcbuffer SPIRV_Cross_VertexInfo { int SPIRV_Cross_BaseVertex; int SPIRV_Cross_BaseInstance; };\n" + "\t[--hlsl-basevertex-baseinstance-binding ]:\n\t\tAssign a fixed binding to SPIRV_Cross_VertexInfo.\n" + "\t[--hlsl-auto-binding (push, cbv, srv, uav, sampler, all)]\n" + "\t\tDo not emit any : register(#) bindings for specific resource types, and rely on HLSL compiler to assign something.\n" + "\t[--hlsl-force-storage-buffer-as-uav]:\n\t\tAlways emit SSBOs as UAVs, even when marked as read-only.\n" + "\t\tNormally, SSBOs marked with NonWritable will be emitted as SRVs.\n" + "\t[--hlsl-nonwritable-uav-texture-as-srv]:\n\t\tEmit NonWritable storage images as SRV textures instead of UAV.\n" + "\t\tUsing this option messes with the type system. 
SPIRV-Cross cannot guarantee that this will work.\n" + "\t\tOne major problem area with this feature is function arguments, where we won't know if we're seeing a UAV or SRV.\n" + "\t\tShader must ensure that read/write state is consistent at all call sites.\n" + "\t[--set-hlsl-vertex-input-semantic ]:\n\t\tEmits a specific vertex input semantic for a given location.\n" + "\t\tOtherwise, TEXCOORD# is used as semantics, where # is location.\n" + "\t[--set-hlsl-named-vertex-input-semantic ]:\n\t\tEmits a specific vertex input semantic for a given name.\n" + "\t\tOpName reflection information must be intact.\n" + "\t[--hlsl-enable-16bit-types]:\n\t\tEnables native use of half/int16_t/uint16_t and ByteAddressBuffer interaction with these types. Requires SM 6.2.\n" + "\t[--hlsl-flatten-matrix-vertex-input-semantics]:\n\t\tEmits matrix vertex inputs with input semantics as if they were independent vectors, e.g. TEXCOORD{2,3,4} rather than matrix form TEXCOORD2_{0,1,2}.\n" + ); + // clang-format on +} + +static void print_help_msl() +{ + // clang-format off + fprintf(stderr, "\nMSL options:\n" + "\t[--msl-version ]:\n\t\tUses a specific MSL version, e.g. --msl-version 20100 for MSL 2.1.\n" + "\t[--msl-capture-output]:\n\t\tWrites geometry varyings to a buffer instead of as stage-outputs.\n" + "\t[--msl-swizzle-texture-samples]:\n\t\tWorks around lack of support for VkImageView component swizzles.\n" + "\t\tThis has a massive impact on performance and bloat. 
Do not use this unless you are absolutely forced to.\n" + "\t\tTo use this feature, the API side must pass down swizzle buffers.\n" + "\t\tShould only be used by translation layers as a last resort.\n" + "\t\tRecent Metal versions do not require this workaround.\n" + "\t[--msl-ios]:\n\t\tTarget iOS Metal instead of macOS Metal.\n" + "\t[--msl-pad-fragment-output]:\n\t\tAlways emit color outputs as 4-component variables.\n" + "\t\tIn Metal, the fragment shader must emit at least as many components as the render target format.\n" + "\t[--msl-domain-lower-left]:\n\t\tUse a lower-left tessellation domain.\n" + "\t[--msl-argument-buffers]:\n\t\tEmit Metal argument buffers instead of discrete resource bindings.\n" + "\t\tRequires MSL 2.0 to be enabled.\n" + "\t[--msl-argument-buffers-tier]:\n\t\tWhen using Metal argument buffers, indicate the Metal argument buffer tier level supported by the Metal platform.\n" + "\t\tUses same values as Metal MTLArgumentBuffersTier enumeration (0 = Tier1, 1 = Tier2).\n" + "\t\tSetting this value also enables msl-argument-buffers.\n" + "\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. 
Otherwise, it is emulated as a normal texture.\n" + "\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n" + "\t\tEmits [[color(N)]] inputs in fragment stage.\n" + "\t\tRequires an Apple GPU.\n" + "\t[--msl-emulate-cube-array]:\n\t\tEmulate cube arrays with 2D array and manual math.\n" + "\t[--msl-discrete-descriptor-set ]:\n\t\tWhen using argument buffers, forces a specific descriptor set to be implemented without argument buffers.\n" + "\t\tUseful for implementing push descriptors in emulation layers.\n" + "\t\tCan be used multiple times for each descriptor set in question.\n" + "\t[--msl-device-argument-buffer ]:\n\t\tUse device address space to hold indirect argument buffers instead of constant.\n" + "\t\tComes up when trying to support argument buffers which are larger than 64 KiB.\n" + "\t[--msl-multiview]:\n\t\tEnable SPV_KHR_multiview emulation.\n" + "\t[--msl-multiview-no-layered-rendering]:\n\t\tDon't set [[render_target_array_index]] in multiview shaders.\n" + "\t\tUseful for devices which don't support layered rendering. 
Only effective when --msl-multiview is enabled.\n" + "\t[--msl-view-index-from-device-index]:\n\t\tTreat the view index as the device index instead.\n" + "\t\tFor multi-GPU rendering.\n" + "\t[--msl-dispatch-base]:\n\t\tAdd support for vkCmdDispatchBase() or similar APIs.\n" + "\t\tOffsets the workgroup ID based on a buffer.\n" + "\t[--msl-dynamic-buffer ]:\n\t\tMarks a buffer as having dynamic offset.\n" + "\t\tThe offset is applied in the shader with pointer arithmetic.\n" + "\t\tUseful for argument buffers where it is non-trivial to apply dynamic offset otherwise.\n" + "\t[--msl-inline-uniform-block ]:\n\t\tIn argument buffers, mark an UBO as being an inline uniform block which is embedded into the argument buffer itself.\n" + "\t[--msl-decoration-binding]:\n\t\tUse SPIR-V bindings directly as MSL bindings.\n" + "\t\tThis does not work in the general case as there is no descriptor set support, and combined image samplers are split up.\n" + "\t\tHowever, if the shader author knows of binding limitations, this option will avoid the need for reflection on Metal side.\n" + "\t[--msl-force-active-argument-buffer-resources]:\n\t\tAlways emit resources which are part of argument buffers.\n" + "\t\tThis makes sure that similar shaders with same resource declarations can share the argument buffer as declaring an argument buffer implies an ABI.\n" + "\t[--msl-force-native-arrays]:\n\t\tRather than implementing array types as a templated value type ala std::array, use plain, native arrays.\n" + "\t\tThis will lead to worse code-gen, but can work around driver bugs on certain driver revisions of certain Intel-based Macbooks where template arrays break.\n" + "\t[--msl-disable-frag-depth-builtin]:\n\t\tDisables FragDepth output. Useful if pipeline does not enable depth, as pipeline creation might otherwise fail.\n" + "\t[--msl-disable-frag-stencil-ref-builtin]:\n\t\tDisable FragStencilRef output. 
Useful if pipeline does not enable stencil output, as pipeline creation might otherwise fail.\n" + "\t[--msl-enable-frag-output-mask ]:\n\t\tOnly selectively enable fragment outputs. Useful if pipeline does not enable fragment output for certain locations, as pipeline creation might otherwise fail.\n" + "\t[--msl-no-clip-distance-user-varying]:\n\t\tDo not emit user varyings to emulate gl_ClipDistance in fragment shaders.\n" + "\t[--msl-add-shader-input ]:\n\t\tSpecify the format of the shader input at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. is the vector length of the variable, which must be greater than or equal to that declared in the shader. can be 'vertex', " + "'primitive', or 'patch' to indicate a per-vertex, per-primitive, or per-patch variable.\n" + "\t\tUseful if shader stage interfaces don't match up, as pipeline creation might otherwise fail.\n" + "\t[--msl-add-shader-output ]:\n\t\tSpecify the format of the shader output at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. is the vector length of the variable, which must be greater than or equal to that declared in the shader. can be 'vertex', " + "'primitive', or 'patch' to indicate a per-vertex, per-primitive, or per-patch variable.\n" + "\t\tUseful if shader stage interfaces don't match up, as pipeline creation might otherwise fail.\n" + "\t[--msl-shader-input ]:\n\t\tSpecify the format of the shader input at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. 
is the vector length of the variable, which must be greater than or equal to that declared in the shader." + "\t\tEquivalent to --msl-add-shader-input with a rate of 'vertex'.\n" + "\t[--msl-shader-output ]:\n\t\tSpecify the format of the shader output at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. is the vector length of the variable, which must be greater than or equal to that declared in the shader." + "\t\tEquivalent to --msl-add-shader-output with a rate of 'vertex'.\n" + "\t[--msl-raw-buffer-tese-input]:\n\t\tUse raw buffers for tessellation evaluation input.\n" + "\t\tThis allows the use of nested structures and arrays.\n" + "\t\tIn a future version of SPIRV-Cross, this will become the default.\n" + "\t[--msl-multi-patch-workgroup]:\n\t\tUse the new style of tessellation control processing, where multiple patches are processed per workgroup.\n" + "\t\tThis should increase throughput by ensuring all the GPU's SIMD lanes are occupied, but it is not compatible with the old style.\n" + "\t\tIn addition, this style also passes input variables in buffers directly instead of using vertex attribute processing.\n" + "\t\tIn a future version of SPIRV-Cross, this will become the default.\n" + "\t[--msl-vertex-for-tessellation]:\n\t\tWhen handling a vertex shader, marks it as one that will be used with a new-style tessellation control shader.\n" + "\t\tThe vertex shader is output to MSL as a compute kernel which outputs vertices to the buffer in the order they are received, rather than in index order as with --msl-capture-output normally.\n" + "\t[--msl-additional-fixed-sample-mask ]:\n" + "\t\tSet an additional fixed sample mask. 
If the shader outputs a sample mask, then the final sample mask will be a bitwise AND of the two.\n" + "\t[--msl-arrayed-subpass-input]:\n\t\tAssume that images of dimension SubpassData have multiple layers. Layered input attachments are accessed relative to BuiltInLayer.\n" + "\t\tThis option has no effect if multiview is also enabled.\n" + "\t[--msl-r32ui-linear-texture-align ]:\n\t\tThe required alignment of linear textures of format MTLPixelFormatR32Uint.\n" + "\t\tThis is used to align the row stride for atomic accesses to such images.\n" + "\t[--msl-r32ui-linear-texture-align-constant-id ]:\n\t\tThe function constant ID to use for the linear texture alignment.\n" + "\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n" + "\t[--msl-texture-1d-as-2d]:\n\t\tEmit Image variables of dimension Dim1D as texture2d.\n" + "\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n" + "\t[--msl-ios-use-simdgroup-functions]:\n\t\tUse simd_*() functions for subgroup ops instead of quad_*().\n" + "\t\tRecent Apple GPUs support SIMD-groups larger than a quad. 
Use this option to take advantage of this support.\n" + "\t[--msl-emulate-subgroups]:\n\t\tAssume subgroups of size 1.\n" + "\t\tIntended for Vulkan Portability implementations where Metal support for SIMD-groups is insufficient for true subgroups.\n" + "\t[--msl-fixed-subgroup-size ]:\n\t\tAssign a constant to the SubgroupSize builtin.\n" + "\t\tIntended for Vulkan Portability implementations where VK_EXT_subgroup_size_control is not supported or disabled.\n" + "\t\tIf 0, assume variable subgroup size as actually exposed by Metal.\n" + "\t[--msl-force-sample-rate-shading]:\n\t\tForce fragment shaders to run per sample.\n" + "\t\tThis adds a [[sample_id]] parameter if none is already present.\n" + "\t[--msl-no-manual-helper-invocation-updates]:\n\t\tDo not manually update the HelperInvocation builtin when a fragment is discarded.\n" + "\t\tSome Metal devices have a bug where simd_is_helper_thread() does not return true\n" + "\t\tafter the fragment is discarded. This behavior is required by Vulkan and SPIR-V, however.\n" + "\t[--msl-check-discarded-frag-stores]:\n\t\tAdd additional checks to resource stores in a fragment shader.\n" + "\t\tSome Metal devices have a bug where stores to resources from a fragment shader\n" + "\t\tcontinue to execute, even when the fragment is discarded. These checks\n" + "\t\tprevent these stores from executing.\n" + "\t[--msl-combined-sampler-suffix ]:\n\t\tUses a custom suffix for combined samplers.\n"); + // clang-format on +} + +static void print_help_common() +{ + // clang-format off + fprintf(stderr, "\nCommon options:\n" + "\t[--entry name]:\n\t\tUse a specific entry point. 
By default, the first entry point in the module is used.\n" + "\t[--stage ]:\n\t\tForces use of a certain shader stage.\n" + "\t\tCan disambiguate the entry point if more than one entry point exists with same name, but different stage.\n" + "\t[--emit-line-directives]:\n\t\tIf SPIR-V has OpLine directives, aim to emit those accurately in output code as well.\n" + "\t[--rename-entry-point ]:\n\t\tRenames an entry point from what is declared in SPIR-V to code output.\n" + "\t\tMostly relevant for HLSL or MSL.\n" + "\t[--rename-interface-variable ]:\n\t\tRename an interface variable based on location decoration.\n" + "\t[--force-zero-initialized-variables]:\n\t\tForces temporary variables to be initialized to zero.\n" + "\t\tCan be useful in environments where compilers do not allow potentially uninitialized variables.\n" + "\t\tThis usually comes up with Phi temporaries.\n" + "\t[--fixup-clipspace]:\n\t\tFixup Z clip-space at the end of a vertex shader. The behavior is backend-dependent.\n" + "\t\tGLSL: Rewrites [0, w] Z range (D3D/Metal/Vulkan) to GL-style [-w, w].\n" + "\t\tHLSL/MSL: Rewrites [-w, w] Z range (GL) to D3D/Metal/Vulkan-style [0, w].\n" + "\t[--flip-vert-y]:\n\t\tInverts gl_Position.y (or equivalent) at the end of a vertex shader. This is equivalent to using negative viewport height.\n" + "\t[--mask-stage-output-location ]:\n" + "\t\tIf a stage output variable with matching location and component is active, optimize away the variable if applicable.\n" + "\t[--mask-stage-output-builtin ]:\n" + "\t\tIf a stage output variable with matching builtin is active, " + "optimize away the variable if it can affect cross-stage linking correctness.\n" + "\t[--relax-nan-checks]:\n\t\tRelax NaN checks for N{Clamp,Min,Max} and ordered vs. unordered compare instructions.\n" + ); + // clang-format on +} + +static void print_help_obscure() +{ + // clang-format off + fprintf(stderr, "\nObscure options:\n" + "\tThese options are not meant to be used on a regular basis. 
They have some occasional uses in the test suite.\n" + + "\t[--force-temporary]:\n\t\tAggressively emit temporary expressions instead of forwarding expressions. Very rarely used and under-tested.\n" + "\t[--revision]:\n\t\tPrints build timestamp and Git commit information (updated when cmake is configured).\n" + "\t[--iterations iter]:\n\t\tRecompiles the same shader over and over, benchmarking related.\n" + "\t[--disable-storage-image-qualifier-deduction]:\n\t\tIf storage images are received without any nonwritable or nonreadable information,\n""" + "\t\tdo not attempt to analyze usage, and always emit read/write state.\n" + "\t[--flatten-multidimensional-arrays]:\n\t\tDo not support multi-dimensional arrays and flatten them to one dimension.\n" + "\t[--cpp-interface-name ]:\n\t\tEmit a specific class name in C++ codegen.\n" + "\t[--force-recompile-max-debug-iterations ]:\n\t\tAllow compilation loop to run for N loops.\n" + "\t\tCan be used to triage workarounds, but should not be used as a crutch, since it masks an implementation bug.\n" + ); + // clang-format on +} + static void print_help() { print_version(); - fprintf(stderr, "Usage: spirv-cross\n" - "\t[--output ]\n" - "\t[SPIR-V file]\n" - "\t[--es]\n" - "\t[--no-es]\n" - "\t[--version ]\n" - "\t[--dump-resources]\n" - "\t[--help]\n" - "\t[--revision]\n" - "\t[--force-temporary]\n" - "\t[--vulkan-semantics]\n" - "\t[--flatten-ubo]\n" - "\t[--fixup-clipspace]\n" - "\t[--flip-vert-y]\n" - "\t[--iterations iter]\n" - "\t[--cpp]\n" - "\t[--cpp-interface-name ]\n" - "\t[--glsl-emit-push-constant-as-ubo]\n" - "\t[--glsl-emit-ubo-as-plain-uniforms]\n" - "\t[--msl]\n" - "\t[--msl-version ]\n" - "\t[--msl-capture-output]\n" - "\t[--msl-swizzle-texture-samples]\n" - "\t[--msl-ios]\n" - "\t[--msl-pad-fragment-output]\n" - "\t[--msl-domain-lower-left]\n" - "\t[--msl-argument-buffers]\n" - "\t[--msl-texture-buffer-native]\n" - "\t[--msl-discrete-descriptor-set ]\n" - "\t[--msl-multiview]\n" - "\t[--hlsl]\n" - 
"\t[--reflect]\n" - "\t[--shader-model]\n" - "\t[--hlsl-enable-compat]\n" - "\t[--hlsl-support-nonzero-basevertex-baseinstance]\n" - "\t[--separate-shader-objects]\n" - "\t[--pls-in format input-name]\n" - "\t[--pls-out format output-name]\n" - "\t[--remap source_name target_name components]\n" - "\t[--extension ext]\n" - "\t[--entry name]\n" - "\t[--stage ]\n" - "\t[--remove-unused-variables]\n" - "\t[--flatten-multidimensional-arrays]\n" - "\t[--no-420pack-extension]\n" - "\t[--remap-variable-type ]\n" - "\t[--rename-interface-variable ]\n" - "\t[--set-hlsl-vertex-input-semantic ]\n" - "\t[--rename-entry-point ]\n" - "\t[--combined-samplers-inherit-bindings]\n" - "\t[--no-support-nonzero-baseinstance]\n" - "\t[--emit-line-directives]\n" - "\n"); + // clang-format off + fprintf(stderr, "Usage: spirv-cross <...>\n" + "\nBasic:\n" + "\t[SPIR-V file] (- is stdin)\n" + "\t[--output ]: If not provided, prints output to stdout.\n" + "\t[--dump-resources]:\n\t\tPrints a basic reflection of the SPIR-V module along with other output.\n" + "\t[--help]:\n\t\tPrints this help message.\n" + ); + // clang-format on + + print_help_backend(); + print_help_common(); + print_help_glsl(); + print_help_msl(); + print_help_hlsl(); + print_help_obscure(); } static bool remap_generic(Compiler &compiler, const SmallVector &resources, const Remap &remap) @@ -717,13 +1118,50 @@ static ExecutionModel stage_to_execution_model(const std::string &stage) return ExecutionModelTessellationEvaluation; else if (stage == "geom") return ExecutionModelGeometry; + else if (stage == "rgen") + return ExecutionModelRayGenerationKHR; + else if (stage == "rint") + return ExecutionModelIntersectionKHR; + else if (stage == "rahit") + return ExecutionModelAnyHitKHR; + else if (stage == "rchit") + return ExecutionModelClosestHitKHR; + else if (stage == "rmiss") + return ExecutionModelMissKHR; + else if (stage == "rcall") + return ExecutionModelCallableKHR; + else if (stage == "mesh") + return 
spv::ExecutionModelMeshEXT; + else if (stage == "task") + return spv::ExecutionModelTaskEXT; else SPIRV_CROSS_THROW("Invalid stage."); } +static HLSLBindingFlags hlsl_resource_type_to_flag(const std::string &arg) +{ + if (arg == "push") + return HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT; + else if (arg == "cbv") + return HLSL_BINDING_AUTO_CBV_BIT; + else if (arg == "srv") + return HLSL_BINDING_AUTO_SRV_BIT; + else if (arg == "uav") + return HLSL_BINDING_AUTO_UAV_BIT; + else if (arg == "sampler") + return HLSL_BINDING_AUTO_SAMPLER_BIT; + else if (arg == "all") + return HLSL_BINDING_AUTO_ALL; + else + { + fprintf(stderr, "Invalid resource type for --hlsl-auto-binding: %s\n", arg.c_str()); + return 0; + } +} + static string compile_iteration(const CLIArguments &args, std::vector spirv_file) { - Parser spirv_parser(move(spirv_file)); + Parser spirv_parser(std::move(spirv_file)); spirv_parser.parse(); unique_ptr compiler; @@ -732,13 +1170,13 @@ static string compile_iteration(const CLIArguments &args, std::vector if (args.cpp) { - compiler.reset(new CompilerCPP(move(spirv_parser.get_parsed_ir()))); + compiler.reset(new CompilerCPP(std::move(spirv_parser.get_parsed_ir()))); if (args.cpp_interface_name) static_cast(compiler.get())->set_interface_name(args.cpp_interface_name); } else if (args.msl) { - compiler.reset(new CompilerMSL(move(spirv_parser.get_parsed_ir()))); + compiler.reset(new CompilerMSL(std::move(spirv_parser.get_parsed_ir()))); auto *msl_comp = static_cast(compiler.get()); auto msl_opts = msl_comp->get_msl_options(); @@ -746,25 +1184,69 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.msl_version = args.msl_version; msl_opts.capture_output_to_buffer = args.msl_capture_output_to_buffer; msl_opts.swizzle_texture_samples = args.msl_swizzle_texture_samples; + msl_opts.invariant_float_math = args.msl_invariant_float_math; if (args.msl_ios) + { msl_opts.platform = CompilerMSL::Options::iOS; + msl_opts.emulate_cube_array = 
args.msl_emulate_cube_array; + } + msl_opts.use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch; msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output; msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left; msl_opts.argument_buffers = args.msl_argument_buffers; + msl_opts.argument_buffers_tier = static_cast(args.msl_argument_buffers_tier); msl_opts.texture_buffer_native = args.msl_texture_buffer_native; msl_opts.multiview = args.msl_multiview; + msl_opts.multiview_layered_rendering = args.msl_multiview_layered_rendering; + msl_opts.view_index_from_device_index = args.msl_view_index_from_device_index; + msl_opts.dispatch_base = args.msl_dispatch_base; + msl_opts.enable_decoration_binding = args.msl_decoration_binding; + msl_opts.force_active_argument_buffer_resources = args.msl_force_active_argument_buffer_resources; + msl_opts.force_native_arrays = args.msl_force_native_arrays; + msl_opts.enable_frag_depth_builtin = args.msl_enable_frag_depth_builtin; + msl_opts.enable_frag_stencil_ref_builtin = args.msl_enable_frag_stencil_ref_builtin; + msl_opts.enable_frag_output_mask = args.msl_enable_frag_output_mask; + msl_opts.enable_clip_distance_user_varying = args.msl_enable_clip_distance_user_varying; + msl_opts.raw_buffer_tese_input = args.msl_raw_buffer_tese_input; + msl_opts.multi_patch_workgroup = args.msl_multi_patch_workgroup; + msl_opts.vertex_for_tessellation = args.msl_vertex_for_tessellation; + msl_opts.additional_fixed_sample_mask = args.msl_additional_fixed_sample_mask; + msl_opts.arrayed_subpass_input = args.msl_arrayed_subpass_input; + msl_opts.r32ui_linear_texture_alignment = args.msl_r32ui_linear_texture_alignment; + msl_opts.r32ui_alignment_constant_id = args.msl_r32ui_alignment_constant_id; + msl_opts.texture_1D_as_2D = args.msl_texture_1d_as_2d; + msl_opts.ios_use_simdgroup_functions = args.msl_ios_use_simdgroup_functions; + msl_opts.emulate_subgroups = args.msl_emulate_subgroups; + msl_opts.fixed_subgroup_size = 
args.msl_fixed_subgroup_size; + msl_opts.force_sample_rate_shading = args.msl_force_sample_rate_shading; + msl_opts.manual_helper_invocation_updates = args.msl_manual_helper_invocation_updates; + msl_opts.check_discarded_frag_stores = args.msl_check_discarded_frag_stores; + msl_opts.ios_support_base_vertex_instance = true; msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) msl_comp->add_discrete_descriptor_set(v); + for (auto &v : args.msl_device_argument_buffers) + msl_comp->set_argument_buffer_device_address_space(v, true); + uint32_t i = 0; + for (auto &v : args.msl_dynamic_buffers) + msl_comp->add_dynamic_buffer(v.first, v.second, i++); + for (auto &v : args.msl_inline_uniform_blocks) + msl_comp->add_inline_uniform_block(v.first, v.second); + for (auto &v : args.msl_shader_inputs) + msl_comp->add_msl_shader_input(v); + for (auto &v : args.msl_shader_outputs) + msl_comp->add_msl_shader_output(v); + if (args.msl_combined_sampler_suffix) + msl_comp->set_combined_sampler_suffix(args.msl_combined_sampler_suffix); } else if (args.hlsl) - compiler.reset(new CompilerHLSL(move(spirv_parser.get_parsed_ir()))); + compiler.reset(new CompilerHLSL(std::move(spirv_parser.get_parsed_ir()))); else { combined_image_samplers = !args.vulkan_semantics; - if (!args.vulkan_semantics) + if (!args.vulkan_semantics || args.vulkan_glsl_disable_ext_samplerless_texture_functions) build_dummy_sampler = true; - compiler.reset(new CompilerGLSL(move(spirv_parser.get_parsed_ir()))); + compiler.reset(new CompilerGLSL(std::move(spirv_parser.get_parsed_ir()))); } if (!args.variable_type_remaps.empty()) @@ -775,9 +1257,14 @@ static string compile_iteration(const CLIArguments &args, std::vector out = remap.new_variable_type; }; - compiler->set_variable_type_remap_callback(move(remap_cb)); + compiler->set_variable_type_remap_callback(std::move(remap_cb)); } + for (auto &masked : args.masked_stage_outputs) + compiler->mask_stage_output_by_location(masked.first, 
masked.second); + for (auto &masked : args.masked_stage_builtins) + compiler->mask_stage_output_by_builtin(masked); + for (auto &rename : args.entry_point_rename) compiler->rename_entry_point(rename.old_name, rename.new_name, rename.execution_model); @@ -878,9 +1365,18 @@ static string compile_iteration(const CLIArguments &args, std::vector opts.vertex.support_nonzero_base_instance = args.support_nonzero_baseinstance; opts.emit_push_constant_as_uniform_buffer = args.glsl_emit_push_constant_as_ubo; opts.emit_uniform_buffer_as_plain_uniforms = args.glsl_emit_ubo_as_plain_uniforms; + opts.force_flattened_io_blocks = args.glsl_force_flattened_io_blocks; + opts.ovr_multiview_view_count = args.glsl_ovr_multiview_view_count; opts.emit_line_directives = args.emit_line_directives; + opts.enable_storage_image_qualifier_deduction = args.enable_storage_image_qualifier_deduction; + opts.force_zero_initialized_variables = args.force_zero_initialized_variables; + opts.relax_nan_checks = args.relax_nan_checks; + opts.force_recompile_max_debug_iterations = args.force_recompile_max_debug_iterations; compiler->set_common_options(opts); + for (auto &fetch : args.glsl_ext_framebuffer_fetch) + compiler->remap_ext_framebuffer_fetch(fetch.first, fetch.second, !args.glsl_ext_framebuffer_fetch_noncoherent); + // Set HLSL specific options. if (args.hlsl) { @@ -910,8 +1406,23 @@ static string compile_iteration(const CLIArguments &args, std::vector build_dummy_sampler = true; } + // If we're explicitly renaming, we probably want that name to be output. 
+ if (!args.entry_point_rename.empty()) + hlsl_opts.use_entry_point_name = true; + hlsl_opts.support_nonzero_base_vertex_base_instance = args.hlsl_support_nonzero_base; + hlsl_opts.force_storage_buffer_as_uav = args.hlsl_force_storage_buffer_as_uav; + hlsl_opts.nonwritable_uav_texture_as_srv = args.hlsl_nonwritable_uav_texture_as_srv; + hlsl_opts.enable_16bit_types = args.hlsl_enable_16bit_types; + hlsl_opts.flatten_matrix_vertex_input_semantics = args.hlsl_flatten_matrix_vertex_input_semantics; hlsl->set_hlsl_options(hlsl_opts); + hlsl->set_resource_binding_flags(args.hlsl_binding_flags); + if (args.hlsl_base_vertex_index_explicit_binding) + { + hlsl->set_hlsl_aux_buffer_binding(HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE, + args.hlsl_base_vertex_index_register_index, + args.hlsl_base_vertex_index_register_space); + } } if (build_dummy_sampler) @@ -930,7 +1441,7 @@ static string compile_iteration(const CLIArguments &args, std::vector { auto active = compiler->get_active_interface_variables(); res = compiler->get_shader_resources(active); - compiler->set_enabled_interface_variables(move(active)); + compiler->set_enabled_interface_variables(std::move(active)); } else res = compiler->get_shader_resources(); @@ -945,7 +1456,7 @@ static string compile_iteration(const CLIArguments &args, std::vector auto pls_inputs = remap_pls(args.pls_in, res.stage_inputs, &res.subpass_inputs); auto pls_outputs = remap_pls(args.pls_out, res.stage_outputs, nullptr); - compiler->remap_pixel_local_storage(move(pls_inputs), move(pls_outputs)); + compiler->remap_pixel_local_storage(std::move(pls_inputs), std::move(pls_outputs)); for (auto &ext : args.extensions) compiler->require_extension(ext); @@ -975,14 +1486,6 @@ static string compile_iteration(const CLIArguments &args, std::vector } } - if (args.dump_resources) - { - print_resources(*compiler, res); - print_push_constant_resources(*compiler, res.push_constant_buffers); - print_spec_constants(*compiler); - 
print_capabilities_and_extensions(*compiler); - } - if (combined_image_samplers) { compiler->build_combined_image_samplers(); @@ -1000,21 +1503,43 @@ static string compile_iteration(const CLIArguments &args, std::vector if (args.hlsl) { auto *hlsl_compiler = static_cast(compiler.get()); - uint32_t new_builtin = hlsl_compiler->remap_num_workgroups_builtin(); - if (new_builtin) - { - hlsl_compiler->set_decoration(new_builtin, DecorationDescriptorSet, 0); - hlsl_compiler->set_decoration(new_builtin, DecorationBinding, 0); - } + hlsl_compiler->remap_num_workgroups_builtin(); } if (args.hlsl) { for (auto &remap : args.hlsl_attr_remap) static_cast(compiler.get())->add_vertex_attribute_remap(remap); + + for (auto &named_remap : args.hlsl_attr_remap_named) + { + auto itr = std::find_if(res.stage_inputs.begin(), res.stage_inputs.end(), [&](const Resource &input_res) { + return input_res.name == named_remap.name; + }); + + if (itr != res.stage_inputs.end()) + { + HLSLVertexAttributeRemap remap = { + compiler->get_decoration(itr->id, DecorationLocation), + named_remap.semantic, + }; + static_cast(compiler.get())->add_vertex_attribute_remap(remap); + } + } } - return compiler->compile(); + auto ret = compiler->compile(); + + if (args.dump_resources) + { + compiler->update_active_builtins(); + print_resources(*compiler, res); + print_push_constant_resources(*compiler, res.push_constant_buffers); + print_spec_constants(*compiler); + print_capabilities_and_extensions(*compiler); + } + + return ret; } static int main_inner(int argc, char *argv[]) @@ -1055,12 +1580,44 @@ static int main_inner(int argc, char *argv[]) cbs.add("--metal", [&args](CLIParser &) { args.msl = true; }); // Legacy compatibility cbs.add("--glsl-emit-push-constant-as-ubo", [&args](CLIParser &) { args.glsl_emit_push_constant_as_ubo = true; }); cbs.add("--glsl-emit-ubo-as-plain-uniforms", [&args](CLIParser &) { args.glsl_emit_ubo_as_plain_uniforms = true; }); + cbs.add("--glsl-force-flattened-io-blocks", 
[&args](CLIParser &) { args.glsl_force_flattened_io_blocks = true; }); + cbs.add("--glsl-ovr-multiview-view-count", [&args](CLIParser &parser) { args.glsl_ovr_multiview_view_count = parser.next_uint(); }); + cbs.add("--glsl-remap-ext-framebuffer-fetch", [&args](CLIParser &parser) { + uint32_t input_index = parser.next_uint(); + uint32_t color_attachment = parser.next_uint(); + args.glsl_ext_framebuffer_fetch.push_back({ input_index, color_attachment }); + }); + cbs.add("--glsl-ext-framebuffer-fetch-noncoherent", [&args](CLIParser &) { + args.glsl_ext_framebuffer_fetch_noncoherent = true; + }); + cbs.add("--vulkan-glsl-disable-ext-samplerless-texture-functions", + [&args](CLIParser &) { args.vulkan_glsl_disable_ext_samplerless_texture_functions = true; }); + cbs.add("--disable-storage-image-qualifier-deduction", + [&args](CLIParser &) { args.enable_storage_image_qualifier_deduction = false; }); + cbs.add("--force-zero-initialized-variables", + [&args](CLIParser &) { args.force_zero_initialized_variables = true; }); cbs.add("--msl", [&args](CLIParser &) { args.msl = true; }); cbs.add("--hlsl", [&args](CLIParser &) { args.hlsl = true; }); cbs.add("--hlsl-enable-compat", [&args](CLIParser &) { args.hlsl_compat = true; }); cbs.add("--hlsl-support-nonzero-basevertex-baseinstance", [&args](CLIParser &) { args.hlsl_support_nonzero_base = true; }); + cbs.add("--hlsl-basevertex-baseinstance-binding", [&args](CLIParser &parser) { + args.hlsl_base_vertex_index_explicit_binding = true; + args.hlsl_base_vertex_index_register_index = parser.next_uint(); + args.hlsl_base_vertex_index_register_space = parser.next_uint(); + }); + cbs.add("--hlsl-auto-binding", [&args](CLIParser &parser) { + args.hlsl_binding_flags |= hlsl_resource_type_to_flag(parser.next_string()); + }); + cbs.add("--hlsl-force-storage-buffer-as-uav", + [&args](CLIParser &) { args.hlsl_force_storage_buffer_as_uav = true; }); + cbs.add("--hlsl-nonwritable-uav-texture-as-srv", + [&args](CLIParser &) { 
args.hlsl_nonwritable_uav_texture_as_srv = true; }); + cbs.add("--hlsl-enable-16bit-types", [&args](CLIParser &) { args.hlsl_enable_16bit_types = true; }); + cbs.add("--hlsl-flatten-matrix-vertex-input-semantics", + [&args](CLIParser &) { args.hlsl_flatten_matrix_vertex_input_semantics = true; }); cbs.add("--vulkan-semantics", [&args](CLIParser &) { args.vulkan_semantics = true; }); + cbs.add("-V", [&args](CLIParser &) { args.vulkan_semantics = true; }); cbs.add("--flatten-multidimensional-arrays", [&args](CLIParser &) { args.flatten_multidimensional_arrays = true; }); cbs.add("--no-420pack-extension", [&args](CLIParser &) { args.use_420pack_extension = false; }); cbs.add("--msl-capture-output", [&args](CLIParser &) { args.msl_capture_output_to_buffer = true; }); @@ -1069,16 +1626,163 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-pad-fragment-output", [&args](CLIParser &) { args.msl_pad_fragment_output = true; }); cbs.add("--msl-domain-lower-left", [&args](CLIParser &) { args.msl_domain_lower_left = true; }); cbs.add("--msl-argument-buffers", [&args](CLIParser &) { args.msl_argument_buffers = true; }); + cbs.add("--msl-argument-buffer-tier", [&args](CLIParser &parser) { + args.msl_argument_buffers_tier = parser.next_uint(); + args.msl_argument_buffers = true; + }); cbs.add("--msl-discrete-descriptor-set", [&args](CLIParser &parser) { args.msl_discrete_descriptor_sets.push_back(parser.next_uint()); }); + cbs.add("--msl-device-argument-buffer", + [&args](CLIParser &parser) { args.msl_device_argument_buffers.push_back(parser.next_uint()); }); cbs.add("--msl-texture-buffer-native", [&args](CLIParser &) { args.msl_texture_buffer_native = true; }); + cbs.add("--msl-framebuffer-fetch", [&args](CLIParser &) { args.msl_framebuffer_fetch = true; }); + cbs.add("--msl-invariant-float-math", [&args](CLIParser &) { args.msl_invariant_float_math = true; }); + cbs.add("--msl-emulate-cube-array", [&args](CLIParser &) { args.msl_emulate_cube_array = true; }); 
cbs.add("--msl-multiview", [&args](CLIParser &) { args.msl_multiview = true; }); + cbs.add("--msl-multiview-no-layered-rendering", + [&args](CLIParser &) { args.msl_multiview_layered_rendering = false; }); + cbs.add("--msl-view-index-from-device-index", + [&args](CLIParser &) { args.msl_view_index_from_device_index = true; }); + cbs.add("--msl-dispatch-base", [&args](CLIParser &) { args.msl_dispatch_base = true; }); + cbs.add("--msl-dynamic-buffer", [&args](CLIParser &parser) { + args.msl_argument_buffers = true; + // Make sure next_uint() is called in-order. + uint32_t desc_set = parser.next_uint(); + uint32_t binding = parser.next_uint(); + args.msl_dynamic_buffers.push_back(make_pair(desc_set, binding)); + }); + cbs.add("--msl-decoration-binding", [&args](CLIParser &) { args.msl_decoration_binding = true; }); + cbs.add("--msl-force-active-argument-buffer-resources", + [&args](CLIParser &) { args.msl_force_active_argument_buffer_resources = true; }); + cbs.add("--msl-inline-uniform-block", [&args](CLIParser &parser) { + args.msl_argument_buffers = true; + // Make sure next_uint() is called in-order. 
+ uint32_t desc_set = parser.next_uint(); + uint32_t binding = parser.next_uint(); + args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding)); + }); + cbs.add("--msl-force-native-arrays", [&args](CLIParser &) { args.msl_force_native_arrays = true; }); + cbs.add("--msl-disable-frag-depth-builtin", [&args](CLIParser &) { args.msl_enable_frag_depth_builtin = false; }); + cbs.add("--msl-disable-frag-stencil-ref-builtin", + [&args](CLIParser &) { args.msl_enable_frag_stencil_ref_builtin = false; }); + cbs.add("--msl-enable-frag-output-mask", + [&args](CLIParser &parser) { args.msl_enable_frag_output_mask = parser.next_hex_uint(); }); + cbs.add("--msl-no-clip-distance-user-varying", + [&args](CLIParser &) { args.msl_enable_clip_distance_user_varying = false; }); + cbs.add("--msl-add-shader-input", [&args](CLIParser &parser) { + MSLShaderInterfaceVariable input; + // Make sure next_uint() is called in-order. + input.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + input.vecsize = parser.next_uint(); + const char *rate = parser.next_value_string("vertex"); + if (strcmp(rate, "primitive") == 0) + input.rate = MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE; + else if (strcmp(rate, "patch") == 0) + input.rate = MSL_SHADER_VARIABLE_RATE_PER_PATCH; + else + input.rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; + args.msl_shader_inputs.push_back(input); + }); + cbs.add("--msl-add-shader-output", [&args](CLIParser &parser) { + MSLShaderInterfaceVariable output; + // Make sure next_uint() is called in-order. 
+ output.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + output.vecsize = parser.next_uint(); + const char *rate = parser.next_value_string("vertex"); + if (strcmp(rate, "primitive") == 0) + output.rate = MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE; + else if (strcmp(rate, "patch") == 0) + output.rate = MSL_SHADER_VARIABLE_RATE_PER_PATCH; + else + output.rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; + args.msl_shader_outputs.push_back(output); + }); + cbs.add("--msl-shader-input", [&args](CLIParser &parser) { + MSLShaderInterfaceVariable input; + // Make sure next_uint() is called in-order. + input.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + input.vecsize = parser.next_uint(); + args.msl_shader_inputs.push_back(input); + }); + cbs.add("--msl-shader-output", [&args](CLIParser &parser) { + MSLShaderInterfaceVariable output; + // Make sure next_uint() is called in-order. 
+ output.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + output.vecsize = parser.next_uint(); + args.msl_shader_outputs.push_back(output); + }); + cbs.add("--msl-raw-buffer-tese-input", [&args](CLIParser &) { args.msl_raw_buffer_tese_input = true; }); + cbs.add("--msl-multi-patch-workgroup", [&args](CLIParser &) { args.msl_multi_patch_workgroup = true; }); + cbs.add("--msl-vertex-for-tessellation", [&args](CLIParser &) { args.msl_vertex_for_tessellation = true; }); + cbs.add("--msl-additional-fixed-sample-mask", + [&args](CLIParser &parser) { args.msl_additional_fixed_sample_mask = parser.next_hex_uint(); }); + cbs.add("--msl-arrayed-subpass-input", [&args](CLIParser &) { args.msl_arrayed_subpass_input = true; }); + cbs.add("--msl-r32ui-linear-texture-align", + [&args](CLIParser &parser) { args.msl_r32ui_linear_texture_alignment = parser.next_uint(); }); + cbs.add("--msl-r32ui-linear-texture-align-constant-id", + [&args](CLIParser &parser) { args.msl_r32ui_alignment_constant_id = parser.next_uint(); }); + cbs.add("--msl-texture-1d-as-2d", [&args](CLIParser &) { args.msl_texture_1d_as_2d = true; }); + cbs.add("--msl-ios-use-simdgroup-functions", [&args](CLIParser &) { args.msl_ios_use_simdgroup_functions = true; }); + cbs.add("--msl-emulate-subgroups", [&args](CLIParser &) { args.msl_emulate_subgroups = true; }); + cbs.add("--msl-fixed-subgroup-size", + [&args](CLIParser &parser) { args.msl_fixed_subgroup_size = parser.next_uint(); }); + cbs.add("--msl-force-sample-rate-shading", [&args](CLIParser &) { 
args.msl_force_sample_rate_shading = true; }); + cbs.add("--msl-no-manual-helper-invocation-updates", + [&args](CLIParser &) { args.msl_manual_helper_invocation_updates = false; }); + cbs.add("--msl-check-discarded-frag-stores", [&args](CLIParser &) { args.msl_check_discarded_frag_stores = true; }); + cbs.add("--msl-combined-sampler-suffix", [&args](CLIParser &parser) { + args.msl_combined_sampler_suffix = parser.next_string(); + }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); auto new_name = parser.next_string(); auto model = stage_to_execution_model(parser.next_string()); - args.entry_point_rename.push_back({ old_name, new_name, move(model) }); + args.entry_point_rename.push_back({ old_name, new_name, std::move(model) }); }); cbs.add("--entry", [&args](CLIParser &parser) { args.entry = parser.next_string(); }); cbs.add("--stage", [&args](CLIParser &parser) { args.entry_stage = parser.next_string(); }); @@ -1087,20 +1791,26 @@ static int main_inner(int argc, char *argv[]) HLSLVertexAttributeRemap remap; remap.location = parser.next_uint(); remap.semantic = parser.next_string(); - args.hlsl_attr_remap.push_back(move(remap)); + args.hlsl_attr_remap.push_back(std::move(remap)); + }); + cbs.add("--set-hlsl-named-vertex-input-semantic", [&args](CLIParser &parser) { + HLSLVertexAttributeRemapNamed remap; + remap.name = parser.next_string(); + remap.semantic = parser.next_string(); + args.hlsl_attr_remap_named.push_back(std::move(remap)); }); cbs.add("--remap", [&args](CLIParser &parser) { string src = parser.next_string(); string dst = parser.next_string(); uint32_t components = parser.next_uint(); - args.remaps.push_back({ move(src), move(dst), components }); + args.remaps.push_back({ std::move(src), std::move(dst), components }); }); cbs.add("--remap-variable-type", [&args](CLIParser &parser) { string var_name 
= parser.next_string(); string new_type = parser.next_string(); - args.variable_type_remaps.push_back({ move(var_name), move(new_type) }); + args.variable_type_remaps.push_back({ std::move(var_name), std::move(new_type) }); }); cbs.add("--rename-interface-variable", [&args](CLIParser &parser) { @@ -1113,18 +1823,18 @@ static int main_inner(int argc, char *argv[]) uint32_t loc = parser.next_uint(); string var_name = parser.next_string(); - args.interface_variable_renames.push_back({ cls, loc, move(var_name) }); + args.interface_variable_renames.push_back({ cls, loc, std::move(var_name) }); }); cbs.add("--pls-in", [&args](CLIParser &parser) { auto fmt = pls_format(parser.next_string()); auto name = parser.next_string(); - args.pls_in.push_back({ move(fmt), move(name) }); + args.pls_in.push_back({ std::move(fmt), std::move(name) }); }); cbs.add("--pls-out", [&args](CLIParser &parser) { auto fmt = pls_format(parser.next_string()); auto name = parser.next_string(); - args.pls_out.push_back({ move(fmt), move(name) }); + args.pls_out.push_back({ std::move(fmt), std::move(name) }); }); cbs.add("--shader-model", [&args](CLIParser &parser) { args.shader_model = parser.next_uint(); @@ -1142,10 +1852,42 @@ static int main_inner(int argc, char *argv[]) cbs.add("--no-support-nonzero-baseinstance", [&](CLIParser &) { args.support_nonzero_baseinstance = false; }); cbs.add("--emit-line-directives", [&args](CLIParser &) { args.emit_line_directives = true; }); + cbs.add("--mask-stage-output-location", [&](CLIParser &parser) { + uint32_t location = parser.next_uint(); + uint32_t component = parser.next_uint(); + args.masked_stage_outputs.push_back({ location, component }); + }); + + cbs.add("--mask-stage-output-builtin", [&](CLIParser &parser) { + BuiltIn masked_builtin = BuiltInMax; + std::string builtin = parser.next_string(); + if (builtin == "Position") + masked_builtin = BuiltInPosition; + else if (builtin == "PointSize") + masked_builtin = BuiltInPointSize; + else if (builtin == 
"CullDistance") + masked_builtin = BuiltInCullDistance; + else if (builtin == "ClipDistance") + masked_builtin = BuiltInClipDistance; + else + { + print_help(); + exit(EXIT_FAILURE); + } + args.masked_stage_builtins.push_back(masked_builtin); + }); + + cbs.add("--force-recompile-max-debug-iterations", [&](CLIParser &parser) { + args.force_recompile_max_debug_iterations = parser.next_uint(); + }); + + cbs.add("--relax-nan-checks", [&](CLIParser &) { args.relax_nan_checks = true; }); + cbs.default_handler = [&args](const char *value) { args.input = value; }; + cbs.add("-", [&args](CLIParser &) { args.input = "-"; }); cbs.error_handler = [] { print_help(); }; - CLIParser parser{ move(cbs), argc - 1, argv + 1 }; + CLIParser parser{ std::move(cbs), argc - 1, argv + 1 }; if (!parser.parse()) return EXIT_FAILURE; else if (parser.ended_state) @@ -1165,10 +1907,10 @@ static int main_inner(int argc, char *argv[]) // Special case reflection because it has little to do with the path followed by code-outputting compilers if (!args.reflect.empty()) { - Parser spirv_parser(move(spirv_file)); + Parser spirv_parser(std::move(spirv_file)); spirv_parser.parse(); - CompilerReflection compiler(move(spirv_parser.get_parsed_ir())); + CompilerReflection compiler(std::move(spirv_parser.get_parsed_ir())); compiler.set_format(args.reflect); auto json = compiler.compile(); if (args.output) @@ -1181,7 +1923,7 @@ static int main_inner(int argc, char *argv[]) string compiled_output; if (args.iterations == 1) - compiled_output = compile_iteration(args, move(spirv_file)); + compiled_output = compile_iteration(args, std::move(spirv_file)); else { for (unsigned i = 0; i < args.iterations; i++) diff --git a/pkg-config/spirv-cross-c-shared.pc.in b/pkg-config/spirv-cross-c-shared.pc.in index 823e4ce48bb..4fb8a0aee98 100644 --- a/pkg-config/spirv-cross-c-shared.pc.in +++ b/pkg-config/spirv-cross-c-shared.pc.in @@ -1,8 +1,11 @@ +# Copyright 2020-2021 Hans-Kristian Arntzen +# SPDX-License-Identifier: 
Apache-2.0 + prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=@CMAKE_INSTALL_PREFIX@ -libdir=@SPIRV_CROSS_INSTALL_LIB_DIR@ -sharedlibdir=@SPIRV_CROSS_INSTALL_LIB_DIR@ -includedir=@SPIRV_CROSS_INSTALL_INC_DIR@ +exec_prefix=${prefix} +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +sharedlibdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@/spirv_cross Name: spirv-cross-c-shared Description: C API for SPIRV-Cross diff --git a/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..da499c3b6da --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,20 @@ +RWByteAddressBuffer _5 : register(u0); +RWByteAddressBuffer _6 : register(u1); + +void comp_main() +{ + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= 
int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w)))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp b/reference/opt/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp index a12274c01c6..e184e03c5c1 100644 --- a/reference/opt/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp +++ b/reference/opt/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp @@ -4,22 +4,16 @@ struct A int b; }; -struct A_1 -{ - int a; - int b; -}; - RWByteAddressBuffer C1 : register(u1); cbuffer C2 : register(b2) { - A_1 C2_1_Data[1024] : packoffset(c0); + A C2_1_Data[1024] : packoffset(c0); }; RWByteAddressBuffer C3 : register(u0); cbuffer B : register(b3) { - A_1 C4_Data[1024] : packoffset(c0); + A C4_Data[1024] : packoffset(c0); }; diff --git a/reference/opt/shaders-hlsl/asm/comp/control-flow-hints.asm.comp b/reference/opt/shaders-hlsl/asm/comp/control-flow-hints.asm.comp index 142ef5efa8d..70df6baf0e7 100644 --- a/reference/opt/shaders-hlsl/asm/comp/control-flow-hints.asm.comp +++ b/reference/opt/shaders-hlsl/asm/comp/control-flow-hints.asm.comp @@ -3,18 +3,27 @@ RWByteAddressBuffer foo : register(u1); void comp_main() { - [unroll] - for (int _135 = 0; _135 < 16; ) - { - bar.Store4(_135 * 16 + 0, asuint(asfloat(foo.Load4(_135 * 16 + 0)))); - _135++; - continue; - } + bar.Store4(0, asuint(asfloat(foo.Load4(0)))); + bar.Store4(16, asuint(asfloat(foo.Load4(16)))); + bar.Store4(32, asuint(asfloat(foo.Load4(32)))); + bar.Store4(48, asuint(asfloat(foo.Load4(48)))); + 
bar.Store4(64, asuint(asfloat(foo.Load4(64)))); + bar.Store4(80, asuint(asfloat(foo.Load4(80)))); + bar.Store4(96, asuint(asfloat(foo.Load4(96)))); + bar.Store4(112, asuint(asfloat(foo.Load4(112)))); + bar.Store4(128, asuint(asfloat(foo.Load4(128)))); + bar.Store4(144, asuint(asfloat(foo.Load4(144)))); + bar.Store4(160, asuint(asfloat(foo.Load4(160)))); + bar.Store4(176, asuint(asfloat(foo.Load4(176)))); + bar.Store4(192, asuint(asfloat(foo.Load4(192)))); + bar.Store4(208, asuint(asfloat(foo.Load4(208)))); + bar.Store4(224, asuint(asfloat(foo.Load4(224)))); + bar.Store4(240, asuint(asfloat(foo.Load4(240)))); [loop] - for (int _136 = 0; _136 < 16; ) + for (int _137 = 0; _137 < 16; ) { - bar.Store4((15 - _136) * 16 + 0, asuint(asfloat(foo.Load4(_136 * 16 + 0)))); - _136++; + bar.Store4((15 - _137) * 16 + 0, asuint(asfloat(foo.Load4(_137 * 16 + 0)))); + _137++; continue; } [branch] diff --git a/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..9f51eff1354 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,30 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, 
asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + for (int _139 = 0; _139 < 2; ) + { + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60))))); + _139++; + continue; + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag b/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag index ed53720d946..2527d10fdc8 100644 --- a/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag @@ -8,7 +8,7 @@ struct SPIRV_Cross_Output int2 Size : SV_Target0; }; -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); @@ -19,7 +19,7 @@ void frag_main() { uint _19_dummy_parameter; uint _20_dummy_parameter; - Size = int2(SPIRV_Cross_textureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(SPIRV_Cross_textureSize(uTexture, uint(1), _20_dummy_parameter)); + Size = int2(spvTextureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(spvTextureSize(uTexture, uint(1), _20_dummy_parameter)); } SPIRV_Cross_Output main() diff --git a/reference/opt/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag b/reference/opt/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag index d20cf995acf..25dc6939e5c 100644 --- a/reference/opt/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag @@ -7,7 +7,7 @@ struct SPIRV_Cross_Output void frag_main() { - FragColor = float3(asfloat(0x7f800000u), 
asfloat(0xff800000u), asfloat(0x7fc00000u)); + FragColor = float3(asfloat(0x7f800000u /* inf */), asfloat(0xff800000u /* -inf */), asfloat(0x7fc00000u /* nan */)); } SPIRV_Cross_Output main() diff --git a/reference/opt/shaders-hlsl/asm/frag/line-directive.line.asm.frag b/reference/opt/shaders-hlsl/asm/frag/line-directive.line.asm.frag index b596a8446ef..4a1cf2ee545 100644 --- a/reference/opt/shaders-hlsl/asm/frag/line-directive.line.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/line-directive.line.asm.frag @@ -14,14 +14,12 @@ struct SPIRV_Cross_Output #line 8 "test.frag" void frag_main() { - float _80; #line 8 "test.frag" FragColor = 1.0f; #line 9 "test.frag" FragColor = 2.0f; #line 10 "test.frag" - _80 = vColor; - if (_80 < 0.0f) + if (vColor < 0.0f) { #line 12 "test.frag" FragColor = 3.0f; @@ -31,16 +29,19 @@ void frag_main() #line 16 "test.frag" FragColor = 4.0f; } - for (int _126 = 0; float(_126) < (40.0f + _80); ) +#line 19 "test.frag" + for (int _127 = 0; float(_127) < (40.0f + vColor); ) { #line 21 "test.frag" FragColor += 0.20000000298023223876953125f; #line 22 "test.frag" FragColor += 0.300000011920928955078125f; - _126 += (int(_80) + 5); +#line 19 "test.frag" + _127 += (int(vColor) + 5); continue; } - switch (int(_80)) +#line 25 "test.frag" + switch (int(vColor)) { case 0: { @@ -66,7 +67,8 @@ void frag_main() } for (;;) { - FragColor += (10.0f + _80); +#line 42 "test.frag" + FragColor += (10.0f + vColor); #line 43 "test.frag" if (FragColor < 100.0f) { @@ -76,6 +78,7 @@ void frag_main() break; } } +#line 48 "test.frag" } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag b/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..5926eef7b40 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag @@ -0,0 +1,19 @@ +static float4 
FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = float4(18.0f, 52.0f, 1.0f, 1.0f); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag b/reference/opt/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag index 507bbe1d080..269cecb3022 100644 --- a/reference/opt/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag @@ -55,6 +55,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.o_color = o_color; diff --git a/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag b/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag index 695d5fe9dfd..74c12945bfc 100644 --- a/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag @@ -22,6 +22,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert b/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert index 2cebffffa85..3bccae3e0a5 100644 --- a/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert +++ b/reference/opt/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert @@ -8,10 +8,7 @@ static const int _20 = (_7 + 2); #endif static const uint _8 = SPIRV_CROSS_CONSTANT_ID_202; static const uint _25 = (_8 % 5u); -#ifndef 
SPIRV_CROSS_CONSTANT_ID_0 -#define SPIRV_CROSS_CONSTANT_ID_0 int4(20, 30, _20, _20) -#endif -static const int4 _30 = SPIRV_CROSS_CONSTANT_ID_0; +static const int4 _30 = int4(20, 30, _20, _20); static const int2 _32 = int2(_30.y, _30.x); static const int _33 = _30.y; @@ -28,9 +25,8 @@ void vert_main() { float4 _63 = 0.0f.xxxx; _63.y = float(_20); - float4 _66 = _63; - _66.z = float(_25); - float4 _52 = _66 + float4(_30); + _63.z = float(_25); + float4 _52 = _63 + float4(_30); float2 _56 = _52.xy + float2(_32); gl_Position = float4(_56.x, _56.y, _52.z, _52.w); _4 = _33; diff --git a/reference/opt/shaders-hlsl/comp/access-chain-load-composite.comp b/reference/opt/shaders-hlsl/comp/access-chain-load-composite.comp new file mode 100644 index 00000000000..778f62e83c8 --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/access-chain-load-composite.comp @@ -0,0 +1,108 @@ +struct Baz +{ + float c; +}; + +struct Bar +{ + float d[2][4]; + Baz baz[2]; +}; + +struct Foo +{ + column_major float2x2 a; + float2 b; + Bar c[5]; +}; + +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _31 : register(u0); + +void comp_main() +{ + Foo _36; + _36.a = asfloat(uint2x2(_31.Load(0), _31.Load(8), _31.Load(4), _31.Load(12))); + _36.b = asfloat(_31.Load2(16)); + [unroll] + for (int _4ident = 0; _4ident < 5; _4ident++) + { + [unroll] + for (int _5ident = 0; _5ident < 2; _5ident++) + { + [unroll] + for (int _6ident = 0; _6ident < 4; _6ident++) + { + _36.c[_4ident].d[_5ident][_6ident] = asfloat(_31.Load(_6ident * 4 + _5ident * 16 + _4ident * 40 + 24)); + } + } + [unroll] + for (int _7ident = 0; _7ident < 2; _7ident++) + { + _36.c[_4ident].baz[_7ident].c = asfloat(_31.Load(_7ident * 4 + _4ident * 40 + 56)); + } + } + float2x2 _234 = float2x2(_36.a[0] + 1.0f.xx, _36.a[1] + 1.0f.xx); + _31.Store(224, asuint(_234[0].x)); + _31.Store(228, asuint(_234[1].x)); + _31.Store(232, asuint(_234[0].y)); + _31.Store(236, asuint(_234[1].y)); + _31.Store2(240, asuint(_36.b + 
2.0f.xx)); + _31.Store(248, asuint(_36.c[0].d[0][0])); + _31.Store(252, asuint(_36.c[0].d[0][1])); + _31.Store(256, asuint(_36.c[0].d[0][2])); + _31.Store(260, asuint(_36.c[0].d[0][3])); + _31.Store(264, asuint(_36.c[0].d[1][0])); + _31.Store(268, asuint(_36.c[0].d[1][1])); + _31.Store(272, asuint(_36.c[0].d[1][2])); + _31.Store(276, asuint(_36.c[0].d[1][3])); + _31.Store(280, asuint(_36.c[0].baz[0].c)); + _31.Store(284, asuint(_36.c[0].baz[1].c)); + _31.Store(288, asuint(_36.c[1].d[0][0])); + _31.Store(292, asuint(_36.c[1].d[0][1])); + _31.Store(296, asuint(_36.c[1].d[0][2])); + _31.Store(300, asuint(_36.c[1].d[0][3])); + _31.Store(304, asuint(_36.c[1].d[1][0])); + _31.Store(308, asuint(_36.c[1].d[1][1])); + _31.Store(312, asuint(_36.c[1].d[1][2])); + _31.Store(316, asuint(_36.c[1].d[1][3])); + _31.Store(320, asuint(_36.c[1].baz[0].c)); + _31.Store(324, asuint(_36.c[1].baz[1].c)); + _31.Store(328, asuint(_36.c[2].d[0][0])); + _31.Store(332, asuint(_36.c[2].d[0][1])); + _31.Store(336, asuint(_36.c[2].d[0][2])); + _31.Store(340, asuint(_36.c[2].d[0][3])); + _31.Store(344, asuint(_36.c[2].d[1][0])); + _31.Store(348, asuint(_36.c[2].d[1][1])); + _31.Store(352, asuint(_36.c[2].d[1][2])); + _31.Store(356, asuint(_36.c[2].d[1][3])); + _31.Store(360, asuint(_36.c[2].baz[0].c)); + _31.Store(364, asuint(_36.c[2].baz[1].c)); + _31.Store(368, asuint(_36.c[3].d[0][0])); + _31.Store(372, asuint(_36.c[3].d[0][1])); + _31.Store(376, asuint(_36.c[3].d[0][2])); + _31.Store(380, asuint(_36.c[3].d[0][3])); + _31.Store(384, asuint(_36.c[3].d[1][0])); + _31.Store(388, asuint(_36.c[3].d[1][1] + 5.0f)); + _31.Store(392, asuint(_36.c[3].d[1][2])); + _31.Store(396, asuint(_36.c[3].d[1][3])); + _31.Store(400, asuint(_36.c[3].baz[0].c)); + _31.Store(404, asuint(_36.c[3].baz[1].c)); + _31.Store(408, asuint(_36.c[4].d[0][0])); + _31.Store(412, asuint(_36.c[4].d[0][1])); + _31.Store(416, asuint(_36.c[4].d[0][2])); + _31.Store(420, asuint(_36.c[4].d[0][3])); + _31.Store(424, 
asuint(_36.c[4].d[1][0])); + _31.Store(428, asuint(_36.c[4].d[1][1])); + _31.Store(432, asuint(_36.c[4].d[1][2])); + _31.Store(436, asuint(_36.c[4].d[1][3])); + _31.Store(440, asuint(_36.c[4].baz[0].c)); + _31.Store(444, asuint(_36.c[4].baz[1].c)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/access-chains.comp b/reference/opt/shaders-hlsl/comp/access-chains.comp index 924e9191245..c748200b969 100644 --- a/reference/opt/shaders-hlsl/comp/access-chains.comp +++ b/reference/opt/shaders-hlsl/comp/access-chains.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer wo : register(u1); ByteAddressBuffer ro : register(t0); diff --git a/reference/opt/shaders-hlsl/comp/access-chains.force-uav.comp b/reference/opt/shaders-hlsl/comp/access-chains.force-uav.comp new file mode 100644 index 00000000000..97d046d89a3 --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/access-chains.force-uav.comp @@ -0,0 +1,23 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer wo : register(u1); +RWByteAddressBuffer ro : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + wo.Store4(gl_GlobalInvocationID.x * 64 + 272, asuint(asfloat(ro.Load4(gl_GlobalInvocationID.x * 64 + 160)))); + wo.Store4(gl_GlobalInvocationID.x * 16 + 480, asuint(asfloat(ro.Load4(gl_GlobalInvocationID.x * 16 + 480)))); +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/address-buffers.comp b/reference/opt/shaders-hlsl/comp/address-buffers.comp index a252fc8ae36..7f1c7975bc6 100644 --- a/reference/opt/shaders-hlsl/comp/address-buffers.comp +++ b/reference/opt/shaders-hlsl/comp/address-buffers.comp @@ -1,3 +1,5 @@ +static const uint3 
gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer WriteOnly : register(u2); ByteAddressBuffer ReadOnly : register(t0); RWByteAddressBuffer ReadWrite : register(u1); diff --git a/reference/opt/shaders-hlsl/comp/atomic.comp b/reference/opt/shaders-hlsl/comp/atomic.comp index 72e15bf77dc..e6ff891e8c2 100644 --- a/reference/opt/shaders-hlsl/comp/atomic.comp +++ b/reference/opt/shaders-hlsl/comp/atomic.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer ssbo : register(u2); RWTexture2D uImage : register(u0); RWTexture2D iImage : register(u1); diff --git a/reference/opt/shaders-hlsl/comp/globallycoherent.comp b/reference/opt/shaders-hlsl/comp/globallycoherent.comp index 1637727deb2..b5f1e377ca4 100644 --- a/reference/opt/shaders-hlsl/comp/globallycoherent.comp +++ b/reference/opt/shaders-hlsl/comp/globallycoherent.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + globallycoherent RWByteAddressBuffer _29 : register(u3); ByteAddressBuffer _33 : register(t2); RWTexture2D uImageIn : register(u0); diff --git a/reference/opt/shaders-hlsl/comp/image.comp b/reference/opt/shaders-hlsl/comp/image.comp index 6c2b58cd29c..e2f6b0a340f 100644 --- a/reference/opt/shaders-hlsl/comp/image.comp +++ b/reference/opt/shaders-hlsl/comp/image.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWTexture2D uImageInF : register(u0); RWTexture2D uImageOutF : register(u1); RWTexture2D uImageInI : register(u2); diff --git a/reference/opt/shaders-hlsl/comp/image.nonwritable-uav-texture.comp b/reference/opt/shaders-hlsl/comp/image.nonwritable-uav-texture.comp new file mode 100644 index 00000000000..6c4a2139954 --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/image.nonwritable-uav-texture.comp @@ -0,0 +1,66 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +Texture2D uImageInF : register(t0); +RWTexture2D uImageOutF : register(u1); +Texture2D uImageInI : register(t2); 
+RWTexture2D uImageOutI : register(u3); +Texture2D uImageInU : register(t4); +RWTexture2D uImageOutU : register(u5); +Buffer uImageInBuffer : register(t6); +RWBuffer uImageOutBuffer : register(u7); +Texture2D uImageInF2 : register(t8); +RWTexture2D uImageOutF2 : register(u9); +Texture2D uImageInI2 : register(t10); +RWTexture2D uImageOutI2 : register(u11); +Texture2D uImageInU2 : register(t12); +RWTexture2D uImageOutU2 : register(u13); +Buffer uImageInBuffer2 : register(t14); +RWBuffer uImageOutBuffer2 : register(u15); +Texture2D uImageInF4 : register(t16); +RWTexture2D uImageOutF4 : register(u17); +Texture2D uImageInI4 : register(t18); +RWTexture2D uImageOutI4 : register(u19); +Texture2D uImageInU4 : register(t20); +RWTexture2D uImageOutU4 : register(u21); +Buffer uImageInBuffer4 : register(t22); +RWBuffer uImageOutBuffer4 : register(u23); +RWTexture2D uImageNoFmtF : register(u24); +RWTexture2D uImageNoFmtU : register(u25); +RWTexture2D uImageNoFmtI : register(u26); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + int2 _23 = int2(gl_GlobalInvocationID.xy); + uImageOutF[_23] = uImageInF[_23].x; + uImageOutI[_23] = uImageInI[_23].x; + uImageOutU[_23] = uImageInU[_23].x; + int _74 = int(gl_GlobalInvocationID.x); + uImageOutBuffer[_74] = uImageInBuffer[_74].x; + uImageOutF2[_23] = uImageInF2[_23].xy; + uImageOutI2[_23] = uImageInI2[_23].xy; + uImageOutU2[_23] = uImageInU2[_23].xy; + float4 _135 = uImageInBuffer2[_74]; + uImageOutBuffer2[_74] = _135.xy; + uImageOutF4[_23] = uImageInF4[_23]; + int4 _165 = uImageInI4[_23]; + uImageOutI4[_23] = _165; + uint4 _180 = uImageInU4[_23]; + uImageOutU4[_23] = _180; + uImageOutBuffer4[_74] = uImageInBuffer4[_74]; + uImageNoFmtF[_23] = _135; + uImageNoFmtU[_23] = _180; + uImageNoFmtI[_23] = _165; +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + 
comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/inverse.comp b/reference/opt/shaders-hlsl/comp/inverse.comp index 3be954a6f61..698f647cecc 100644 --- a/reference/opt/shaders-hlsl/comp/inverse.comp +++ b/reference/opt/shaders-hlsl/comp/inverse.comp @@ -1,9 +1,11 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _15 : register(u0); ByteAddressBuffer _20 : register(t1); // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float2x2 SPIRV_Cross_Inverse(float2x2 m) +float2x2 spvInverse(float2x2 m) { float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -23,29 +25,29 @@ float2x2 SPIRV_Cross_Inverse(float2x2 m) } // Returns the determinant of a 2x2 matrix. -float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float3x3 SPIRV_Cross_Inverse(float3x3 m) +float3x3 spvInverse(float3x3 m) { float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) // Create the transpose of the cofactors, as the classical adjoint of the matrix. 
- adj[0][0] = SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]); - adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]); - adj[0][2] = SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]); + adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]); + adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]); + adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]); - adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]); - adj[1][1] = SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]); - adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]); + adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]); + adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]); + adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]); - adj[2][0] = SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]); - adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]); - adj[2][2] = SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]); + adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]); + adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]); + adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]); // Calculate the determinant as a combination of the cofactors of the first row. float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]); @@ -56,37 +58,37 @@ float3x3 SPIRV_Cross_Inverse(float3x3 m) } // Returns the determinant of a 3x3 matrix. 
-float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { - return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * SPIRV_Cross_Det2x2(a2, a3, b2, b3); + return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float4x4 SPIRV_Cross_Inverse(float4x4 m) +float4x4 spvInverse(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) // Create the transpose of the cofactors, as the classical adjoint of the matrix. - adj[0][0] = SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); - adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); - adj[0][2] = SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); - adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); - - adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); - adj[1][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); - adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); - adj[1][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); - - adj[2][0] = SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); - adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], 
m[2][3], m[3][0], m[3][1], m[3][3]); - adj[2][2] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); - adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]); - - adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); - adj[3][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); - adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); - adj[3][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); + adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); + adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); + + adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); + adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); + + adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); + adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], 
m[2][1], m[2][3]); + + adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); + adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); // Calculate the determinant as a combination of the cofactors of the first row. float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]); @@ -99,16 +101,16 @@ float4x4 SPIRV_Cross_Inverse(float4x4 m) void comp_main() { float2x2 _23 = asfloat(uint2x2(_20.Load2(0), _20.Load2(8))); - float2x2 _24 = SPIRV_Cross_Inverse(_23); + float2x2 _24 = spvInverse(_23); _15.Store2(0, asuint(_24[0])); _15.Store2(8, asuint(_24[1])); float3x3 _29 = asfloat(uint3x3(_20.Load3(16), _20.Load3(32), _20.Load3(48))); - float3x3 _30 = SPIRV_Cross_Inverse(_29); + float3x3 _30 = spvInverse(_29); _15.Store3(16, asuint(_30[0])); _15.Store3(32, asuint(_30[1])); _15.Store3(48, asuint(_30[2])); float4x4 _35 = asfloat(uint4x4(_20.Load4(64), _20.Load4(80), _20.Load4(96), _20.Load4(112))); - float4x4 _36 = SPIRV_Cross_Inverse(_35); + float4x4 _36 = spvInverse(_35); _15.Store4(64, asuint(_36[0])); _15.Store4(80, asuint(_36[1])); _15.Store4(96, asuint(_36[2])); diff --git a/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp b/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp index dee39e3d579..ff71a0e103c 100644 --- a/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp +++ b/reference/opt/shaders-hlsl/comp/num-workgroups-alone.comp @@ -1,5 +1,7 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); -cbuffer SPIRV_Cross_NumWorkgroups : register(b0) +cbuffer SPIRV_Cross_NumWorkgroups { uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); }; diff 
--git a/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp b/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp index 1c98e5e56d7..cc326db3329 100644 --- a/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp +++ b/reference/opt/shaders-hlsl/comp/num-workgroups-with-builtins.comp @@ -1,5 +1,7 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); -cbuffer SPIRV_Cross_NumWorkgroups : register(b0) +cbuffer SPIRV_Cross_NumWorkgroups { uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); }; diff --git a/reference/opt/shaders-hlsl/comp/outer-product.comp b/reference/opt/shaders-hlsl/comp/outer-product.comp index 71613d4f156..e58c02fe0b8 100644 --- a/reference/opt/shaders-hlsl/comp/outer-product.comp +++ b/reference/opt/shaders-hlsl/comp/outer-product.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _21 : register(u0); ByteAddressBuffer _26 : register(t1); diff --git a/reference/opt/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp b/reference/opt/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp new file mode 100644 index 00000000000..80394bef7be --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp @@ -0,0 +1,242 @@ +static float3x4 _362; +static float4x3 _364; + +RWByteAddressBuffer _17 : register(u0); +uniform RaytracingAccelerationStructure rtas : register(t1); + +static RayQuery rayQuery; + +void comp_main() +{ + RayDesc _1ident = {0.0f.xxx, 0.0f, float3(1.0f, 0.0f, 0.0f), 9999.0f}; + rayQuery.TraceRayInline(rtas, 0u, 255u, _1ident); + float3x4 _361; + float4x3 _363; + _363 = _364; + _361 = _362; + float3x4 _387; + float4x3 _398; + for (;;) + { + bool _67 = rayQuery.Proceed(); + if (_67) + { + uint _71 = rayQuery.CandidateType(); + switch (_71) + { + case 0u: + { + rayQuery.Abort(); + float4x3 _79 = rayQuery.CandidateObjectToWorld4x3(); + rayQuery.CommitNonOpaqueTriangleHit(); + bool _87 = 
rayQuery.CommittedTriangleFrontFace(); + if (_87) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float2 _92 = rayQuery.CommittedTriangleBarycentrics(); + if (_92.x == 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _98 = rayQuery.CommittedInstanceID(); + if (_98 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _103 = rayQuery.CommittedInstanceIndex(); + if (_103 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _108 = rayQuery.CommittedObjectRayDirection(); + if (_108.x > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _114 = rayQuery.CommittedObjectRayOrigin(); + if (_114.x > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _120 = rayQuery.CommittedPrimitiveIndex(); + if (_120 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float _125 = rayQuery.CommittedRayT(); + if (_125 > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + uint _130 = rayQuery.CommittedInstanceContributionToHitGroupIndex(); + if (_130 > 0u) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + _398 = _79; + _387 = transpose(_79); + break; + } + case 1u: + { + float4x3 _136 = rayQuery.CandidateObjectToWorld4x3(); + bool _139 = rayQuery.CandidateProceduralPrimitiveNonOpaque(); + if (_139) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + rayQuery.CommitProceduralPrimitiveHit(144); + rayQuery.Abort(); + _398 = _136; + _387 = transpose(_136); + break; + } + default: + { + _398 = _363; + _387 = _361; + break; + } + } + _363 = _398; + _361 = _387; + continue; + } + else + { + break; + } + } + if (_361[0].x == _363[0].x) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + uint _157 = rayQuery.CommittedStatus(); + float3x4 _365; + float4x3 _376; + switch (_157) + { + case 0u: + { + float4x3 _163 = rayQuery.CandidateWorldToObject4x3(); + _376 = _163; + _365 = transpose(_163); + break; + } + case 1u: + { + float4x3 _167 = rayQuery.CommittedWorldToObject4x3(); + bool _170 = rayQuery.CommittedTriangleFrontFace(); + if (_170) + { + 
_17.Store(0, 0u); + _17.Store(4, 0u); + } + float2 _174 = rayQuery.CommittedTriangleBarycentrics(); + if (_174.y == 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + _376 = _167; + _365 = transpose(_167); + break; + } + case 2u: + { + int _182 = rayQuery.CommittedGeometryIndex(); + if (_182 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _187 = rayQuery.CommittedInstanceIndex(); + if (_187 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _192 = rayQuery.CommittedInstanceID(); + if (_192 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _197 = rayQuery.CommittedObjectRayDirection(); + if (_197.z > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _204 = rayQuery.CommittedObjectRayOrigin(); + if (_204.x > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + int _210 = rayQuery.CommittedPrimitiveIndex(); + if (_210 > 0) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float _215 = rayQuery.CommittedRayT(); + if (_215 > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + _376 = _363; + _365 = _361; + break; + } + default: + { + _376 = _363; + _365 = _361; + break; + } + } + if (_365[0].x == _376[0].x) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + uint _230 = rayQuery.RayFlags(); + if (_230 > 256u) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float _236 = rayQuery.RayTMin(); + if (_236 > 0.0f) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } + float3 _242 = rayQuery.WorldRayOrigin(); + float3 _244 = rayQuery.WorldRayDirection(); + if (_242.x == _244.z) + { + _17.Store(0, 0u); + _17.Store(4, 0u); + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/rmw-matrix.comp b/reference/opt/shaders-hlsl/comp/rmw-matrix.comp index ed666693588..30ac03f84f4 100644 --- a/reference/opt/shaders-hlsl/comp/rmw-matrix.comp +++ b/reference/opt/shaders-hlsl/comp/rmw-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + 
RWByteAddressBuffer _11 : register(u0); void comp_main() diff --git a/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp b/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp index 42103c2bd46..09cbd2f49b4 100644 --- a/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp +++ b/reference/opt/shaders-hlsl/comp/rwbuffer-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _28 : register(u0); cbuffer UBO : register(b1) { @@ -8,57 +10,57 @@ cbuffer UBO : register(b1) void comp_main() { - float4x4 _253 = asfloat(uint4x4(_28.Load(64), _28.Load(80), _28.Load(96), _28.Load(112), _28.Load(68), _28.Load(84), _28.Load(100), _28.Load(116), _28.Load(72), _28.Load(88), _28.Load(104), _28.Load(120), _28.Load(76), _28.Load(92), _28.Load(108), _28.Load(124))); - _28.Store4(0, asuint(_253[0])); - _28.Store4(16, asuint(_253[1])); - _28.Store4(32, asuint(_253[2])); - _28.Store4(48, asuint(_253[3])); - float2x2 _256 = asfloat(uint2x2(_28.Load(144), _28.Load(152), _28.Load(148), _28.Load(156))); - _28.Store2(128, asuint(_256[0])); - _28.Store2(136, asuint(_256[1])); - float2x3 _259 = asfloat(uint2x3(_28.Load(192), _28.Load(200), _28.Load(208), _28.Load(196), _28.Load(204), _28.Load(212))); - _28.Store3(160, asuint(_259[0])); - _28.Store3(176, asuint(_259[1])); - float3x2 _262 = asfloat(uint3x2(_28.Load(240), _28.Load(256), _28.Load(244), _28.Load(260), _28.Load(248), _28.Load(264))); - _28.Store2(216, asuint(_262[0])); - _28.Store2(224, asuint(_262[1])); - _28.Store2(232, asuint(_262[2])); - float4x4 _265 = asfloat(uint4x4(_28.Load4(0), _28.Load4(16), _28.Load4(32), _28.Load4(48))); - _28.Store(64, asuint(_265[0].x)); - _28.Store(68, asuint(_265[1].x)); - _28.Store(72, asuint(_265[2].x)); - _28.Store(76, asuint(_265[3].x)); - _28.Store(80, asuint(_265[0].y)); - _28.Store(84, asuint(_265[1].y)); - _28.Store(88, asuint(_265[2].y)); - _28.Store(92, asuint(_265[3].y)); - _28.Store(96, asuint(_265[0].z)); - _28.Store(100, 
asuint(_265[1].z)); - _28.Store(104, asuint(_265[2].z)); - _28.Store(108, asuint(_265[3].z)); - _28.Store(112, asuint(_265[0].w)); - _28.Store(116, asuint(_265[1].w)); - _28.Store(120, asuint(_265[2].w)); - _28.Store(124, asuint(_265[3].w)); - float2x2 _268 = asfloat(uint2x2(_28.Load2(128), _28.Load2(136))); - _28.Store(144, asuint(_268[0].x)); - _28.Store(148, asuint(_268[1].x)); - _28.Store(152, asuint(_268[0].y)); - _28.Store(156, asuint(_268[1].y)); - float2x3 _271 = asfloat(uint2x3(_28.Load3(160), _28.Load3(176))); - _28.Store(192, asuint(_271[0].x)); - _28.Store(196, asuint(_271[1].x)); - _28.Store(200, asuint(_271[0].y)); - _28.Store(204, asuint(_271[1].y)); - _28.Store(208, asuint(_271[0].z)); - _28.Store(212, asuint(_271[1].z)); - float3x2 _274 = asfloat(uint3x2(_28.Load2(216), _28.Load2(224), _28.Load2(232))); - _28.Store(240, asuint(_274[0].x)); - _28.Store(244, asuint(_274[1].x)); - _28.Store(248, asuint(_274[2].x)); - _28.Store(256, asuint(_274[0].y)); - _28.Store(260, asuint(_274[1].y)); - _28.Store(264, asuint(_274[2].y)); + float4x4 _258 = asfloat(uint4x4(_28.Load(64), _28.Load(80), _28.Load(96), _28.Load(112), _28.Load(68), _28.Load(84), _28.Load(100), _28.Load(116), _28.Load(72), _28.Load(88), _28.Load(104), _28.Load(120), _28.Load(76), _28.Load(92), _28.Load(108), _28.Load(124))); + _28.Store4(0, asuint(_258[0])); + _28.Store4(16, asuint(_258[1])); + _28.Store4(32, asuint(_258[2])); + _28.Store4(48, asuint(_258[3])); + float2x2 _261 = asfloat(uint2x2(_28.Load(144), _28.Load(152), _28.Load(148), _28.Load(156))); + _28.Store2(128, asuint(_261[0])); + _28.Store2(136, asuint(_261[1])); + float2x3 _264 = asfloat(uint2x3(_28.Load(192), _28.Load(200), _28.Load(208), _28.Load(196), _28.Load(204), _28.Load(212))); + _28.Store3(160, asuint(_264[0])); + _28.Store3(176, asuint(_264[1])); + float3x2 _267 = asfloat(uint3x2(_28.Load(240), _28.Load(256), _28.Load(244), _28.Load(260), _28.Load(248), _28.Load(264))); + _28.Store2(216, asuint(_267[0])); + 
_28.Store2(224, asuint(_267[1])); + _28.Store2(232, asuint(_267[2])); + float4x4 _271 = asfloat(uint4x4(_28.Load4(0), _28.Load4(16), _28.Load4(32), _28.Load4(48))); + _28.Store(64, asuint(_271[0].x)); + _28.Store(68, asuint(_271[1].x)); + _28.Store(72, asuint(_271[2].x)); + _28.Store(76, asuint(_271[3].x)); + _28.Store(80, asuint(_271[0].y)); + _28.Store(84, asuint(_271[1].y)); + _28.Store(88, asuint(_271[2].y)); + _28.Store(92, asuint(_271[3].y)); + _28.Store(96, asuint(_271[0].z)); + _28.Store(100, asuint(_271[1].z)); + _28.Store(104, asuint(_271[2].z)); + _28.Store(108, asuint(_271[3].z)); + _28.Store(112, asuint(_271[0].w)); + _28.Store(116, asuint(_271[1].w)); + _28.Store(120, asuint(_271[2].w)); + _28.Store(124, asuint(_271[3].w)); + float2x2 _274 = asfloat(uint2x2(_28.Load2(128), _28.Load2(136))); + _28.Store(144, asuint(_274[0].x)); + _28.Store(148, asuint(_274[1].x)); + _28.Store(152, asuint(_274[0].y)); + _28.Store(156, asuint(_274[1].y)); + float2x3 _277 = asfloat(uint2x3(_28.Load3(160), _28.Load3(176))); + _28.Store(192, asuint(_277[0].x)); + _28.Store(196, asuint(_277[1].x)); + _28.Store(200, asuint(_277[0].y)); + _28.Store(204, asuint(_277[1].y)); + _28.Store(208, asuint(_277[0].z)); + _28.Store(212, asuint(_277[1].z)); + float3x2 _280 = asfloat(uint3x2(_28.Load2(216), _28.Load2(224), _28.Load2(232))); + _28.Store(240, asuint(_280[0].x)); + _28.Store(244, asuint(_280[1].x)); + _28.Store(248, asuint(_280[2].x)); + _28.Store(256, asuint(_280[0].y)); + _28.Store(260, asuint(_280[1].y)); + _28.Store(264, asuint(_280[2].y)); _28.Store(_68_index0 * 4 + _68_index1 * 16 + 64, asuint(1.0f)); _28.Store(_68_index0 * 4 + _68_index1 * 8 + 144, asuint(2.0f)); _28.Store(_68_index0 * 4 + _68_index1 * 8 + 192, asuint(3.0f)); diff --git a/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp index 47f2fe41076..db2bbe96989 100644 --- 
a/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0); void comp_main() diff --git a/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp b/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp index c4537db0391..4e7c5e6167e 100644 --- a/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp +++ b/reference/opt/shaders-hlsl/comp/spec-constant-op-member-array.comp @@ -28,6 +28,7 @@ static const int d = (c + 50); #define SPIRV_CROSS_CONSTANT_ID_3 400 #endif static const int e = SPIRV_CROSS_CONSTANT_ID_3; +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); RWByteAddressBuffer _22 : register(u0); diff --git a/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp b/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp index 2e3df626ae7..82657cacfcb 100644 --- a/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp +++ b/reference/opt/shaders-hlsl/comp/ssbo-array-length.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u1); void comp_main() diff --git a/reference/opt/shaders-hlsl/comp/ssbo-array.comp b/reference/opt/shaders-hlsl/comp/ssbo-array.comp index d8bce8d54b7..ee202a22257 100644 --- a/reference/opt/shaders-hlsl/comp/ssbo-array.comp +++ b/reference/opt/shaders-hlsl/comp/ssbo-array.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + void comp_main() { } diff --git a/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp deleted file mode 100644 index dabc7df9e2d..00000000000 --- a/reference/opt/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp +++ /dev/null @@ -1,67 +0,0 @@ -RWByteAddressBuffer _9 : register(u0, space0); - 
-static uint4 gl_SubgroupEqMask; -static uint4 gl_SubgroupGeMask; -static uint4 gl_SubgroupGtMask; -static uint4 gl_SubgroupLeMask; -static uint4 gl_SubgroupLtMask; -void comp_main() -{ - _9.Store(0, asuint(float(WaveGetLaneCount()))); - _9.Store(0, asuint(float(WaveGetLaneIndex()))); - _9.Store(0, asuint(float4(gl_SubgroupEqMask).x)); - _9.Store(0, asuint(float4(gl_SubgroupGeMask).x)); - _9.Store(0, asuint(float4(gl_SubgroupGtMask).x)); - _9.Store(0, asuint(float4(gl_SubgroupLeMask).x)); - _9.Store(0, asuint(float4(gl_SubgroupLtMask).x)); - uint4 _75 = WaveActiveBallot(true); - float4 _88 = WaveActiveSum(20.0f.xxxx); - int4 _94 = WaveActiveSum(int4(20, 20, 20, 20)); - float4 _96 = WaveActiveProduct(20.0f.xxxx); - int4 _98 = WaveActiveProduct(int4(20, 20, 20, 20)); - float4 _127 = WavePrefixProduct(_96) * _96; - int4 _129 = WavePrefixProduct(_98) * _98; -} - -[numthreads(1, 1, 1)] -void main() -{ - gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96)); - if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0; - if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0; - if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0; - if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0; - gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u); - if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u; - if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u; - if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u; - if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u; - if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u; - if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u; - uint gt_lane_index = WaveGetLaneIndex() + 1; - gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u); - if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u; - if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u; - if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u; - if (gt_lane_index 
>= 128) gl_SubgroupGtMask.w = 0u; - if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u; - if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u; - if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u; - uint le_lane_index = WaveGetLaneIndex() + 1; - gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u; - if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u; - if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u; - if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u; - if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u; - if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u; - if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u; - if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u; - gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u; - if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u; - if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u; - if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u; - if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u; - if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u; - if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u; - comp_main(); -} diff --git a/reference/opt/shaders-hlsl/flatten/array.flatten.vert b/reference/opt/shaders-hlsl/flatten/array.flatten.vert new file mode 100644 index 00000000000..c709893c1e9 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/array.flatten.vert @@ -0,0 +1,28 @@ +uniform float4 UBO[56]; + +static float4 gl_Position; +static float4 aVertex; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = (mul(aVertex, float4x4(UBO[40], UBO[41], UBO[42], UBO[43])) + UBO[55]) + ((UBO[50] + UBO[45]) + UBO[54].x.xxxx); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git 
a/reference/opt/shaders-hlsl/flatten/basic.flatten.vert b/reference/opt/shaders-hlsl/flatten/basic.flatten.vert new file mode 100644 index 00000000000..778acd48037 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/basic.flatten.vert @@ -0,0 +1,35 @@ +uniform float4 UBO[4]; + +static float4 gl_Position; +static float4 aVertex; +static float3 vNormal; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float3 vNormal : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vNormal = aNormal; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vNormal = vNormal; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/copy.flatten.vert b/reference/opt/shaders-hlsl/flatten/copy.flatten.vert new file mode 100644 index 00000000000..5d857ad674d --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/copy.flatten.vert @@ -0,0 +1,50 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + for (int _96 = 0; _96 < 4; ) + { + Light _51 = {UBO[_96 * 2 + 4].xyz, UBO[_96 * 2 + 4].w, UBO[_96 * 2 + 5]}; + float3 _68 = aVertex.xyz - _51.Position; + vColor += ((UBO[_96 * 2 + 5] * clamp(1.0f - (length(_68) / _51.Radius), 0.0f, 1.0f)) * 
dot(aNormal, normalize(_68))); + _96++; + continue; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/dynamic.flatten.vert b/reference/opt/shaders-hlsl/flatten/dynamic.flatten.vert new file mode 100644 index 00000000000..98d5e1b3039 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/dynamic.flatten.vert @@ -0,0 +1,49 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + for (int _82 = 0; _82 < 4; ) + { + float3 _54 = aVertex.xyz - UBO[_82 * 2 + 4].xyz; + vColor += ((UBO[_82 * 2 + 5] * clamp(1.0f - (length(_54) / UBO[_82 * 2 + 4].w), 0.0f, 1.0f)) * dot(aNormal, normalize(_54))); + _82++; + continue; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/matrix-conversion.flatten.frag b/reference/opt/shaders-hlsl/flatten/matrix-conversion.flatten.frag new file mode 100644 index 00000000000..59ec525f41a --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/matrix-conversion.flatten.frag @@ -0,0 +1,29 @@ +uniform float4 UBO[4]; + +static float3 FragColor; 
+static float3 vNormal; + +struct SPIRV_Cross_Input +{ + nointerpolation float3 vNormal : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float3 FragColor : SV_Target0; +}; + +void frag_main() +{ + float4x4 _19 = float4x4(UBO[0], UBO[1], UBO[2], UBO[3]); + FragColor = mul(vNormal, float3x3(_19[0].xyz, _19[1].xyz, _19[2].xyz)); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vNormal = stage_input.vNormal; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/matrixindex.flatten.vert b/reference/opt/shaders-hlsl/flatten/matrixindex.flatten.vert new file mode 100644 index 00000000000..b69a72dc11a --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/matrixindex.flatten.vert @@ -0,0 +1,41 @@ +uniform float4 UBO[14]; + +static float4 gl_Position; +static float4 oA; +static float4 oB; +static float4 oC; +static float4 oD; +static float4 oE; + +struct SPIRV_Cross_Output +{ + float4 oA : TEXCOORD0; + float4 oB : TEXCOORD1; + float4 oC : TEXCOORD2; + float4 oD : TEXCOORD3; + float4 oE : TEXCOORD4; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = 0.0f.xxxx; + oA = UBO[1]; + oB = float4(UBO[4].y, UBO[5].y, UBO[6].y, UBO[7].y); + oC = UBO[9]; + oD = float4(UBO[10].x, UBO[11].x, UBO[12].x, UBO[13].x); + oE = float4(UBO[1].z, UBO[6].y, UBO[9].z, UBO[12].y); +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.oA = oA; + stage_output.oB = oB; + stage_output.oC = oC; + stage_output.oD = oD; + stage_output.oE = oE; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/multiindex.flatten.vert b/reference/opt/shaders-hlsl/flatten/multiindex.flatten.vert new file mode 100644 index 00000000000..f21f05ec446 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/multiindex.flatten.vert @@ -0,0 +1,28 @@ +uniform float4 
UBO[15]; + +static float4 gl_Position; +static int2 aIndex; + +struct SPIRV_Cross_Input +{ + int2 aIndex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = UBO[aIndex.x * 5 + aIndex.y * 1 + 0]; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aIndex = stage_input.aIndex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/push-constant.flatten.vert b/reference/opt/shaders-hlsl/flatten/push-constant.flatten.vert new file mode 100644 index 00000000000..5bfb4dc0651 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/push-constant.flatten.vert @@ -0,0 +1,35 @@ +uniform float4 PushMe[6]; + +static float4 gl_Position; +static float4 Pos; +static float2 vRot; +static float2 Rot; + +struct SPIRV_Cross_Input +{ + float2 Rot : TEXCOORD0; + float4 Pos : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float2 vRot : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(Pos, float4x4(PushMe[0], PushMe[1], PushMe[2], PushMe[3])); + vRot = mul(Rot, float2x2(PushMe[4].xy, PushMe[4].zw)) + PushMe[5].z.xx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + Pos = stage_input.Pos; + Rot = stage_input.Rot; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vRot = vRot; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/rowmajor.flatten.vert b/reference/opt/shaders-hlsl/flatten/rowmajor.flatten.vert new file mode 100644 index 00000000000..2560484efb5 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/rowmajor.flatten.vert @@ -0,0 +1,28 @@ +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : 
SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])) + mul(aVertex, transpose(float4x4(UBO[4], UBO[5], UBO[6], UBO[7]))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/struct.flatten.vert b/reference/opt/shaders-hlsl/flatten/struct.flatten.vert new file mode 100644 index 00000000000..41ad8ce9654 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/struct.flatten.vert @@ -0,0 +1,44 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[6]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + float3 _39 = aVertex.xyz - UBO[4].xyz; + vColor += ((UBO[5] * clamp(1.0f - (length(_39) / UBO[4].w), 0.0f, 1.0f)) * dot(aNormal, normalize(_39))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/struct.rowmajor.flatten.vert b/reference/opt/shaders-hlsl/flatten/struct.rowmajor.flatten.vert new file mode 100644 index 00000000000..bb702907a72 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/struct.rowmajor.flatten.vert @@ -0,0 +1,43 @@ +struct Foo +{ + column_major float3x4 MVP0; + column_major float3x4 MVP1; +}; + +uniform float4 
UBO[8]; + +static float4 v0; +static float4 v1; +static float3 V0; +static float3 V1; + +struct SPIRV_Cross_Input +{ + float4 v0 : TEXCOORD0; + float4 v1 : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float3 V0 : TEXCOORD0; + float3 V1 : TEXCOORD1; +}; + +void vert_main() +{ + Foo _19 = {transpose(float4x3(UBO[0].xyz, UBO[1].xyz, UBO[2].xyz, UBO[3].xyz)), transpose(float4x3(UBO[4].xyz, UBO[5].xyz, UBO[6].xyz, UBO[7].xyz))}; + Foo _20 = _19; + V0 = mul(_20.MVP0, v0); + V1 = mul(_20.MVP1, v1); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + v0 = stage_input.v0; + v1 = stage_input.v1; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.V0 = V0; + stage_output.V1 = V1; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/flatten/swizzle.flatten.vert b/reference/opt/shaders-hlsl/flatten/swizzle.flatten.vert new file mode 100644 index 00000000000..1091a17e995 --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/swizzle.flatten.vert @@ -0,0 +1,45 @@ +uniform float4 UBO[8]; + +static float4 gl_Position; +static float4 oA; +static float4 oB; +static float4 oC; +static float4 oD; +static float4 oE; +static float4 oF; + +struct SPIRV_Cross_Output +{ + float4 oA : TEXCOORD0; + float4 oB : TEXCOORD1; + float4 oC : TEXCOORD2; + float4 oD : TEXCOORD3; + float4 oE : TEXCOORD4; + float4 oF : TEXCOORD5; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = 0.0f.xxxx; + oA = UBO[0]; + oB = float4(UBO[1].xy, UBO[1].zw); + oC = float4(UBO[2].x, UBO[3].xyz); + oD = float4(UBO[4].xyz, UBO[4].w); + oE = float4(UBO[5].x, UBO[5].y, UBO[5].z, UBO[5].w); + oF = float4(UBO[6].x, UBO[6].zw, UBO[7].x); +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.oA = oA; + stage_output.oB = oB; + stage_output.oC = oC; + stage_output.oD = oD; + stage_output.oE = oE; + stage_output.oF = oF; + return stage_output; +} diff --git 
a/reference/opt/shaders-hlsl/flatten/types.flatten.frag b/reference/opt/shaders-hlsl/flatten/types.flatten.frag new file mode 100644 index 00000000000..feb0b36096a --- /dev/null +++ b/reference/opt/shaders-hlsl/flatten/types.flatten.frag @@ -0,0 +1,23 @@ +uniform int4 UBO1[2]; +uniform uint4 UBO2[2]; +uniform float4 UBO0[2]; + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = ((((float4(UBO1[0]) + float4(UBO1[1])) + float4(UBO2[0])) + float4(UBO2[1])) + UBO0[0]) + UBO0[1]; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag index 3adf7d9852e..38f416fbfad 100644 --- a/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders-hlsl/frag/array-lut-no-loop-variable.frag @@ -1,12 +1,6 @@ static const float _17[5] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; static float4 FragColor; -static float4 v0; - -struct SPIRV_Cross_Input -{ - float4 v0 : TEXCOORD0; -}; struct SPIRV_Cross_Output { @@ -24,9 +18,8 @@ void frag_main() } } -SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +SPIRV_Cross_Output main() { - v0 = stage_input.v0; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/builtins.frag b/reference/opt/shaders-hlsl/frag/builtins.frag index 922eca7c2d2..8432c42f80d 100644 --- a/reference/opt/shaders-hlsl/frag/builtins.frag +++ b/reference/opt/shaders-hlsl/frag/builtins.frag @@ -24,6 +24,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vColor = stage_input.vColor; frag_main(); SPIRV_Cross_Output stage_output; diff --git 
a/reference/opt/shaders-hlsl/frag/bvec-operations.frag b/reference/opt/shaders-hlsl/frag/bvec-operations.frag index 6a22df1ed7f..4813cc55a26 100644 --- a/reference/opt/shaders-hlsl/frag/bvec-operations.frag +++ b/reference/opt/shaders-hlsl/frag/bvec-operations.frag @@ -1,3 +1,5 @@ +static bool _47; + static float2 value; static float4 FragColor; @@ -11,8 +13,6 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; -bool _47; - void frag_main() { bool2 _25 = bool2(value.x == 0.0f, _47); diff --git a/reference/opt/shaders-hlsl/frag/complex-expression-in-access-chain.frag b/reference/opt/shaders-hlsl/frag/complex-expression-in-access-chain.frag index d9336c09fce..1de882445b4 100644 --- a/reference/opt/shaders-hlsl/frag/complex-expression-in-access-chain.frag +++ b/reference/opt/shaders-hlsl/frag/complex-expression-in-access-chain.frag @@ -28,6 +28,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vIn = stage_input.vIn; vIn2 = stage_input.vIn2; frag_main(); diff --git a/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag b/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag index b2899ea02ca..1b314e13b98 100644 --- a/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag +++ b/reference/opt/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag @@ -28,6 +28,7 @@ void frag_main() float4 _47 = ddy_fine(vInput); float4 _50 = fwidth(vInput); float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw); + float2 _56 = _56_tmp.xx; if (vInput.y > 10.0f) { FragColor += _23; @@ -40,7 +41,7 @@ void frag_main() FragColor += _44; FragColor += _47; FragColor += _50; - FragColor += float2(_56_tmp, _56_tmp).xyxy; + FragColor += _56.xyxy; } } diff --git a/reference/opt/shaders-hlsl/frag/demote-to-helper.frag b/reference/opt/shaders-hlsl/frag/demote-to-helper.frag new file mode 100644 
index 00000000000..743a4228baf --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/demote-to-helper.frag @@ -0,0 +1,9 @@ +void frag_main() +{ + discard; +} + +void main() +{ + frag_main(); +} diff --git a/reference/opt/shaders-hlsl/frag/fp16-packing.frag b/reference/opt/shaders-hlsl/frag/fp16-packing.frag index d87828225fd..54b91e2aa51 100644 --- a/reference/opt/shaders-hlsl/frag/fp16-packing.frag +++ b/reference/opt/shaders-hlsl/frag/fp16-packing.frag @@ -15,21 +15,21 @@ struct SPIRV_Cross_Output uint FP16Out : SV_Target1; }; -uint SPIRV_Cross_packHalf2x16(float2 value) +uint spvPackHalf2x16(float2 value) { uint2 Packed = f32tof16(value); return Packed.x | (Packed.y << 16); } -float2 SPIRV_Cross_unpackHalf2x16(uint value) +float2 spvUnpackHalf2x16(uint value) { return f16tof32(uint2(value & 0xffff, value >> 16)); } void frag_main() { - FP32Out = SPIRV_Cross_unpackHalf2x16(FP16); - FP16Out = SPIRV_Cross_packHalf2x16(FP32); + FP32Out = spvUnpackHalf2x16(FP16); + FP16Out = spvPackHalf2x16(FP32); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/fp16.invalid.desktop.frag b/reference/opt/shaders-hlsl/frag/fp16.invalid.desktop.frag deleted file mode 100644 index 8ec30af16fc..00000000000 --- a/reference/opt/shaders-hlsl/frag/fp16.invalid.desktop.frag +++ /dev/null @@ -1,45 +0,0 @@ -static min16float4 v4; -static min16float3 v3; -static min16float v1; -static min16float2 v2; -static float o1; -static float2 o2; -static float3 o3; -static float4 o4; - -struct SPIRV_Cross_Input -{ - min16float v1 : TEXCOORD0; - min16float2 v2 : TEXCOORD1; - min16float3 v3 : TEXCOORD2; - min16float4 v4 : TEXCOORD3; -}; - -struct SPIRV_Cross_Output -{ - float o1 : SV_Target0; - float2 o2 : SV_Target1; - float3 o3 : SV_Target2; - float4 o4 : SV_Target3; -}; - -void frag_main() -{ - min16float4 _324; - min16float4 _387 = modf(v4, _324); -} - -SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) -{ - v4 = stage_input.v4; - v3 = 
stage_input.v3; - v1 = stage_input.v1; - v2 = stage_input.v2; - frag_main(); - SPIRV_Cross_Output stage_output; - stage_output.o1 = o1; - stage_output.o2 = o2; - stage_output.o3 = o3; - stage_output.o4 = o4; - return stage_output; -} diff --git a/reference/opt/shaders-hlsl/frag/image-query-uav.frag b/reference/opt/shaders-hlsl/frag/image-query-uav.frag new file mode 100644 index 00000000000..3b50282fe07 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/image-query-uav.frag @@ -0,0 +1,8 @@ +void frag_main() +{ +} + +void main() +{ + frag_main(); +} diff --git a/reference/opt/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag b/reference/opt/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag new file mode 100644 index 00000000000..3b50282fe07 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag @@ -0,0 +1,8 @@ +void frag_main() +{ +} + +void main() +{ + frag_main(); +} diff --git a/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag b/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag index e206b83798a..54cb1dd944c 100644 --- a/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag +++ b/reference/opt/shaders-hlsl/frag/input-attachment-ms.frag @@ -24,6 +24,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; gl_SampleID = stage_input.gl_SampleID; frag_main(); SPIRV_Cross_Output stage_output; diff --git a/reference/opt/shaders-hlsl/frag/input-attachment.frag b/reference/opt/shaders-hlsl/frag/input-attachment.frag index d87661e5f93..34aaafcf3d2 100644 --- a/reference/opt/shaders-hlsl/frag/input-attachment.frag +++ b/reference/opt/shaders-hlsl/frag/input-attachment.frag @@ -22,6 +22,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output 
stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/io-block.frag b/reference/opt/shaders-hlsl/frag/io-block.frag index 52c1f518bf2..812a44d8138 100644 --- a/reference/opt/shaders-hlsl/frag/io-block.frag +++ b/reference/opt/shaders-hlsl/frag/io-block.frag @@ -1,13 +1,18 @@ -static float4 FragColor; - struct VertexOut { - float4 a : TEXCOORD1; - float4 b : TEXCOORD2; + float4 a; + float4 b; }; +static float4 FragColor; static VertexOut _12; +struct SPIRV_Cross_Input +{ + float4 VertexOut_a : TEXCOORD1; + float4 VertexOut_b : TEXCOORD2; +}; + struct SPIRV_Cross_Output { float4 FragColor : SV_Target0; @@ -18,9 +23,10 @@ void frag_main() FragColor = _12.a + _12.b; } -SPIRV_Cross_Output main(in VertexOut stage_input_12) +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { - _12 = stage_input_12; + _12.a = stage_input.VertexOut_a; + _12.b = stage_input.VertexOut_b; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag b/reference/opt/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag new file mode 100644 index 00000000000..2af0e513b44 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag @@ -0,0 +1,32 @@ +uniform sampler2D uSampler; + +static float4 FragColor; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : COLOR0; +}; + +void frag_main() +{ + float3 _23 = float3(vUV, 5.0f); + FragColor = tex2Dproj(uSampler, float4(_23.xy, 0.0, _23.z)); + FragColor += tex2Dbias(uSampler, float4(vUV, 0.0, 3.0f)); + FragColor += tex2Dlod(uSampler, float4(vUV, 0.0, 2.0f)); + FragColor += tex2Dgrad(uSampler, vUV, 4.0f.xx, 5.0f.xx); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = float4(FragColor); + return 
stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/no-return2.frag b/reference/opt/shaders-hlsl/frag/no-return2.frag index e9d7bbc8f97..3b50282fe07 100644 --- a/reference/opt/shaders-hlsl/frag/no-return2.frag +++ b/reference/opt/shaders-hlsl/frag/no-return2.frag @@ -1,16 +1,8 @@ -static float4 vColor; - -struct SPIRV_Cross_Input -{ - float4 vColor : TEXCOORD0; -}; - void frag_main() { } -void main(SPIRV_Cross_Input stage_input) +void main() { - vColor = stage_input.vColor; frag_main(); } diff --git a/reference/opt/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag b/reference/opt/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag index 544c5705389..6685ef9c429 100644 --- a/reference/opt/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag +++ b/reference/opt/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag @@ -3,12 +3,15 @@ struct UBO_1_1 float4 v[64]; }; -ConstantBuffer ubos[] : register(b0, space3); -ByteAddressBuffer ssbos[] : register(t0, space4); +ConstantBuffer ubos[] : register(b2, space9); +RWByteAddressBuffer ssbos[] : register(u3, space10); Texture2D uSamplers[] : register(t0, space0); -SamplerState uSamps[] : register(s0, space2); -Texture2D uCombinedSamplers[] : register(t0, space1); -SamplerState _uCombinedSamplers_sampler[] : register(s0, space1); +SamplerState uSamps[] : register(s1, space3); +Texture2D uCombinedSamplers[] : register(t4, space2); +SamplerState _uCombinedSamplers_sampler[] : register(s4, space2); +Texture2DMS uSamplersMS[] : register(t0, space1); +RWTexture2D uImages[] : register(u5, space7); +RWTexture2D uImagesU32[] : register(u5, space8); static int vIndex; static float4 FragColor; @@ -25,14 +28,76 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(Level, ret.x, ret.y, Param); + return ret; +} + +uint2 spvTextureSize(Texture2DMS Tex, uint 
Level, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y, Param); + return ret; +} + +uint2 spvImageSize(RWTexture2D Tex, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y); + Param = 0u; + return ret; +} + void frag_main() { int _22 = vIndex + 10; int _32 = vIndex + 40; FragColor = uSamplers[NonUniformResourceIndex(_22)].Sample(uSamps[NonUniformResourceIndex(_32)], vUV); - FragColor = uCombinedSamplers[NonUniformResourceIndex(_22)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_22)], vUV); - FragColor += ubos[NonUniformResourceIndex(vIndex + 20)].v[_32]; - FragColor += asfloat(ssbos[NonUniformResourceIndex(vIndex + 50)].Load4((vIndex + 60) * 16 + 0)); + int _49 = _22; + FragColor = uCombinedSamplers[NonUniformResourceIndex(_49)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_49)], vUV); + int _65 = vIndex + 20; + int _69 = _32; + FragColor += ubos[NonUniformResourceIndex(_65)].v[_69]; + int _83 = vIndex + 50; + int _88 = vIndex + 60; + FragColor += asfloat(ssbos[NonUniformResourceIndex(_83)].Load4(_88 * 16 + 16)); + int _100 = vIndex + 70; + ssbos[NonUniformResourceIndex(_88)].Store4(_100 * 16 + 16, asuint(20.0f.xxxx)); + int2 _111 = int2(vUV); + FragColor = uSamplers[NonUniformResourceIndex(_49)].Load(int3(_111, 0)); + int _116 = vIndex + 100; + uint _122; + ssbos[_116].InterlockedAdd(0, 100u, _122); + float _136_tmp = uSamplers[NonUniformResourceIndex(_22)].CalculateLevelOfDetail(uSamps[NonUniformResourceIndex(_32)], vUV); + float2 _136 = _136_tmp.xx; + float _143_tmp = uCombinedSamplers[NonUniformResourceIndex(_49)].CalculateLevelOfDetail(_uCombinedSamplers_sampler[NonUniformResourceIndex(_49)], vUV); + float2 _143 = _143_tmp.xx; + float4 _147 = FragColor; + float2 _149 = _147.xy + (_136 + _143); + FragColor.x = _149.x; + FragColor.y = _149.y; + int _160; + spvTextureSize(uSamplers[NonUniformResourceIndex(_65)], 0u, _160); + FragColor.x += float(int(_160)); + int _176; + 
spvTextureSize(uSamplersMS[NonUniformResourceIndex(_65)], 0u, _176); + FragColor.y += float(int(_176)); + uint _187_dummy_parameter; + float4 _189 = FragColor; + float2 _191 = _189.xy + float2(int2(spvTextureSize(uSamplers[NonUniformResourceIndex(_65)], uint(0), _187_dummy_parameter))); + FragColor.x = _191.x; + FragColor.y = _191.y; + FragColor += uImages[NonUniformResourceIndex(_83)][_111].xxxx; + uint _216_dummy_parameter; + float4 _218 = FragColor; + float2 _220 = _218.xy + float2(int2(spvImageSize(uImages[NonUniformResourceIndex(_65)], _216_dummy_parameter))); + FragColor.x = _220.x; + FragColor.y = _220.y; + uImages[NonUniformResourceIndex(_88)][_111] = 50.0f.x; + uint _248; + InterlockedAdd(uImagesU32[NonUniformResourceIndex(_100)][_111], 40u, _248); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 00000000000..8923f96a75e --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,24 @@ +RWByteAddressBuffer _9 : register(u6, space0); +globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0); +RasterizerOrderedByteAddressBuffer _52 : register(u4, space0); +RWTexture2D img4 : register(u5, space0); +RasterizerOrderedTexture2D img : register(u0, space0); +RasterizerOrderedTexture2D img3 : register(u2, space0); +RasterizerOrderedTexture2D img2 : register(u1, space0); + +void frag_main() +{ + _9.Store(0, uint(0)); + img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f); + img[int2(0, 0)] = img3[int2(0, 0)]; + uint _39; + InterlockedAdd(img2[int2(0, 0)], 1u, _39); + _42.Store(0, uint(int(_42.Load(0)) + 42)); + uint _55; + _42.InterlockedAnd(4, _52.Load(0), _55); +} + +void main() +{ + frag_main(); +} diff --git a/reference/opt/shaders-hlsl/frag/query-lod.desktop.frag 
b/reference/opt/shaders-hlsl/frag/query-lod.desktop.frag index fd95798bf42..a9d4bd83d9d 100644 --- a/reference/opt/shaders-hlsl/frag/query-lod.desktop.frag +++ b/reference/opt/shaders-hlsl/frag/query-lod.desktop.frag @@ -17,7 +17,8 @@ struct SPIRV_Cross_Output void frag_main() { float _19_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vTexCoord); - FragColor = float2(_19_tmp, _19_tmp).xyxy; + float2 _19 = _19_tmp.xx; + FragColor = _19.xyxy; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag b/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag new file mode 100644 index 00000000000..bbe3e4a7d32 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag @@ -0,0 +1,21 @@ +globallycoherent RWByteAddressBuffer _12 : register(u0); + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = asfloat(_12.Load4(0)); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.frag b/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.frag new file mode 100644 index 00000000000..02252f9cbc5 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/readonly-coherent-ssbo.frag @@ -0,0 +1,21 @@ +ByteAddressBuffer _12 : register(t0); + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = asfloat(_12.Load4(0)); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag index c6539b18342..82688ac5a4f 100644 --- 
a/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag +++ b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -24,9 +24,10 @@ struct SPIRV_Cross_Output void frag_main() { - float4 _80 = vDirRef; - _80.z = vDirRef.w; - FragColor = (((((((uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)) + uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmp(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, vDirRef, 0.5f)) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1))) + uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSampler2D.SampleCmp(_uSampler2D_sampler, _80.xy / _80.z, vDirRef.z / _80.z, int2(1, 1))) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, _80.xy / _80.z, vDirRef.z / _80.z, int2(1, 1)); + float4 _33 = vDirRef; + float4 _80 = _33; + _80.z = _33.w; + FragColor = (((((((uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)) + uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, _33.xyz, _33.w, int2(-1, -1))) + uSamplerCube.SampleCmp(_uSamplerCube_sampler, _33.xyz, _33.w)) + uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, _33, 0.5f)) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1))) + uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, _33.xyz, _33.w, int2(-1, -1))) + uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, _33.xyz, _33.w)) + uSampler2D.SampleCmp(_uSampler2D_sampler, _80.xy / _80.z, _33.z / _80.z, int2(1, 1))) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, _80.xy / _80.z, _33.z / _80.z, int2(1, 1)); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/sample-mask-in-and-out.frag 
b/reference/opt/shaders-hlsl/frag/sample-mask-in-and-out.frag new file mode 100644 index 00000000000..185a09821ea --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/sample-mask-in-and-out.frag @@ -0,0 +1,30 @@ +static int gl_SampleMaskIn; +static int gl_SampleMask; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + uint gl_SampleMaskIn : SV_Coverage; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; + uint gl_SampleMask : SV_Coverage; +}; + +void frag_main() +{ + FragColor = 1.0f.xxxx; + gl_SampleMask = gl_SampleMaskIn; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_SampleMaskIn = stage_input.gl_SampleMaskIn; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_SampleMask = gl_SampleMask; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/sample-mask-in.frag b/reference/opt/shaders-hlsl/frag/sample-mask-in.frag new file mode 100644 index 00000000000..8f6cfaf9e53 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/sample-mask-in.frag @@ -0,0 +1,32 @@ +static int gl_SampleID; +static int gl_SampleMaskIn; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + uint gl_SampleID : SV_SampleIndex; + uint gl_SampleMaskIn : SV_Coverage; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + if ((gl_SampleMaskIn & (1 << gl_SampleID)) != 0) + { + FragColor = 1.0f.xxxx; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_SampleID = stage_input.gl_SampleID; + gl_SampleMaskIn = stage_input.gl_SampleMaskIn; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/sample-mask-out.frag b/reference/opt/shaders-hlsl/frag/sample-mask-out.frag new file mode 100644 index 00000000000..a966c032183 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/sample-mask-out.frag @@ -0,0 +1,23 @@ +static int 
gl_SampleMask; +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; + uint gl_SampleMask : SV_Coverage; +}; + +void frag_main() +{ + FragColor = 1.0f.xxxx; + gl_SampleMask = 0; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_SampleMask = gl_SampleMask; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/sampler-array.frag b/reference/opt/shaders-hlsl/frag/sampler-array.frag index 1eced29be0d..8ecdc6c3a8e 100644 --- a/reference/opt/shaders-hlsl/frag/sampler-array.frag +++ b/reference/opt/shaders-hlsl/frag/sampler-array.frag @@ -24,6 +24,7 @@ void frag_main() void main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vTex = stage_input.vTex; vIndex = stage_input.vIndex; frag_main(); diff --git a/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag b/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag index 0fb694c543f..6c2d0be4f71 100644 --- a/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag +++ b/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag @@ -11,12 +11,12 @@ struct SPIRV_Cross_Output float FragColor : SV_Target0; }; -float SPIRV_Cross_Reflect(float i, float n) +float spvReflect(float i, float n) { return i - 2.0 * dot(n, i) * n; } -float SPIRV_Cross_Refract(float i, float n, float eta) +float spvRefract(float i, float n, float eta) { float NoI = n * i; float NoI2 = NoI * NoI; @@ -33,8 +33,8 @@ float SPIRV_Cross_Refract(float i, float n, float eta) void frag_main() { - FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z); - FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y); + FragColor = spvRefract(vRefract.x, vRefract.y, vRefract.z); + FragColor += spvReflect(vRefract.x, vRefract.y); FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; FragColor += reflect(vRefract.xy, 
vRefract.zy).y; } diff --git a/reference/opt/shaders-hlsl/frag/switch-unreachable-break.frag b/reference/opt/shaders-hlsl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..f25b768b9e5 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/switch-unreachable-break.frag @@ -0,0 +1,49 @@ +cbuffer UBO : register(b0) +{ + int _13_cond : packoffset(c0); + int _13_cond2 : packoffset(c0.y); +}; + + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + bool _49; + switch (_13_cond) + { + case 1: + { + if (_13_cond2 < 50) + { + _49 = false; + break; + } + else + { + discard; + } + break; // unreachable workaround + } + default: + { + _49 = true; + break; + } + } + bool4 _45 = _49.xxxx; + FragColor = float4(_45.x ? 10.0f.xxxx.x : 20.0f.xxxx.x, _45.y ? 10.0f.xxxx.y : 20.0f.xxxx.y, _45.z ? 10.0f.xxxx.z : 20.0f.xxxx.z, _45.w ? 10.0f.xxxx.w : 20.0f.xxxx.w); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag b/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag index ca88cfaeb3a..d4dd78d8901 100644 --- a/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag +++ b/reference/opt/shaders-hlsl/frag/tex-sampling-ms.frag @@ -26,6 +26,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/tex-sampling.frag b/reference/opt/shaders-hlsl/frag/tex-sampling.frag index 4f8e8f091ad..caedd37e448 100644 --- a/reference/opt/shaders-hlsl/frag/tex-sampling.frag +++ b/reference/opt/shaders-hlsl/frag/tex-sampling.frag @@ -48,12 +48,12 @@ void frag_main() float3 _88 = float3(texCoord2d, 2.0f); float4 _135 = float4(texCoord3d, 
2.0f); float4 _162 = (((((((((((((((((((tex1d.Sample(_tex1d_sampler, texCoord1d) + tex1d.Sample(_tex1d_sampler, texCoord1d, 1)) + tex1d.SampleLevel(_tex1d_sampler, texCoord1d, 2.0f)) + tex1d.SampleGrad(_tex1d_sampler, texCoord1d, 1.0f, 2.0f)) + tex1d.Sample(_tex1d_sampler, _41.x / _41.y)) + tex1d.SampleBias(_tex1d_sampler, texCoord1d, 1.0f)) + tex2d.Sample(_tex2d_sampler, texCoord2d)) + tex2d.Sample(_tex2d_sampler, texCoord2d, int2(1, 2))) + tex2d.SampleLevel(_tex2d_sampler, texCoord2d, 2.0f)) + tex2d.SampleGrad(_tex2d_sampler, texCoord2d, float2(1.0f, 2.0f), float2(3.0f, 4.0f))) + tex2d.Sample(_tex2d_sampler, _88.xy / _88.z)) + tex2d.SampleBias(_tex2d_sampler, texCoord2d, 1.0f)) + tex3d.Sample(_tex3d_sampler, texCoord3d)) + tex3d.Sample(_tex3d_sampler, texCoord3d, int3(1, 2, 3))) + tex3d.SampleLevel(_tex3d_sampler, texCoord3d, 2.0f)) + tex3d.SampleGrad(_tex3d_sampler, texCoord3d, float3(1.0f, 2.0f, 3.0f), float3(4.0f, 5.0f, 6.0f))) + tex3d.Sample(_tex3d_sampler, _135.xyz / _135.w)) + tex3d.SampleBias(_tex3d_sampler, texCoord3d, 1.0f)) + texCube.Sample(_texCube_sampler, texCoord3d)) + texCube.SampleLevel(_texCube_sampler, texCoord3d, 2.0f)) + texCube.SampleBias(_texCube_sampler, texCoord3d, 1.0f); - float4 _333 = _162; - _333.w = ((_162.w + tex1dShadow.SampleCmp(_tex1dShadow_sampler, float3(texCoord1d, 0.0f, 0.0f).x, 0.0f)) + tex2dShadow.SampleCmp(_tex2dShadow_sampler, float3(texCoord2d, 0.0f).xy, 0.0f)) + texCubeShadow.SampleCmp(_texCubeShadow_sampler, float4(texCoord3d, 0.0f).xyz, 0.0f); - float4 _308 = ((((((((((((((_333 + tex1dArray.Sample(_tex1dArray_sampler, texCoord2d)) + tex2dArray.Sample(_tex2dArray_sampler, texCoord3d)) + texCubeArray.Sample(_texCubeArray_sampler, texCoord4d)) + tex2d.GatherRed(_tex2d_sampler, texCoord2d)) + tex2d.GatherRed(_tex2d_sampler, texCoord2d)) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d)) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d)) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d)) + tex2d.GatherRed(_tex2d_sampler, 
texCoord2d, int2(1, 1))) + tex2d.GatherRed(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.Load(int3(int2(1, 2), 0))) + separateTex2d.Sample(samplerNonDepth, texCoord2d); - float4 _336 = _308; - _336.w = _308.w + separateTex2dDepth.SampleCmp(samplerDepth, texCoord3d.xy, texCoord3d.z); - FragColor = _336; + _162.w = ((_162.w + tex1dShadow.SampleCmp(_tex1dShadow_sampler, float3(texCoord1d, 0.0f, 0.0f).x, 0.0f)) + tex2dShadow.SampleCmp(_tex2dShadow_sampler, float3(texCoord2d, 0.0f).xy, 0.0f)) + texCubeShadow.SampleCmp(_texCubeShadow_sampler, float4(texCoord3d, 0.0f).xyz, 0.0f); + float4 _243 = tex2d.GatherRed(_tex2d_sampler, texCoord2d); + float4 _269 = tex2d.GatherRed(_tex2d_sampler, texCoord2d, int2(1, 1)); + float4 _308 = ((((((((((((((_162 + tex1dArray.Sample(_tex1dArray_sampler, texCoord2d)) + tex2dArray.Sample(_tex2dArray_sampler, texCoord3d)) + texCubeArray.Sample(_texCubeArray_sampler, texCoord4d)) + _243) + _243) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d)) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d)) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d)) + _269) + _269) + tex2d.GatherGreen(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherBlue(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.GatherAlpha(_tex2d_sampler, texCoord2d, int2(1, 1))) + tex2d.Load(int3(int2(1, 2), 0))) + separateTex2d.Sample(samplerNonDepth, texCoord2d); + _308.w = _308.w + separateTex2dDepth.SampleCmp(samplerDepth, texCoord3d.xy, texCoord3d.z); + FragColor = _308; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/tex-sampling.sm30.frag b/reference/opt/shaders-hlsl/frag/tex-sampling.sm30.frag new file mode 100644 index 00000000000..4a2d9b68f61 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/tex-sampling.sm30.frag @@ -0,0 +1,54 @@ +uniform 
sampler1D tex1d; +uniform sampler2D tex2d; +uniform sampler3D tex3d; +uniform samplerCUBE texCube; +uniform sampler1D tex1dShadow; +uniform sampler2D tex2dShadow; + +static float texCoord1d; +static float2 texCoord2d; +static float3 texCoord3d; +static float4 FragColor; +static float4 texCoord4d; + +struct SPIRV_Cross_Input +{ + float texCoord1d : TEXCOORD0; + float2 texCoord2d : TEXCOORD1; + float3 texCoord3d : TEXCOORD2; + float4 texCoord4d : TEXCOORD3; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : COLOR0; +}; + +void frag_main() +{ + float2 _34 = float2(texCoord1d, 2.0f); + float3 _73 = float3(texCoord2d, 2.0f); + float4 _112 = float4(texCoord3d, 2.0f); + float4 _139 = ((((((((((((((((tex1D(tex1d, texCoord1d) + tex1Dlod(tex1d, float4(texCoord1d, 0.0, 0.0, 2.0f))) + tex1Dgrad(tex1d, texCoord1d, 1.0f, 2.0f)) + tex1Dproj(tex1d, float4(_34.x, 0.0, 0.0, _34.y))) + tex1Dbias(tex1d, float4(texCoord1d, 0.0, 0.0, 1.0f))) + tex2D(tex2d, texCoord2d)) + tex2Dlod(tex2d, float4(texCoord2d, 0.0, 2.0f))) + tex2Dgrad(tex2d, texCoord2d, float2(1.0f, 2.0f), float2(3.0f, 4.0f))) + tex2Dproj(tex2d, float4(_73.xy, 0.0, _73.z))) + tex2Dbias(tex2d, float4(texCoord2d, 0.0, 1.0f))) + tex3D(tex3d, texCoord3d)) + tex3Dlod(tex3d, float4(texCoord3d, 2.0f))) + tex3Dgrad(tex3d, texCoord3d, float3(1.0f, 2.0f, 3.0f), float3(4.0f, 5.0f, 6.0f))) + tex3Dproj(tex3d, float4(_112.xyz, _112.w))) + tex3Dbias(tex3d, float4(texCoord3d, 1.0f))) + texCUBE(texCube, texCoord3d)) + texCUBElod(texCube, float4(texCoord3d, 2.0f))) + texCUBEbias(texCube, float4(texCoord3d, 1.0f)); + float3 _147 = float3(texCoord1d, 0.0f, 0.0f); + float4 _171 = float4(texCoord1d, 0.0f, 0.0f, 2.0f); + _171.y = 2.0f; + float3 _194 = float3(texCoord2d, 0.0f); + float4 _219 = float4(texCoord2d, 0.0f, 2.0f); + _219.z = 2.0f; + float4 _264 = _139; + _264.w = (((((((_139.w + tex1Dproj(tex1dShadow, float4(_147.x, 0.0, 0.0f, 1.0)).x) + tex1Dlod(tex1dShadow, float4(_147.x, 0.0, 0.0f, 2.0f)).x) + tex1Dproj(tex1dShadow, 
float4(_171.x, 0.0, 0.0f, _171.y)).x) + tex1Dbias(tex1dShadow, float4(_147.x, 0.0, 0.0f, 1.0f)).x) + tex2Dproj(tex2dShadow, float4(_194.xy, 0.0f, 1.0)).x) + tex2Dlod(tex2dShadow, float4(_194.xy, 0.0f, 2.0f)).x) + tex2Dproj(tex2dShadow, float4(_219.xy, 0.0f, _219.z)).x) + tex2Dbias(tex2dShadow, float4(_194.xy, 0.0f, 1.0f)).x; + FragColor = _264; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + texCoord1d = stage_input.texCoord1d; + texCoord2d = stage_input.texCoord2d; + texCoord3d = stage_input.texCoord3d; + texCoord4d = stage_input.texCoord4d; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = float4(FragColor); + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/texel-fetch-offset.frag b/reference/opt/shaders-hlsl/frag/texel-fetch-offset.frag index d7aa73d5264..9bd27697c5b 100644 --- a/reference/opt/shaders-hlsl/frag/texel-fetch-offset.frag +++ b/reference/opt/shaders-hlsl/frag/texel-fetch-offset.frag @@ -24,6 +24,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/texture-proj-shadow.frag b/reference/opt/shaders-hlsl/frag/texture-proj-shadow.frag index 07e06008a0b..bc710227ec8 100644 --- a/reference/opt/shaders-hlsl/frag/texture-proj-shadow.frag +++ b/reference/opt/shaders-hlsl/frag/texture-proj-shadow.frag @@ -28,15 +28,16 @@ struct SPIRV_Cross_Output void frag_main() { - float4 _20 = vClip4; - _20.y = vClip4.w; - FragColor = uShadow1D.SampleCmp(_uShadow1D_sampler, _20.x / _20.y, vClip4.z / _20.y); - float4 _30 = vClip4; - _30.z = vClip4.w; - FragColor = uShadow2D.SampleCmp(_uShadow2D_sampler, _30.xy / _30.z, vClip4.z / _30.z); + float4 _17 = vClip4; + float4 _20 = _17; + _20.y = _17.w; + FragColor = uShadow1D.SampleCmp(_uShadow1D_sampler, _20.x / _20.y, _17.z / _20.y); 
+ float4 _30 = _17; + _30.z = _17.w; + FragColor = uShadow2D.SampleCmp(_uShadow2D_sampler, _30.xy / _30.z, _17.z / _30.z); FragColor = uSampler1D.Sample(_uSampler1D_sampler, vClip2.x / vClip2.y).x; FragColor = uSampler2D.Sample(_uSampler2D_sampler, vClip3.xy / vClip3.z).x; - FragColor = uSampler3D.Sample(_uSampler3D_sampler, vClip4.xyz / vClip4.w).x; + FragColor = uSampler3D.Sample(_uSampler3D_sampler, _17.xyz / _17.w).x; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/frag/texture-size-combined-image-sampler.frag b/reference/opt/shaders-hlsl/frag/texture-size-combined-image-sampler.frag index d5c373746d8..dd2eb251fc2 100644 --- a/reference/opt/shaders-hlsl/frag/texture-size-combined-image-sampler.frag +++ b/reference/opt/shaders-hlsl/frag/texture-size-combined-image-sampler.frag @@ -8,7 +8,7 @@ struct SPIRV_Cross_Output int2 FooOut : SV_Target0; }; -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); @@ -18,7 +18,7 @@ uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) void frag_main() { uint _23_dummy_parameter; - FooOut = int2(SPIRV_Cross_textureSize(uTex, uint(0), _23_dummy_parameter)); + FooOut = int2(spvTextureSize(uTex, uint(0), _23_dummy_parameter)); } SPIRV_Cross_Output main() diff --git a/reference/opt/shaders-hlsl/frag/unary-enclose.frag b/reference/opt/shaders-hlsl/frag/unary-enclose.frag index 348b91c1727..85419ef14ad 100644 --- a/reference/opt/shaders-hlsl/frag/unary-enclose.frag +++ b/reference/opt/shaders-hlsl/frag/unary-enclose.frag @@ -1,11 +1,9 @@ static float4 FragColor; static float4 vIn; -static int4 vIn1; struct SPIRV_Cross_Input { float4 vIn : TEXCOORD0; - nointerpolation int4 vIn1 : TEXCOORD1; }; struct SPIRV_Cross_Output @@ -21,7 +19,6 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { vIn = 
stage_input.vIn; - vIn1 = stage_input.vIn1; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/opt/shaders-hlsl/frag/unorm-snorm-packing.frag b/reference/opt/shaders-hlsl/frag/unorm-snorm-packing.frag index 57b5950636e..95786b93b68 100644 --- a/reference/opt/shaders-hlsl/frag/unorm-snorm-packing.frag +++ b/reference/opt/shaders-hlsl/frag/unorm-snorm-packing.frag @@ -27,50 +27,50 @@ struct SPIRV_Cross_Output uint SNORM16Out : SV_Target4; }; -uint SPIRV_Cross_packUnorm4x8(float4 value) +uint spvPackUnorm4x8(float4 value) { uint4 Packed = uint4(round(saturate(value) * 255.0)); return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); } -float4 SPIRV_Cross_unpackUnorm4x8(uint value) +float4 spvUnpackUnorm4x8(uint value) { uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); return float4(Packed) / 255.0; } -uint SPIRV_Cross_packSnorm4x8(float4 value) +uint spvPackSnorm4x8(float4 value) { int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff; return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24)); } -float4 SPIRV_Cross_unpackSnorm4x8(uint value) +float4 spvUnpackSnorm4x8(uint value) { int SignedValue = int(value); int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24; return clamp(float4(Packed) / 127.0, -1.0, 1.0); } -uint SPIRV_Cross_packUnorm2x16(float2 value) +uint spvPackUnorm2x16(float2 value) { uint2 Packed = uint2(round(saturate(value) * 65535.0)); return Packed.x | (Packed.y << 16); } -float2 SPIRV_Cross_unpackUnorm2x16(uint value) +float2 spvUnpackUnorm2x16(uint value) { uint2 Packed = uint2(value & 0xffff, value >> 16); return float2(Packed) / 65535.0; } -uint SPIRV_Cross_packSnorm2x16(float2 value) +uint spvPackSnorm2x16(float2 value) { int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff; return uint(Packed.x | (Packed.y << 16)); } -float2 
SPIRV_Cross_unpackSnorm2x16(uint value) +float2 spvUnpackSnorm2x16(uint value) { int SignedValue = int(value); int2 Packed = int2(SignedValue << 16, SignedValue) >> 16; @@ -79,16 +79,18 @@ float2 SPIRV_Cross_unpackSnorm2x16(uint value) void frag_main() { - FP32Out = SPIRV_Cross_unpackUnorm4x8(UNORM8); - FP32Out = SPIRV_Cross_unpackSnorm4x8(SNORM8); - float2 _21 = SPIRV_Cross_unpackUnorm2x16(UNORM16); - FP32Out = float4(_21.x, _21.y, FP32Out.z, FP32Out.w); - float2 _26 = SPIRV_Cross_unpackSnorm2x16(SNORM16); - FP32Out = float4(_26.x, _26.y, FP32Out.z, FP32Out.w); - UNORM8Out = SPIRV_Cross_packUnorm4x8(FP32); - SNORM8Out = SPIRV_Cross_packSnorm4x8(FP32); - UNORM16Out = SPIRV_Cross_packUnorm2x16(FP32.xy); - SNORM16Out = SPIRV_Cross_packSnorm2x16(FP32.zw); + FP32Out = spvUnpackUnorm4x8(UNORM8); + FP32Out = spvUnpackSnorm4x8(SNORM8); + float2 _21 = spvUnpackUnorm2x16(UNORM16); + FP32Out.x = _21.x; + FP32Out.y = _21.y; + float2 _31 = spvUnpackSnorm2x16(SNORM16); + FP32Out.x = _31.x; + FP32Out.y = _31.y; + UNORM8Out = spvPackUnorm4x8(FP32); + SNORM8Out = spvPackSnorm4x8(FP32); + UNORM16Out = spvPackUnorm2x16(FP32.xy); + SNORM16Out = spvPackSnorm2x16(FP32.zw); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..4819b14f68e --- /dev/null +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,90 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : 
SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + float3 _173 = float3(gl_GlobalInvocationID); + float _174 = _173.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_174, _173.yz, 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_174, _173.yz, 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; + int _229 = int(gl_GlobalInvocationID.x); + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _229; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _229 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _229 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _229 + 3; + } +} + +[outputtopology("line")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT); +} diff --git a/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..7436c463ed3 --- /dev/null +++ b/reference/opt/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,90 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float gl_ClipDistance[1] : 
SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + float3 _29 = float3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(_31, _29.yz, 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(_31, _29.yz, 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(0u, 1u, 2u) + gl_LocalInvocationIndex.xxx; + int _127 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _127; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _127 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _127 + 2; + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _127 + 3; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveTriangleIndicesEXT); +} diff --git a/reference/opt/shaders-hlsl/vert/invariant.vert b/reference/opt/shaders-hlsl/vert/invariant.vert new file mode 100644 index 00000000000..ae1ae4b7e6f --- /dev/null +++ b/reference/opt/shaders-hlsl/vert/invariant.vert @@ -0,0 +1,39 @@ +static float4 gl_Position; +static float4 vInput0; +static float4 vInput1; +static float4 vInput2; +static float4 vColor; + +struct SPIRV_Cross_Input +{ + float4 vInput0 : TEXCOORD0; + float4 vInput1 : TEXCOORD1; + float4 vInput2 : TEXCOORD2; +}; + +struct SPIRV_Cross_Output +{ + precise float4 vColor : TEXCOORD0; + precise float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + float4 _21 = mad(vInput1, vInput2, vInput0); + gl_Position = _21; + float4 _27 = vInput0 - vInput1; + float4 _29 = _27 * vInput2; + vColor = _29; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vInput0 = stage_input.vInput0; + vInput1 = stage_input.vInput1; + vInput2 = stage_input.vInput2; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/vert/locations.vert b/reference/opt/shaders-hlsl/vert/locations.vert index 
b007582c2ae..0d14def3428 100644 --- a/reference/opt/shaders-hlsl/vert/locations.vert +++ b/reference/opt/shaders-hlsl/vert/locations.vert @@ -5,6 +5,12 @@ struct Foo float3 c; }; +struct VertexOut +{ + float3 color; + float3 foo; +}; + static const Foo _71 = { 1.0f.xxx, 1.0f.xxx, 1.0f.xxx }; static float4 gl_Position; @@ -16,13 +22,6 @@ static float vLocation1; static float vLocation2[2]; static Foo vLocation4; static float vLocation9; - -struct VertexOut -{ - float3 color : TEXCOORD7; - float3 foo : TEXCOORD8; -}; - static VertexOut vout; struct SPIRV_Cross_Input @@ -38,6 +37,8 @@ struct SPIRV_Cross_Output float vLocation1 : TEXCOORD1; float vLocation2[2] : TEXCOORD2; Foo vLocation4 : TEXCOORD4; + float3 VertexOut_color : TEXCOORD7; + float3 VertexOut_foo : TEXCOORD8; float vLocation9 : TEXCOORD9; float4 gl_Position : SV_Position; }; @@ -55,13 +56,12 @@ void vert_main() vout.foo = 4.0f.xxx; } -SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input, out VertexOut stage_outputvout) +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { Input2 = stage_input.Input2; Input4 = stage_input.Input4; Input0 = stage_input.Input0; vert_main(); - stage_outputvout = vout; SPIRV_Cross_Output stage_output; stage_output.gl_Position = gl_Position; stage_output.vLocation0 = vLocation0; @@ -69,5 +69,7 @@ SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input, out VertexOut stage_outpu stage_output.vLocation2 = vLocation2; stage_output.vLocation4 = vLocation4; stage_output.vLocation9 = vLocation9; + stage_output.VertexOut_color = vout.color; + stage_output.VertexOut_foo = vout.foo; return stage_output; } diff --git a/reference/opt/shaders-hlsl/vert/no-contraction.vert b/reference/opt/shaders-hlsl/vert/no-contraction.vert new file mode 100644 index 00000000000..10763fbee5a --- /dev/null +++ b/reference/opt/shaders-hlsl/vert/no-contraction.vert @@ -0,0 +1,39 @@ +static float4 gl_Position; +static float4 vA; +static float4 vB; +static float4 vC; + +struct SPIRV_Cross_Input +{ + float4 
vA : TEXCOORD0; + float4 vB : TEXCOORD1; + float4 vC : TEXCOORD2; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + precise float4 _15 = vA * vB; + precise float4 _19 = vA + vB; + precise float4 _23 = vA - vB; + precise float4 _30 = _15 + vC; + precise float4 _34 = _15 + _19; + precise float4 _36 = _34 + _23; + precise float4 _38 = _36 + _30; + gl_Position = _38; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vA = stage_input.vA; + vB = stage_input.vB; + vC = stage_input.vC; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/vert/qualifiers.vert b/reference/opt/shaders-hlsl/vert/qualifiers.vert index 13ee2a8c1c0..bbf7dc61e45 100644 --- a/reference/opt/shaders-hlsl/vert/qualifiers.vert +++ b/reference/opt/shaders-hlsl/vert/qualifiers.vert @@ -1,17 +1,16 @@ +struct Block +{ + float vFlat; + float vCentroid; + float vSample; + float vNoperspective; +}; + static float4 gl_Position; static float vFlat; static float vCentroid; static float vSample; static float vNoperspective; - -struct Block -{ - nointerpolation float vFlat : TEXCOORD4; - centroid float vCentroid : TEXCOORD5; - sample float vSample : TEXCOORD6; - noperspective float vNoperspective : TEXCOORD7; -}; - static Block vout; struct SPIRV_Cross_Output @@ -20,6 +19,10 @@ struct SPIRV_Cross_Output centroid float vCentroid : TEXCOORD1; sample float vSample : TEXCOORD2; noperspective float vNoperspective : TEXCOORD3; + nointerpolation float Block_vFlat : TEXCOORD4; + centroid float Block_vCentroid : TEXCOORD5; + sample float Block_vSample : TEXCOORD6; + noperspective float Block_vNoperspective : TEXCOORD7; float4 gl_Position : SV_Position; }; @@ -36,15 +39,18 @@ void vert_main() vout.vNoperspective = 3.0f; } -SPIRV_Cross_Output main(out Block stage_outputvout) +SPIRV_Cross_Output main() { vert_main(); - stage_outputvout = vout; 
SPIRV_Cross_Output stage_output; stage_output.gl_Position = gl_Position; stage_output.vFlat = vFlat; stage_output.vCentroid = vCentroid; stage_output.vSample = vSample; stage_output.vNoperspective = vNoperspective; + stage_output.Block_vFlat = vout.vFlat; + stage_output.Block_vCentroid = vout.vCentroid; + stage_output.Block_vSample = vout.vSample; + stage_output.Block_vNoperspective = vout.vNoperspective; return stage_output; } diff --git a/reference/opt/shaders-hlsl/vert/return-array.vert b/reference/opt/shaders-hlsl/vert/return-array.vert index bd157556338..be11c3f1a55 100644 --- a/reference/opt/shaders-hlsl/vert/return-array.vert +++ b/reference/opt/shaders-hlsl/vert/return-array.vert @@ -1,10 +1,8 @@ static float4 gl_Position; -static float4 vInput0; static float4 vInput1; struct SPIRV_Cross_Input { - float4 vInput0 : TEXCOORD0; float4 vInput1 : TEXCOORD1; }; @@ -20,7 +18,6 @@ void vert_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { - vInput0 = stage_input.vInput0; vInput1 = stage_input.vInput1; vert_main(); SPIRV_Cross_Output stage_output; diff --git a/reference/opt/shaders-msl/amd/shader_trinary_minmax.msl21.comp b/reference/opt/shaders-msl/amd/shader_trinary_minmax.msl21.comp new file mode 100644 index 00000000000..9c33c22ca86 --- /dev/null +++ b/reference/opt/shaders-msl/amd/shader_trinary_minmax.msl21.comp @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +kernel void main0() +{ +} + diff --git a/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp b/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp index feb7dbbe524..513f8763a32 100644 --- a/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp @@ -7,20 +7,21 @@ using namespace metal; -struct u0_counters -{ - uint c; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline 
__attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct u0_counters +{ + uint c; +}; + kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); + uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type(as_type(_29)))); } diff --git a/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp b/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp index 22409301c9c..55c41374c3b 100644 --- a/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp @@ -7,20 +7,21 @@ using namespace metal; -struct u0_counters -{ - uint c; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct u0_counters +{ + uint c; +}; + kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); + uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type(as_type(_29)))); } diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp index 47ce85f8fc3..cbbf27d65da 100644 --- a/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp @@ -15,7 +15,7 @@ 
struct _4 int4 _m1; }; -kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]]) +kernel void main0(device _3& __restrict _5 [[buffer(0)]], device _4& __restrict _6 [[buffer(1)]]) { _6._m0 = _5._m1 + uint4(_5._m0); _6._m0 = uint4(_5._m0) + _5._m1; diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..a55d8916dfa --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +kernel void main0(device _3& __restrict _5 [[buffer(0)]], device _4& __restrict _6 [[buffer(1)]]) +{ + _6._m0 = uint4(int4(_5._m1) < _5._m0); + _6._m0 = uint4(int4(_5._m1) <= _5._m0); + _6._m0 = uint4(_5._m1 < uint4(_5._m0)); + _6._m0 = uint4(_5._m1 <= uint4(_5._m0)); + _6._m0 = uint4(int4(_5._m1) > _5._m0); + _6._m0 = uint4(int4(_5._m1) >= _5._m0); + _6._m0 = uint4(_5._m1 > uint4(_5._m0)); + _6._m0 = uint4(_5._m1 >= uint4(_5._m0)); +} + diff --git a/reference/opt/shaders-msl/asm/comp/block-name-alias-global.asm.comp b/reference/opt/shaders-msl/asm/comp/block-name-alias-global.asm.comp index 2928efda2c4..6dcc14ea8d5 100644 --- a/reference/opt/shaders-msl/asm/comp/block-name-alias-global.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/block-name-alias-global.asm.comp @@ -18,11 +18,12 @@ struct A_2 { int a; int b; + char _m0_final_padding[8]; }; struct A_3 { - /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ A_2 Data[1024]; + A_2 Data[1024]; }; struct B @@ -32,7 +33,7 @@ struct B struct B_1 { - /* FIXME: A padded struct is needed here. If you see this message, file a bug! 
*/ A_2 Data[1024]; + A_2 Data[1024]; }; kernel void main0(device A_1& C1 [[buffer(0)]], constant A_3& C2 [[buffer(1)]], device B& C3 [[buffer(2)]], constant B_1& C4 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) diff --git a/reference/opt/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp b/reference/opt/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp index fa2b5fe53a7..db0ade34b4b 100644 --- a/reference/opt/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp @@ -5,17 +5,18 @@ using namespace metal; -struct cb5_struct -{ - float4 _m0[5]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct cb5_struct +{ + float4 _m0[5]; +}; + kernel void main0(constant cb5_struct& cb0_5 [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { uint _44 = as_type(as_type(int(gl_LocalInvocationID.x) << 4)) >> 2u; diff --git a/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp b/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp index 159d09b38c8..89e8d83ea71 100644 --- a/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/buffer-write.asm.comp @@ -5,17 +5,18 @@ using namespace metal; -struct cb -{ - float value; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct cb +{ + float value; +}; + kernel void main0(constant cb& _6 [[buffer(0)]], texture2d _buffer [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { _buffer.write(float4(_6.value), spvTexelBufferCoord(((32u * 
gl_WorkGroupID.x) + gl_LocalInvocationIndex))); diff --git a/reference/opt/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp b/reference/opt/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp new file mode 100644 index 00000000000..986e9096633 --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct _19 +{ +}; +struct _5 +{ + int _m0; + _19 _m1; + char _m2_pad[4]; + _19 _m2; + char _m3_pad[4]; + int _m3; +}; + +kernel void main0(device _5& _3 [[buffer(0)]], device _5& _4 [[buffer(1)]]) +{ + _4 = _3; +} + diff --git a/reference/opt/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp b/reference/opt/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp new file mode 100644 index 00000000000..4bcfeb21ab5 --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct _19 +{ +}; +struct _5 +{ + int _m0; + char _m1_pad[12]; + _19 _m1; + char _m2_pad[16]; + _19 _m2; + char _m3_pad[16]; + int _m3; +}; + +kernel void main0(constant _5& _3 [[buffer(0)]], device _5& _4 [[buffer(1)]]) +{ + _4 = _3; +} + diff --git a/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp deleted file mode 100644 index fb97d0da9bd..00000000000 --- a/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp +++ /dev/null @@ -1,10 +0,0 @@ -#include -#include - -using namespace metal; - -kernel void main0(texture2d TargetTexture [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - TargetTexture.write((TargetTexture.read(uint2(gl_WorkGroupID.xy)).xy + float2(1.0)).xyyy, uint2((gl_WorkGroupID.xy + uint2(1u)))); -} - diff --git a/reference/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp 
b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp similarity index 80% rename from reference/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp rename to reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp index c90faf9ef26..536556391ec 100644 --- a/reference/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -5,7 +5,8 @@ using namespace metal; -void _main(thread const uint3& id, thread texture2d TargetTexture) +static inline __attribute__((always_inline)) +void _main(thread const uint3& id, texture2d TargetTexture) { float2 loaded = TargetTexture.read(uint2(id.xy)).xy; float2 storeTemp = loaded + float2(1.0); diff --git a/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp b/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp index 7652733268f..35843733790 100644 --- a/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp @@ -15,7 +15,7 @@ struct _7 int4 _m1; }; -kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]]) +kernel void main0(device _6& __restrict _8 [[buffer(0)]], device _7& __restrict _9 [[buffer(1)]]) { _9._m0 = _8._m1 + uint4(_8._m0); _9._m0 = uint4(_8._m0) + _8._m1; diff --git a/reference/opt/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp b/reference/opt/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp new file mode 100644 index 00000000000..d643379aaf6 --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp @@ -0,0 +1,110 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _29 +{ + spvUnsafeArray, 3> _m0; +}; + +struct _7 +{ + int _m0[1]; +}; + +constant int3 _32 = {}; +constant int _3_tmp [[function_constant(0)]]; +constant int _3 = is_function_constant_defined(_3_tmp) ? _3_tmp : 0; +constant int _4_tmp [[function_constant(1)]]; +constant int _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 0; +constant int _5_tmp [[function_constant(2)]]; +constant int _5 = is_function_constant_defined(_5_tmp) ? 
_5_tmp : 0; +constant spvUnsafeArray _36 = spvUnsafeArray({ _3, 0, 0 }); +constant spvUnsafeArray _37 = spvUnsafeArray({ _3, _4, 0 }); +constant spvUnsafeArray _38 = spvUnsafeArray({ _3, _4, _5 }); +constant spvUnsafeArray _39 = spvUnsafeArray({ _4, 0, 0 }); +constant spvUnsafeArray _40 = spvUnsafeArray({ _4, _5, 0 }); +constant spvUnsafeArray _41 = spvUnsafeArray({ _4, _5, _3 }); +constant spvUnsafeArray _42 = spvUnsafeArray({ _5, 0, 0 }); +constant spvUnsafeArray _43 = spvUnsafeArray({ _5, _3, 0 }); +constant spvUnsafeArray _44 = spvUnsafeArray({ _5, _3, _4 }); +constant spvUnsafeArray, 3> _45 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }) }); +constant spvUnsafeArray, 3> _46 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ 0, 0, 0 }) }); +constant spvUnsafeArray, 3> _47 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ _5, _3, _4 }) }); +constant _29 _48 = _29{ spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ _5, _3, _4 }) }) }; +constant int _50 = _48._m0[0][0]; +constant int _51 = _48._m0[1][0]; +constant int _52 = _48._m0[0][1]; +constant int _53 = _48._m0[2][2]; +constant int _54 = _48._m0[2][0]; +constant int _55 = _48._m0[1][1]; +constant bool _56 = (_50 == _51); +constant bool _57 = (_52 == _53); +constant bool _58 = (_54 == _55); +constant int _59 = int(_56); +constant int _60 = int(_57); +constant int _61 = _58 ? 
2 : 1; +constant int3 _62 = int3(_3, 0, 0); +constant int3 _63 = int3(0, _4, 0); +constant int3 _64 = int3(0, 0, _5); +constant int3 _65 = int3(_62.x, 0, _62.z); +constant int3 _66 = int3(0, _63.y, _63.x); +constant int3 _67 = int3(_64.z, 0, _64.z); +constant int3 _68 = int3(_65.y, _65.x, _66.y); +constant int3 _69 = int3(_67.z, _68.y, _68.z); +constant int _70 = _69.x; +constant int _71 = _69.y; +constant int _72 = _69.z; +constant int _73 = (_70 - _71); +constant int _74 = (_73 * _72); + +constant spvUnsafeArray _33 = spvUnsafeArray({ 0, 0, 0 }); +constant spvUnsafeArray, 3> _34 = spvUnsafeArray, 3>({ spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }) }); + +kernel void main0(device _7& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _9._m0[gl_GlobalInvocationID.x] = _8._m0[gl_GlobalInvocationID.x] + ((((1 - _59) * _60) * (_61 - 1)) * _74); +} + diff --git a/reference/opt/shaders-msl/asm/comp/quantize.asm.comp b/reference/opt/shaders-msl/asm/comp/quantize.asm.comp index 1839ec7a3b8..672c2b20883 100644 --- a/reference/opt/shaders-msl/asm/comp/quantize.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/quantize.asm.comp @@ -1,8 +1,21 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + #include #include using namespace metal; +template struct SpvHalfTypeSelector; +template <> struct SpvHalfTypeSelector { public: using H = half; }; +template struct SpvHalfTypeSelector> { using H = vec; }; +template::H> +[[clang::optnone]] F spvQuantizeToF16(F fval) +{ + H hval = H(fval); + hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval)); + return F(hval); +} + struct SSBO0 { float scalar; @@ -13,9 +26,9 @@ struct SSBO0 kernel void main0(device SSBO0& _4 [[buffer(0)]]) { - _4.scalar = float(half(_4.scalar)); - _4.vec2_val = float2(half2(_4.vec2_val)); - _4.vec3_val = float3(half3(_4.vec3_val)); - _4.vec4_val = float4(half4(_4.vec4_val)); + 
_4.scalar = spvQuantizeToF16(_4.scalar); + _4.vec2_val = spvQuantizeToF16(_4.vec2_val); + _4.vec3_val = spvQuantizeToF16(_4.vec3_val); + _4.vec4_val = spvQuantizeToF16(_4.vec4_val); } diff --git a/reference/opt/shaders-msl/asm/comp/uint_smulextended.asm.comp b/reference/opt/shaders-msl/asm/comp/uint_smulextended.asm.comp new file mode 100644 index 00000000000..6996f7fd26a --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/uint_smulextended.asm.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct _4 +{ + uint _m0[1]; +}; + +struct _20 +{ + uint _m0; + uint _m1; +}; + +kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _20 _28; + _28._m0 = uint(int(_5._m0[gl_GlobalInvocationID.x]) * int(_6._m0[gl_GlobalInvocationID.x])); + _28._m1 = uint(mulhi(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]))); + _7._m0[gl_GlobalInvocationID.x] = _28._m0; + _8._m0[gl_GlobalInvocationID.x] = _28._m1; +} + diff --git a/reference/opt/shaders-msl/asm/comp/undefined-constant-composite.asm.comp b/reference/opt/shaders-msl/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..359e8913fc6 --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct _20 +{ + int _m0; + int _m1; +}; + +struct _5 +{ + int _m0[10]; +}; + +struct _7 +{ + int _m0[10]; +}; + +constant int _28 = {}; + +kernel void main0(device _5& _6 [[buffer(0)]], device _7& _8 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _6._m0[gl_GlobalInvocationID.x] = _8._m0[gl_GlobalInvocationID.x] + (_20{ _28, 200 })._m1; +} + diff --git a/reference/opt/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp 
b/reference/opt/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp new file mode 100644 index 00000000000..0cb22e1761d --- /dev/null +++ b/reference/opt/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct _21 +{ + int _m0; + int _m1; +}; + +struct _5 +{ + int _m0[10]; +}; + +struct _7 +{ + int _m0[10]; +}; + +constant int _29 = {}; +constant int _9_tmp [[function_constant(0)]]; +constant int _9 = is_function_constant_defined(_9_tmp) ? _9_tmp : 0; +constant _21 _30 = _21{ _9, _29 }; + +kernel void main0(device _5& _6 [[buffer(0)]], device _7& _8 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _6._m0[gl_GlobalInvocationID.x] = (_8._m0[gl_GlobalInvocationID.x] + _30._m0) + (_21{ _29, 200 })._m1; +} + diff --git a/reference/opt/shaders-msl/asm/comp/variable-pointers-2.asm.comp b/reference/opt/shaders-msl/asm/comp/variable-pointers-2.asm.comp index b2dfc01b196..a276b400c00 100644 --- a/reference/opt/shaders-msl/asm/comp/variable-pointers-2.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/variable-pointers-2.asm.comp @@ -17,20 +17,35 @@ struct bar kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - bool _70 = cb.d != 0; - device foo* _71 = _70 ? &buf : nullptr; - device foo* _67 = _71; - device foo* _45 = _71; - thread uint3* _77 = _70 ? &gl_GlobalInvocationID : &gl_LocalInvocationID; - thread uint3* _73 = _77; - for (device int* _52 = &_71->a[0u], * _55 = &buf.a[0u]; (*_52) != (*_55); ) + bool _71 = cb.d != 0; + device foo* _72 = _71 ? &buf : nullptr; + device foo* _67 = _72; + device foo* _45 = _72; + thread uint3* _79 = _71 ? 
&gl_GlobalInvocationID : &gl_LocalInvocationID; + thread uint3* _74 = _79; + device int* _52; + device int* _55; + _52 = &_72->a[0u]; + _55 = &buf.a[0u]; + int _57; + int _58; + for (;;) { - int _66 = ((*_52) + (*_55)) + int((*_77).x); - *_52 = _66; - *_55 = _66; - _52 = &_52[1u]; - _55 = &_55[1u]; - continue; + _57 = *_52; + _58 = *_55; + if (_57 != _58) + { + int _66 = (_57 + _58) + int((*_79).x); + *_52 = _66; + *_55 = _66; + _52 = &_52[1u]; + _55 = &_55[1u]; + continue; + } + else + { + break; + } } } diff --git a/reference/opt/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp b/reference/opt/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp index b4e03a2924f..e1861730f5c 100644 --- a/reference/opt/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp @@ -15,11 +15,11 @@ struct bar kernel void main0(device foo& x [[buffer(0)]], device bar& y [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - device int* _46 = (gl_GlobalInvocationID.x != 0u) ? &x.a : &y.b; - device int* _40 = _46; - device int* _33 = _46; + device int* _47 = (gl_GlobalInvocationID.x != 0u) ? 
&x.a : &y.b; + device int* _40 = _47; + device int* _33 = _47; int _37 = x.a; - *_46 = 0; + *_47 = 0; y.b = _37 + _37; } diff --git a/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp b/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp index 641f108e8a1..afbcadd0b95 100644 --- a/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp @@ -5,7 +5,7 @@ using namespace metal; struct cb1_struct { - float4 _m0[1]; + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; }; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 16u, 1u); @@ -13,14 +13,14 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 16u, 1u); kernel void main0(constant cb1_struct& cb0_1 [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { int2 _46 = int2(u0.get_width(), u0.get_height()) >> int2(uint2(4u)); - int _97; - _97 = 0; - for (; _97 < _46.y; _97++) + int _98; + _98 = 0; + for (; _98 < _46.y; _98++) { - for (int _98 = 0; _98 < _46.x; ) + for (int _99 = 0; _99 < _46.x; ) { - u0.write(cb0_1._m0[0].xxxx, uint2(((_46 * int3(gl_LocalInvocationID).xy) + int2(_97, _98)))); - _98++; + u0.write(cb0_1._RESERVED_IDENTIFIER_FIXUP_m0[0].xxxx, uint2(((_46 * int3(gl_LocalInvocationID).xy) + int2(_98, _99)))); + _99++; continue; } } diff --git a/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast.asm.comp b/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast.asm.comp index 7f6d4bd900e..e572525ebd7 100644 --- a/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast.asm.comp +++ b/reference/opt/shaders-msl/asm/comp/vector-builtin-type-cast.asm.comp @@ -5,7 +5,7 @@ using namespace metal; struct cb1_struct { - float4 _m0[1]; + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; }; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 16u, 1u); @@ -19,7 +19,7 @@ kernel void main0(constant 
cb1_struct& cb0_1 [[buffer(0)]], texture2d +#include + +using namespace metal; + +struct _7 +{ + float4 _m0[64]; +}; + +struct main0_out +{ + float4 m_3 [[color(0)]]; +}; + +struct main0_in +{ + float4 m_2 [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], device _7& _10 [[buffer(0)]], texture2d _8 [[texture(0)]]) +{ + main0_out out = {}; + for (int _154 = 0; _154 < 64; ) + { + _10._m0[_154] = _8.read(uint2(int2(_154 - 8 * (_154 / 8), _154 / 8)), 0); + _154++; + continue; + } + out.m_3 = in.m_2; + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag b/reference/opt/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag new file mode 100644 index 00000000000..bbe0acd75b8 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + float4 _m0[64]; +}; + +struct main0_out +{ + float4 m_3 [[color(0)]]; +}; + +struct main0_in +{ + float4 m_2 [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], device _7& _10 [[buffer(0)]], texture2d _8 [[texture(0)]], sampler _9 [[sampler(0)]]) +{ + main0_out out = {}; + for (int _158 = 0; _158 < 64; ) + { + _10._m0[_158] = _8.sample(_9, (float2(int2(_158 - 8 * (_158 / 8), _158 / 8)) * float2(0.125)), level(0.0)); + _158++; + continue; + } + out.m_3 = in.m_2; + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag b/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag index 1870f67194e..58f02ad0726 100644 --- a/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/descriptor-array-unnamed.asm.frag @@ -42,7 +42,7 @@ fragment main0_out main0(const device _4* _5_0 [[buffer(0)]], const device _4* _ }; main0_out out = {}; - out.m_3 = _5[_20._m0]->_m0 + (_8[_20._m0]->_m0 * 
float4(0.20000000298023223876953125)); + out.m_3 = fma(_8[_20._m0]->_m0, float4(0.20000000298023223876953125), _5[_20._m0]->_m0); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag b/reference/opt/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag new file mode 100644 index 00000000000..b64ccabe6bc --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 o1 [[color(1)]]; + float4 o3 [[color(3)]]; + float4 o6 [[color(6)]]; + float4 o7 [[color(7)]]; +}; + +fragment main0_out main0() +{ + float4 o0; + float4 o2; + float4 o4; + float4 o5; + float gl_FragDepth; + int gl_FragStencilRefARB; + main0_out out = {}; + o0 = float4(0.0, 0.0, 0.0, 1.0); + out.o1 = float4(1.0, 0.0, 0.0, 1.0); + o2 = float4(0.0, 1.0, 0.0, 1.0); + out.o3 = float4(0.0, 0.0, 1.0, 1.0); + o4 = float4(1.0, 0.0, 1.0, 0.5); + o5 = float4(0.25); + out.o6 = float4(0.75); + out.o7 = float4(1.0); + gl_FragDepth = 0.89999997615814208984375; + gl_FragStencilRefARB = uint(127); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag b/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag index 649f9f6f187..09f8ed8c0d6 100644 --- a/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag @@ -23,8 +23,8 @@ struct main0_out fragment main0_out main0(constant buf& _11 [[buffer(0)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - int _67 = int(gl_FragCoord.x) % 16; - out._entryPointOutput = float4(dot(float3(_11.results[_67].a), _11.bar.xyz), _11.results[_67].b, 0.0, 0.0); + int _68 = int(gl_FragCoord.x) % 16; + out._entryPointOutput = float4(dot(float3(_11.results[_68].a), _11.bar.xyz), _11.results[_68].b, 0.0, 0.0); return out; } diff 
--git a/reference/opt/shaders-msl/asm/frag/inf-nan-constant.asm.frag b/reference/opt/shaders-msl/asm/frag/inf-nan-constant.asm.frag index 8537dac19a1..067719896b8 100644 --- a/reference/opt/shaders-msl/asm/frag/inf-nan-constant.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/inf-nan-constant.asm.frag @@ -11,7 +11,7 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - out.FragColor = float3(as_type(0x7f800000u), as_type(0xff800000u), as_type(0x7fc00000u)); + out.FragColor = float3(as_type(0x7f800000u /* inf */), as_type(0xff800000u /* -inf */), as_type(0x7fc00000u /* nan */)); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag b/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag index 41472adac94..fe49e09aa4e 100644 --- a/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag @@ -21,27 +21,27 @@ struct main0_out struct main0_in { - float2 Input_v0 [[user(locn0)]]; - float2 Input_v1 [[user(locn1), center_no_perspective]]; - float3 Input_v2 [[user(locn2), centroid_perspective]]; - float4 Input_v3 [[user(locn3), centroid_no_perspective]]; - float Input_v4 [[user(locn4), sample_perspective]]; - float Input_v5 [[user(locn5), sample_no_perspective]]; - float Input_v6 [[user(locn6), flat]]; + float2 inp_v0 [[user(locn0)]]; + float2 inp_v1 [[user(locn1), center_no_perspective]]; + float3 inp_v2 [[user(locn2), centroid_perspective]]; + float4 inp_v3 [[user(locn3), centroid_no_perspective]]; + float inp_v4 [[user(locn4), sample_perspective]]; + float inp_v5 [[user(locn5), sample_no_perspective]]; + float inp_v6 [[user(locn6), flat]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; Input inp = {}; - inp.v0 = in.Input_v0; - inp.v1 = in.Input_v1; - inp.v2 = in.Input_v2; - inp.v3 = in.Input_v3; - inp.v4 = in.Input_v4; - inp.v5 = in.Input_v5; - inp.v6 = 
in.Input_v6; - out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); + inp.v0 = in.inp_v0; + inp.v1 = in.inp_v1; + inp.v2 = in.inp_v2; + inp.v3 = in.inp_v3; + inp.v4 = in.inp_v4; + inp.v5 = in.inp_v5; + inp.v6 = in.inp_v6; + out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, fma(inp.v3.w, inp.v4, inp.v5) - inp.v6); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/line-directive.line.asm.frag b/reference/opt/shaders-msl/asm/frag/line-directive.line.asm.frag index 30018aad4c9..27b7d4771f7 100644 --- a/reference/opt/shaders-msl/asm/frag/line-directive.line.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/line-directive.line.asm.frag @@ -17,14 +17,12 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float _80; #line 8 "test.frag" out.FragColor = 1.0; #line 9 "test.frag" out.FragColor = 2.0; #line 10 "test.frag" - _80 = in.vColor; - if (_80 < 0.0) + if (in.vColor < 0.0) { #line 12 "test.frag" out.FragColor = 3.0; @@ -34,16 +32,19 @@ fragment main0_out main0(main0_in in [[stage_in]]) #line 16 "test.frag" out.FragColor = 4.0; } - for (int _126 = 0; float(_126) < (40.0 + _80); ) +#line 19 "test.frag" + for (int _127 = 0; float(_127) < (40.0 + in.vColor); ) { #line 21 "test.frag" out.FragColor += 0.20000000298023223876953125; #line 22 "test.frag" out.FragColor += 0.300000011920928955078125; - _126 += (int(_80) + 5); +#line 19 "test.frag" + _127 += (int(in.vColor) + 5); continue; } - switch (int(_80)) +#line 25 "test.frag" + switch (int(in.vColor)) { case 0: { @@ -69,7 +70,8 @@ fragment main0_out main0(main0_in in [[stage_in]]) } for (;;) { - out.FragColor += (10.0 + _80); +#line 42 "test.frag" + out.FragColor += (10.0 + in.vColor); #line 43 "test.frag" if (out.FragColor < 100.0) { @@ -79,6 +81,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) break; } } +#line 48 "test.frag" return out; } diff --git 
a/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag index 610d447a8d7..726976631ac 100644 --- a/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -1,13 +1,52 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float _46[16] = { 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }; -constant float4 _76[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; -constant float4 _90[4] = { float4(20.0), float4(30.0), float4(50.0), float4(60.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _46 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }); +constant spvUnsafeArray _76 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); +constant spvUnsafeArray _90 = spvUnsafeArray({ float4(20.0), float4(30.0), float4(50.0), float4(60.0) }); struct main0_out { @@ -19,23 +58,10 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability 
to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { - float4 foobar[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; - float4 baz[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; + spvUnsafeArray foobar = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); + spvUnsafeArray baz = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); main0_out out = {}; out.FragColor = _46[in.index]; if (in.index < 10) @@ -61,7 +87,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) } int _37 = in.index & 3; out.FragColor += foobar[_37].z; - spvArrayCopyFromConstant1(baz, _90); + baz = _90; out.FragColor += baz[_37].z; return out; } diff --git a/reference/opt/shaders-msl/asm/frag/op-image-sampled-image.asm.frag b/reference/opt/shaders-msl/asm/frag/op-image-sampled-image.asm.frag index 45f0ca52f4a..807fde3f49c 100644 --- a/reference/opt/shaders-msl/asm/frag/op-image-sampled-image.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/op-image-sampled-image.asm.frag @@ -16,7 +16,7 @@ struct main0_out fragment main0_out main0(constant push_cb& _19 [[buffer(0)]], texture2d t0 [[texture(0)]], sampler dummy_sampler [[sampler(0)]]) { main0_out out = {}; - out.o0 = t0.read(uint2(as_type(_19.cb0[0u].zw)) + uint2(int2(-1, -2)), as_type(0.0)); + out.o0 = t0.read(uint2(as_type(_19.cb0[0u].zw)) + uint2(int2(-1, -2)), 0); return out; } diff --git a/reference/opt/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag b/reference/opt/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag new file mode 100644 index 00000000000..6a6b1622d20 --- /dev/null +++ 
b/reference/opt/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag @@ -0,0 +1,181 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _13 +{ + float4 x; + float4 y; + float4 z; + spvUnsafeArray u; + spvUnsafeArray v; + spvUnsafeArray w; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + interpolant foo [[user(locn0)]]; + interpolant bar [[user(locn1)]]; + interpolant baz [[user(locn2)]]; + int sid [[user(locn3)]]; + interpolant a_0 [[user(locn4)]]; + interpolant a_1 [[user(locn5)]]; + interpolant b_0 [[user(locn6)]]; + interpolant b_1 [[user(locn7)]]; + interpolant c_0 [[user(locn8)]]; + interpolant c_1 [[user(locn9)]]; + interpolant s_x [[user(locn10)]]; + interpolant s_y [[user(locn11)]]; + interpolant s_z [[user(locn12)]]; + interpolant s_u_0 [[user(locn13)]]; + interpolant s_u_1 [[user(locn14)]]; + interpolant s_v_0 [[user(locn15)]]; + interpolant s_v_1 [[user(locn16)]]; + interpolant s_w_0 [[user(locn17)]]; + interpolant s_w_1 [[user(locn18)]]; + interpolant s_w_2 [[user(locn19)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], uint gl_SampleID [[sample_id]]) +{ + main0_out out 
= {}; + spvUnsafeArray a = {}; + _13 s = {}; + spvUnsafeArray b = {}; + spvUnsafeArray c = {}; + a[0] = in.a_0.interpolate_at_center(); + a[1] = in.a_1.interpolate_at_center(); + s.x = in.s_x.interpolate_at_center(); + s.y = in.s_y.interpolate_at_centroid(); + s.z = in.s_z.interpolate_at_sample(gl_SampleID); + s.u[0] = in.s_u_0.interpolate_at_centroid(); + s.u[1] = in.s_u_1.interpolate_at_centroid(); + s.v[0] = in.s_v_0.interpolate_at_sample(gl_SampleID); + s.v[1] = in.s_v_1.interpolate_at_sample(gl_SampleID); + s.w[0] = in.s_w_0.interpolate_at_center(); + s.w[1] = in.s_w_1.interpolate_at_center(); + s.w[2] = in.s_w_2.interpolate_at_center(); + b[0] = in.b_0.interpolate_at_centroid(); + b[1] = in.b_1.interpolate_at_centroid(); + c[0] = in.c_0.interpolate_at_sample(gl_SampleID); + c[1] = in.c_1.interpolate_at_sample(gl_SampleID); + out.FragColor = in.foo.interpolate_at_center(); + out.FragColor += in.foo.interpolate_at_centroid(); + out.FragColor += in.foo.interpolate_at_sample(in.sid); + out.FragColor += in.foo.interpolate_at_offset(float2(0.100000001490116119384765625) + 0.4375); + float3 _65 = out.FragColor.xyz + in.bar.interpolate_at_centroid(); + out.FragColor = float4(_65.x, _65.y, _65.z, out.FragColor.w); + float3 _71 = out.FragColor.xyz + in.bar.interpolate_at_centroid(); + out.FragColor = float4(_71.x, _71.y, _71.z, out.FragColor.w); + float3 _78 = out.FragColor.xyz + in.bar.interpolate_at_sample(in.sid); + out.FragColor = float4(_78.x, _78.y, _78.z, out.FragColor.w); + float3 _84 = out.FragColor.xyz + in.bar.interpolate_at_offset(float2(-0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_84.x, _84.y, _84.z, out.FragColor.w); + float2 _91 = out.FragColor.xy + b[0]; + out.FragColor = float4(_91.x, _91.y, out.FragColor.z, out.FragColor.w); + float2 _98 = out.FragColor.xy + in.b_1.interpolate_at_centroid(); + out.FragColor = float4(_98.x, _98.y, out.FragColor.z, out.FragColor.w); + float2 _105 = out.FragColor.xy + 
in.b_0.interpolate_at_sample(2); + out.FragColor = float4(_105.x, _105.y, out.FragColor.z, out.FragColor.w); + float2 _112 = out.FragColor.xy + in.b_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_112.x, _112.y, out.FragColor.z, out.FragColor.w); + float2 _119 = out.FragColor.xy + c[0]; + out.FragColor = float4(_119.x, _119.y, out.FragColor.z, out.FragColor.w); + float2 _127 = out.FragColor.xy + in.c_1.interpolate_at_centroid().xy; + out.FragColor = float4(_127.x, _127.y, out.FragColor.z, out.FragColor.w); + float2 _135 = out.FragColor.xy + in.c_0.interpolate_at_sample(2).yx; + out.FragColor = float4(_135.x, _135.y, out.FragColor.z, out.FragColor.w); + float2 _143 = out.FragColor.xy + in.c_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).xx; + out.FragColor = float4(_143.x, _143.y, out.FragColor.z, out.FragColor.w); + out.FragColor += s.x; + out.FragColor += in.s_x.interpolate_at_centroid(); + out.FragColor += in.s_x.interpolate_at_sample(in.sid); + out.FragColor += in.s_x.interpolate_at_offset(float2(0.100000001490116119384765625) + 0.4375); + out.FragColor += s.y; + out.FragColor += in.s_y.interpolate_at_centroid(); + out.FragColor += in.s_y.interpolate_at_sample(in.sid); + out.FragColor += in.s_y.interpolate_at_offset(float2(-0.100000001490116119384765625) + 0.4375); + float2 _184 = out.FragColor.xy + s.v[0]; + out.FragColor = float4(_184.x, _184.y, out.FragColor.z, out.FragColor.w); + float2 _191 = out.FragColor.xy + in.s_v_1.interpolate_at_centroid(); + out.FragColor = float4(_191.x, _191.y, out.FragColor.z, out.FragColor.w); + float2 _198 = out.FragColor.xy + in.s_v_0.interpolate_at_sample(2); + out.FragColor = float4(_198.x, _198.y, out.FragColor.z, out.FragColor.w); + float2 _205 = out.FragColor.xy + in.s_v_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + 
out.FragColor = float4(_205.x, _205.y, out.FragColor.z, out.FragColor.w); + out.FragColor.x += s.w[0]; + out.FragColor.x += in.s_w_1.interpolate_at_centroid(); + out.FragColor.x += in.s_w_0.interpolate_at_sample(2); + out.FragColor.x += in.s_w_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + float2 _328 = out.FragColor.xy + in.baz.interpolate_at_sample(gl_SampleID); + out.FragColor = float4(_328.x, _328.y, out.FragColor.z, out.FragColor.w); + out.FragColor.x += in.baz.interpolate_at_centroid().x; + out.FragColor.y += in.baz.interpolate_at_sample(3).y; + out.FragColor.z += in.baz.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).y; + float2 _353 = out.FragColor.xy + in.a_1.interpolate_at_centroid(); + out.FragColor = float4(_353.x, _353.y, out.FragColor.z, out.FragColor.w); + float2 _360 = out.FragColor.xy + in.a_0.interpolate_at_sample(2); + out.FragColor = float4(_360.x, _360.y, out.FragColor.z, out.FragColor.w); + float2 _367 = out.FragColor.xy + in.a_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_367.x, _367.y, out.FragColor.z, out.FragColor.w); + out.FragColor += s.z; + float2 _379 = out.FragColor.xy + in.s_z.interpolate_at_centroid().yy; + out.FragColor = float4(_379.x, _379.y, out.FragColor.z, out.FragColor.w); + float2 _387 = out.FragColor.yz + in.s_z.interpolate_at_sample(3).xy; + out.FragColor = float4(out.FragColor.x, _387.x, _387.y, out.FragColor.w); + float2 _395 = out.FragColor.zw + in.s_z.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).wx; + out.FragColor = float4(out.FragColor.x, out.FragColor.y, _395.x, _395.y); + out.FragColor += s.u[0]; + out.FragColor += in.s_u_1.interpolate_at_centroid(); + out.FragColor += in.s_u_0.interpolate_at_sample(2); + out.FragColor += 
in.s_u_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/single-function-private-lut.asm.frag b/reference/opt/shaders-msl/asm/frag/single-function-private-lut.asm.frag index 4081c3d89ab..6ae5ec7844f 100644 --- a/reference/opt/shaders-msl/asm/frag/single-function-private-lut.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/single-function-private-lut.asm.frag @@ -1,31 +1,70 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct myType { float data; }; -constant myType _21[5] = { myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 } }; - struct main0_out { float4 o_color [[color(0)]]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} - fragment main0_out main0(float4 gl_FragCoord [[position]]) { + spvUnsafeArray _21 = spvUnsafeArray({ 
myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 } }); + main0_out out = {}; if (_21[int(mod(gl_FragCoord.x, 4.0))].data > 0.0) { diff --git a/reference/opt/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag b/reference/opt/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag index 1bafc6953ba..d59013daaf8 100644 --- a/reference/opt/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag @@ -1,9 +1,50 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; -constant float4 _20[2] = { float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _20 = spvUnsafeArray({ float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }); struct main0_out { @@ -15,7 +56,7 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - float4 FragColors[2] = { float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }; + spvUnsafeArray FragColors = spvUnsafeArray({ float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }); out.FragColor = float4(5.0); out.FragColors_0 = FragColors[0]; out.FragColors_1 = FragColors[1]; diff --git 
a/reference/opt/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag b/reference/opt/shaders-msl/asm/frag/switch-different-sizes.asm.frag similarity index 100% rename from reference/opt/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag rename to reference/opt/shaders-msl/asm/frag/switch-different-sizes.asm.frag diff --git a/reference/opt/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag b/reference/opt/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag new file mode 100644 index 00000000000..92ac1d9f832 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ +} + diff --git a/reference/opt/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag b/reference/opt/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag new file mode 100644 index 00000000000..92ac1d9f832 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ +} + diff --git a/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..ab5be649849 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + spvUnsafeArray _m0; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _102 = float3(_100.x, _100.y, _70.z); + _102.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102)) + { + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + 
_73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..ca5e3eadb70 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,122 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + spvUnsafeArray _m0; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvBufferSizeConstants [[buffer(25)]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + constant uint& CulledObjectBoxBoundsBufferSize = spvBufferSizeConstants[0]; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _102 = float3(_100.x, _100.y, _70.z); + _102.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _186 = clamp(_103 + 1u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 
16) - 1u); + if (all(CulledObjectBoxBounds._m0[_186].xy > _96.xy) && all(CulledObjectBoxBounds._m0[clamp(_103, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz < _102)) + { + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + _73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[int(clamp(uint(_160), uint(0), uint(7)))] - (float3(0.5) * (CulledObjectBoxBounds._m0[clamp(_103, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz + CulledObjectBoxBounds._m0[_186].xyz)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 2u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz), dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 3u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz), dot(_166, CulledObjectBoxBounds._m0[clamp(_103 + 4u, 0u, ((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u)].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag new file mode 100644 index 00000000000..aee290f5a2f --- /dev/null +++ 
b/reference/opt/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +constant float a_tmp [[function_constant(1)]]; +constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0; +constant float b_tmp [[function_constant(2)]]; +constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(a + b); + return out; +} + diff --git a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag index a1a73ced2bd..2031b335d48 100644 --- a/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag @@ -88,8 +88,6 @@ struct _18 float4 _m38[2]; }; -constant _28 _74 = {}; - struct main0_out { float4 m_5 [[color(0)]]; @@ -98,11 +96,10 @@ struct main0_out fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buffer(1)]], constant _18& _19 [[buffer(2)]], texture2d _8 [[texture(0)]], texture2d _12 [[texture(1)]], texture2d _14 [[texture(2)]], sampler _9 [[sampler(0)]], sampler _13 [[sampler(1)]], sampler _15 [[sampler(2)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - float2 _82 = gl_FragCoord.xy * _19._m23.xy; float4 _88 = _7._m2 * _7._m0.xyxy; float2 _95 = _88.xy; float2 _96 = _88.zw; - float2 _97 = fast::clamp(_82 + (float2(0.0, -2.0) * _7._m0.xy), _95, _96); + float2 _97 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, -2.0) * _7._m0.xy), _95, _96); float3 _109 = float3(_11._m5) * fast::clamp(_8.sample(_9, _97, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _113 = _12.sample(_13, _97, level(0.0)); float _114 = _113.y; @@ -115,8 +112,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _129 = _109; } - float3 _130 = _129 * 0.5; - float2 _144 
= fast::clamp(_82 + (float2(-1.0) * _7._m0.xy), _95, _96); + float2 _144 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-1.0) * _7._m0.xy), _95, _96); float3 _156 = float3(_11._m5) * fast::clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _160 = _12.sample(_13, _144, level(0.0)); float _161 = _160.y; @@ -129,8 +125,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _176 = _156; } - float3 _177 = _176 * 0.5; - float2 _191 = fast::clamp(_82 + (float2(0.0, -1.0) * _7._m0.xy), _95, _96); + float2 _191 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, -1.0) * _7._m0.xy), _95, _96); float3 _203 = float3(_11._m5) * fast::clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _207 = _12.sample(_13, _191, level(0.0)); float _208 = _207.y; @@ -143,8 +138,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _223 = _203; } - float3 _224 = _223 * 0.75; - float2 _238 = fast::clamp(_82 + (float2(1.0, -1.0) * _7._m0.xy), _95, _96); + float2 _238 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(1.0, -1.0) * _7._m0.xy), _95, _96); float3 _250 = float3(_11._m5) * fast::clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _254 = _12.sample(_13, _238, level(0.0)); float _255 = _254.y; @@ -157,8 +151,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _270 = _250; } - float3 _271 = _270 * 0.5; - float2 _285 = fast::clamp(_82 + (float2(-2.0, 0.0) * _7._m0.xy), _95, _96); + float2 _285 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-2.0, 0.0) * _7._m0.xy), _95, _96); float3 _297 = float3(_11._m5) * fast::clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _301 = _12.sample(_13, _285, level(0.0)); float _302 = _301.y; @@ -171,8 +164,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _317 = _297; } - float3 _318 = _317 * 0.5; - float2 _332 = 
fast::clamp(_82 + (float2(-1.0, 0.0) * _7._m0.xy), _95, _96); + float2 _332 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-1.0, 0.0) * _7._m0.xy), _95, _96); float3 _344 = float3(_11._m5) * fast::clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _348 = _12.sample(_13, _332, level(0.0)); float _349 = _348.y; @@ -185,8 +177,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _364 = _344; } - float3 _365 = _364 * 0.75; - float2 _379 = fast::clamp(_82, _95, _96); + float2 _379 = fast::clamp(gl_FragCoord.xy * _19._m23.xy, _95, _96); float3 _391 = float3(_11._m5) * fast::clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _395 = _12.sample(_13, _379, level(0.0)); float _396 = _395.y; @@ -199,8 +190,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _411 = _391; } - float3 _412 = _411 * 1.0; - float2 _426 = fast::clamp(_82 + (float2(1.0, 0.0) * _7._m0.xy), _95, _96); + float2 _426 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(1.0, 0.0) * _7._m0.xy), _95, _96); float3 _438 = float3(_11._m5) * fast::clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _442 = _12.sample(_13, _426, level(0.0)); float _443 = _442.y; @@ -213,8 +203,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _458 = _438; } - float3 _459 = _458 * 0.75; - float2 _473 = fast::clamp(_82 + (float2(2.0, 0.0) * _7._m0.xy), _95, _96); + float2 _473 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(2.0, 0.0) * _7._m0.xy), _95, _96); float3 _485 = float3(_11._m5) * fast::clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _489 = _12.sample(_13, _473, level(0.0)); float _490 = _489.y; @@ -227,8 +216,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _505 = _485; } - float3 _506 = _505 * 0.5; - float2 _520 = fast::clamp(_82 + (float2(-1.0, 1.0) * _7._m0.xy), _95, _96); + float2 _520 = 
fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(-1.0, 1.0) * _7._m0.xy), _95, _96); float3 _532 = float3(_11._m5) * fast::clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _536 = _12.sample(_13, _520, level(0.0)); float _537 = _536.y; @@ -241,8 +229,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _552 = _532; } - float3 _553 = _552 * 0.5; - float2 _567 = fast::clamp(_82 + (float2(0.0, 1.0) * _7._m0.xy), _95, _96); + float2 _567 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, 1.0) * _7._m0.xy), _95, _96); float3 _579 = float3(_11._m5) * fast::clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _583 = _12.sample(_13, _567, level(0.0)); float _584 = _583.y; @@ -255,8 +242,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _599 = _579; } - float3 _600 = _599 * 0.75; - float2 _614 = fast::clamp(_82 + _7._m0.xy, _95, _96); + float2 _614 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, _7._m0.xy), _95, _96); float3 _626 = float3(_11._m5) * fast::clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _630 = _12.sample(_13, _614, level(0.0)); float _631 = _630.y; @@ -269,8 +255,7 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _646 = _626; } - float3 _647 = _646 * 0.5; - float2 _661 = fast::clamp(_82 + (float2(0.0, 2.0) * _7._m0.xy), _95, _96); + float2 _661 = fast::clamp(fma(gl_FragCoord.xy, _19._m23.xy, float2(0.0, 2.0) * _7._m0.xy), _95, _96); float3 _673 = float3(_11._m5) * fast::clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _677 = _12.sample(_13, _661, level(0.0)); float _678 = _677.y; @@ -283,12 +268,10 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _693 = _673; } - float3 _702 = ((((((((((((_130.xyz + _177).xyz + _224).xyz + _271).xyz + _318).xyz + _365).xyz + _412).xyz + _459).xyz + _506).xyz + _553).xyz + _600).xyz + _647).xyz + 
(_693 * 0.5)).xyz * float3(0.125); - _28 _704 = _74; - _704._m0 = float4(_702.x, _702.y, _702.z, float4(0.0).w); - _28 _705 = _704; - _705._m0.w = 1.0; - out.m_5 = _705._m0; + float3 _702 = (((((((((((((_129 * 0.5).xyz + (_176 * 0.5)).xyz + (_223 * 0.75)).xyz + (_270 * 0.5)).xyz + (_317 * 0.5)).xyz + (_364 * 0.75)).xyz + (_411 * 1.0)).xyz + (_458 * 0.75)).xyz + (_505 * 0.5)).xyz + (_552 * 0.5)).xyz + (_599 * 0.75)).xyz + (_646 * 0.5)).xyz + (_693 * 0.5)).xyz * float3(0.125); + _28 _750 = _28{ float4(_702.x, _702.y, _702.z, float4(0.0).w) }; + _750._m0.w = 1.0; + out.m_5 = _750._m0; return out; } diff --git a/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc deleted file mode 100644 index bbda7be5bd1..00000000000 --- a/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc +++ /dev/null @@ -1,73 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct VertexOutput -{ - float4 pos; - float2 uv; -}; - -struct VertexOutput_1 -{ - float2 uv; -}; - -struct HSOut -{ - float2 uv; -}; - -struct main0_out -{ - HSOut _entryPointOutput; - float4 gl_Position; -}; - -struct main0_in -{ - float2 VertexOutput_uv [[attribute(0)]]; - float4 gl_Position [[attribute(1)]]; -}; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) -{ - device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; - if (gl_InvocationID < spvIndirectParams[0]) - gl_in[gl_InvocationID] = in; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_InvocationID >= 3) - return; - VertexOutput _223[3] = { VertexOutput{ gl_in[0].gl_Position, gl_in[0].VertexOutput_uv }, VertexOutput{ gl_in[1].gl_Position, gl_in[1].VertexOutput_uv }, VertexOutput{ gl_in[2].gl_Position, gl_in[2].VertexOutput_uv } }; - VertexOutput param[3]; - spvArrayCopyFromStack1(param, _223); - gl_out[gl_InvocationID].gl_Position = param[gl_InvocationID].pos; - gl_out[gl_InvocationID]._entryPointOutput.uv = param[gl_InvocationID].uv; - threadgroup_barrier(mem_flags::mem_device); - if (int(gl_InvocationID) == 0) - { - float2 _174 = float2(1.0) + gl_in[0].VertexOutput_uv; - float _175 = _174.x; - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_175); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_175); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_175); - spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_175); - } -} - diff --git a/reference/opt/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc b/reference/opt/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc new file mode 100644 index 00000000000..79395a4bbb2 --- /dev/null +++ 
b/reference/opt/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct TessLevels +{ + float inner0; + float inner1; + float outer0; + float outer1; + float outer2; + float outer3; +}; + +kernel void main0(const device TessLevels& sb_levels [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(sb_levels.inner0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(sb_levels.outer0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(sb_levels.outer1); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(sb_levels.outer2); +} + diff --git a/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese b/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese index 83ef729321e..bfa96f9cfbd 100644 --- a/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese +++ b/reference/opt/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,14 +51,23 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w), 0.0, 1.0); + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = 
patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + out.gl_Position = float4(fma(gl_TessCoord.x * gl_TessLevelInner[0], gl_TessLevelOuter[0], ((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), fma(gl_TessCoord.y * gl_TessLevelInner[1], gl_TessLevelOuter[1], ((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert b/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert new file mode 100644 index 00000000000..1528c8350de --- /dev/null +++ b/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +struct main0_in +{ + float4 pos [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.pos; + out.gl_ClipDistance[0] = in.pos.x; + out.gl_ClipDistance[1] = in.pos.y; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert b/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert new file mode 100644 index 00000000000..1d6885958c3 --- /dev/null +++ b/reference/opt/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +struct main0_in +{ + float4 pos [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out 
out = {}; + out.gl_Position = in.pos; + out.gl_ClipDistance[0] = in.pos.x; + out.gl_ClipDistance[1] = in.pos.y; + return out; +} + diff --git a/reference/opt/shaders-msl/asm/vert/fake-builtin-input.asm.vert b/reference/opt/shaders-msl/asm/vert/fake-builtin-input.asm.vert index f9fcbc85c30..3079ae9bcbb 100644 --- a/reference/opt/shaders-msl/asm/vert/fake-builtin-input.asm.vert +++ b/reference/opt/shaders-msl/asm/vert/fake-builtin-input.asm.vert @@ -5,6 +5,7 @@ using namespace metal; struct main0_out { + half4 out_var_SV_Target [[user(locn0)]]; float4 gl_Position [[position]]; }; diff --git a/reference/opt/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert b/reference/opt/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert new file mode 100644 index 00000000000..1926ff9e14e --- /dev/null +++ b/reference/opt/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _9 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _9.umatrix * float4(_9.uquad[int(gl_VertexIndex)].x, _9.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_9.flags.flags[0] != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/opt/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert b/reference/opt/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert new file mode 100644 index 00000000000..ee206385746 --- /dev/null +++ b/reference/opt/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint2 flags[1]; +}; + +struct 
defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _9 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _9.umatrix * float4(_9.uquad[int(gl_VertexIndex)].x, _9.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_9.flags.flags[0].x != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert b/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert index ed5c5f9ad6e..196057a79d7 100644 --- a/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert +++ b/reference/opt/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert @@ -9,9 +9,10 @@ constant int _20 = (_7 + 2); constant uint _8_tmp [[function_constant(202)]]; constant uint _8 = is_function_constant_defined(_8_tmp) ? 
_8_tmp : 100u; constant uint _25 = (_8 % 5u); -constant int4 _30 = int4(20, 30, _20, _20); -constant int2 _32 = int2(_30.y, _30.x); -constant int _33 = _30.y; +constant int _30 = _7 - (-3) * (_7 / (-3)); +constant int4 _32 = int4(20, 30, _20, _30); +constant int2 _34 = int2(_32.y, _32.x); +constant int _35 = _32.y; struct main0_out { @@ -22,14 +23,13 @@ struct main0_out vertex main0_out main0() { main0_out out = {}; - float4 _63 = float4(0.0); - _63.y = float(_20); - float4 _66 = _63; + float4 _66 = float4(0.0); + _66.y = float(_20); _66.z = float(_25); - float4 _52 = _66 + float4(_30); - float2 _56 = _52.xy + float2(_32); - out.gl_Position = float4(_56.x, _56.y, _52.z, _52.w); - out.m_4 = _33; + float4 _55 = _66 + float4(_32); + float2 _59 = _55.xy + float2(_34); + out.gl_Position = float4(_59.x, _59.y, _55.z, _55.w); + out.m_4 = _35; return out; } diff --git a/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp b/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp index 59fc03a7520..e57b2ea171e 100644 --- a/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp +++ b/reference/opt/shaders-msl/comp/access-private-workgroup-in-function.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0() { } diff --git a/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp b/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp index f7757cd19f8..18cfd68c199 100644 --- a/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp +++ b/reference/opt/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp @@ -23,6 +23,8 @@ struct SSBO2 float4 v; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { const device SSBO0* ssbo0 [[id(0)]]; diff --git 
a/reference/opt/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp b/reference/opt/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp new file mode 100644 index 00000000000..25a0233aec7 --- /dev/null +++ b/reference/opt/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +kernel void main0(texture2d uImage [[texture(0)]], texture2d uImageRead [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int2 _17 = int2(gl_GlobalInvocationID.xy); + uImage.write(uImageRead.read(uint2(_17)), uint2(_17)); +} + diff --git a/reference/opt/shaders-msl/comp/array-length.comp b/reference/opt/shaders-msl/comp/array-length.comp index 79358eb90e2..5a284b96669 100644 --- a/reference/opt/shaders-msl/comp/array-length.comp +++ b/reference/opt/shaders-msl/comp/array-length.comp @@ -14,6 +14,8 @@ struct SSBO1 float bz[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device SSBO& _14 [[buffer(0)]], device SSBO1* ssbos_0 [[buffer(1)]], device SSBO1* ssbos_1 [[buffer(2)]]) { device SSBO1* ssbos[] = diff --git a/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp b/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp index 6ec9b11bbe7..d804e187679 100644 --- a/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp +++ b/reference/opt/shaders-msl/comp/array-length.msl2.argument.discrete.comp @@ -25,6 +25,8 @@ struct SSBO3 float bz[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { device SSBO* m_16 [[id(0)]]; diff --git a/reference/opt/shaders-msl/comp/atomic.comp b/reference/opt/shaders-msl/comp/atomic.comp index 43e6a8f0380..fca72bfcfe9 100644 --- a/reference/opt/shaders-msl/comp/atomic.comp +++ 
b/reference/opt/shaders-msl/comp/atomic.comp @@ -12,59 +12,61 @@ struct SSBO int i32; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& ssbo [[buffer(0)]]) { threadgroup uint shared_u32; threadgroup int shared_i32; - uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); uint _32; do { _32 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); - int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _38 = 
atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); + int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); int _52; do { _52 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); + } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); shared_u32 = 10u; shared_i32 = 10; - uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, 
memory_order_relaxed); - uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); uint _64; do { _64 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); - int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _69 = 
atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); + int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); int _72; do { _72 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10); + } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10); } diff --git a/reference/opt/shaders-msl/comp/basic.comp b/reference/opt/shaders-msl/comp/basic.comp index 22ec741965d..dbb839f5817 100644 --- a/reference/opt/shaders-msl/comp/basic.comp +++ b/reference/opt/shaders-msl/comp/basic.comp @@ -21,12 +21,14 @@ struct SSBO3 uint counter; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 
[[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 _29 = _23.in_data[gl_GlobalInvocationID.x]; if (dot(_29, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) { - uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); + uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); _45.out_data[_52] = _29; } } diff --git a/reference/opt/shaders-msl/comp/basic.dispatchbase.comp b/reference/opt/shaders-msl/comp/basic.dispatchbase.comp new file mode 100644 index 00000000000..ebbc144c7b1 --- /dev/null +++ b/reference/opt/shaders-msl/comp/basic.dispatchbase.comp @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 in_data[1]; +}; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +struct SSBO3 +{ + uint counter; +}; + +constant uint _59_tmp [[function_constant(10)]]; +constant uint _59 = is_function_constant_defined(_59_tmp) ? 
_59_tmp : 1u; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_59, 1u, 1u); + +kernel void main0(const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvDispatchBase [[grid_origin]]) +{ + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; + float4 _33 = _27.in_data[gl_GlobalInvocationID.x]; + if (dot(_33, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) + { + uint _56 = atomic_fetch_add_explicit((device atomic_uint*)&_52.counter, 1u, memory_order_relaxed); + _49.out_data[_56] = _33; + } +} + diff --git a/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp b/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp new file mode 100644 index 00000000000..2d991f5db54 --- /dev/null +++ b/reference/opt/shaders-msl/comp/basic.dispatchbase.msl11.comp @@ -0,0 +1,36 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 in_data[1]; +}; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +struct SSBO3 +{ + uint counter; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant uint3& spvDispatchBase [[buffer(29)]], const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; + float4 _33 = _27.in_data[gl_GlobalInvocationID.x]; + if (dot(_33, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) + { + uint _56 = atomic_fetch_add_explicit((device atomic_uint*)&_52.counter, 1u, memory_order_relaxed); + _49.out_data[_56] = _33; + } +} + diff --git a/reference/opt/shaders-msl/comp/basic.inline-block.msl2.comp b/reference/opt/shaders-msl/comp/basic.inline-block.msl2.comp new file mode 100644 index 00000000000..902dfc92d93 --- /dev/null +++ 
b/reference/opt/shaders-msl/comp/basic.inline-block.msl2.comp @@ -0,0 +1,53 @@ +#include +#include + +using namespace metal; + +typedef packed_float4 packed_float4x4[4]; + +struct Baz +{ + int f; + int g; +}; + +struct X +{ + int x; + int y; + float z; +}; + +struct Foo +{ + int a; + int b; + packed_float4x4 c; + X x[2]; +}; + +struct Bar +{ + int d; + int e; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u); + +struct spvDescriptorSetBuffer0 +{ + constant Bar* m_38 [[id(0)]]; + Foo m_32 [[id(1)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + device Baz* baz [[id(0)]][3]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + spvDescriptorSet1.baz[gl_GlobalInvocationID.x]->f = spvDescriptorSet0.m_32.a + (*spvDescriptorSet0.m_38).d; + spvDescriptorSet1.baz[gl_GlobalInvocationID.x]->g = spvDescriptorSet0.m_32.b * (*spvDescriptorSet0.m_38).e; +} + diff --git a/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp b/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp deleted file mode 100644 index ad9733a8b58..00000000000 --- a/reference/opt/shaders-msl/comp/bitcast-16bit-1.invalid.comp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO0 -{ - short4 inputs[1]; -}; - -struct SSBO1 -{ - int4 outputs[1]; -}; - -kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - _39.outputs[gl_GlobalInvocationID.x].x = int(as_type(as_type(_25.inputs[gl_GlobalInvocationID.x].xy) + half2(half(1.0)))); - _39.outputs[gl_GlobalInvocationID.x].y = as_type(_25.inputs[gl_GlobalInvocationID.x].zw); - _39.outputs[gl_GlobalInvocationID.x].z = int(as_type(ushort2(_25.inputs[gl_GlobalInvocationID.x].xy))); -} - diff --git 
a/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp b/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp deleted file mode 100644 index a4230b1eb6a..00000000000 --- a/reference/opt/shaders-msl/comp/bitcast-16bit-2.invalid.comp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO1 -{ - short4 outputs[1]; -}; - -struct SSBO0 -{ - int4 inputs[1]; -}; - -struct UBO -{ - half4 const0; -}; - -kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - short2 _47 = as_type(_29.inputs[gl_GlobalInvocationID.x].x) + as_type(_40.const0.xy); - _21.outputs[gl_GlobalInvocationID.x] = short4(_47.x, _47.y, _21.outputs[gl_GlobalInvocationID.x].z, _21.outputs[gl_GlobalInvocationID.x].w); - short2 _66 = short2(as_type(uint(_29.inputs[gl_GlobalInvocationID.x].y)) - as_type(_40.const0.zw)); - _21.outputs[gl_GlobalInvocationID.x] = short4(_21.outputs[gl_GlobalInvocationID.x].x, _21.outputs[gl_GlobalInvocationID.x].y, _66.x, _66.y); -} - diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000000..fb561482abd --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct UBO +{ + uint2 b; +}; + +struct SSBO +{ + packed_float3 a1; + float a2; +}; + +kernel void main0(constant UBO& _10 [[buffer(0)]]) +{ + (reinterpret_cast(as_type(_10.b)))->a1 = float3(1.0, 2.0, 3.0); + uint2 _35 = as_type(reinterpret_cast(reinterpret_cast(as_type(_10.b + uint2(32u))))); + uint2 v2 = _35; + device SSBO* _39 = reinterpret_cast(as_type(_35)); + float3 v3 = float3(_39->a1); + _39->a1 = 
float3(_39->a1) + float3(1.0); +} + diff --git a/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp new file mode 100644 index 00000000000..d66154b5494 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -0,0 +1,96 @@ +#include +#include + +using namespace metal; + +struct t21; + +struct t24 +{ + int4 m0[2]; + int m1; + ulong2 m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t21 +{ + int4 m0[2]; + int m1; + ulong2 m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t35 +{ + int m0[32]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant t24& u24 [[buffer(0)]], constant t35& u35 [[buffer(1)]], texture2d v295 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int v8 = 0; + int _30 = 0 | (u24.m0[0].x - 0); + v8 = _30; + int _44 = _30 | (u24.m0[u35.m0[1]].x - 1); + v8 = _44; + int _50 = _44 | (u24.m1 - 2); + v8 = _50; + int _60 = _50 | int(u24.m4[0u][0] - 3.0); + v8 = _60; + int _68 = _60 | int(u24.m4[1u][0] - 5.0); + v8 = _68; + int _75 = _68 | int(u24.m4[0u][1] - 4.0); + v8 = _75; + int _82 = _75 | int(u24.m4[1u][1] - 6.0); + v8 = _82; + int _92 = _82 | (((device t21*)u24.m2[0].x)->m0[0].x - 3); + v8 = _92; + int _101 = _92 | (((device t21*)u24.m2[0].x)->m0[u35.m0[1]].x - 4); + v8 = _101; + int _109 = _101 | (((device t21*)u24.m2[0].x)->m1 - 5); + v8 = _109; + int _118 = _109 | int(((device t21*)u24.m2[0].x)->m4[0u][0] - 6.0); + v8 = _118; + int _127 = _118 | int(((device t21*)u24.m2[0].x)->m4[1u][0] - 8.0); + v8 = _127; + int _136 = _127 | int(((device t21*)u24.m2[0].x)->m4[0u][1] - 7.0); + v8 = _136; + int _145 = _136 | int(((device t21*)u24.m2[0].x)->m4[1u][1] - 9.0); + v8 = _145; + int _155 = _145 | (((device t21*)u24.m2[u35.m0[1]].x)->m0[0].x - 6); + v8 = _155; + int _167 = _155 | (((device 
t21*)u24.m2[u35.m0[1]].x)->m0[u35.m0[1]].x - 7); + v8 = _167; + int _177 = _167 | (((device t21*)u24.m2[u35.m0[1]].x)->m1 - 8); + v8 = _177; + int _187 = _177 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][0] - 9.0); + v8 = _187; + int _198 = _187 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][0] - 11.0); + v8 = _198; + int _209 = _198 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][1] - 10.0); + v8 = _209; + int _220 = _209 | int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][1] - 12.0); + v8 = _220; + int _228 = _220 | (u24.m3->m0[0].x - 9); + v8 = _228; + int _238 = _228 | (u24.m3->m0[u35.m0[1]].x - 10); + v8 = _238; + int _246 = _238 | (u24.m3->m1 - 11); + v8 = _246; + int _254 = _246 | int(u24.m3->m4[0u][0] - 12.0); + v8 = _254; + int _263 = _254 | int(u24.m3->m4[1u][0] - 14.0); + v8 = _263; + int _272 = _263 | int(u24.m3->m4[0u][1] - 13.0); + v8 = _272; + int _281 = _272 | int(u24.m3->m4[1u][1] - 15.0); + v8 = _281; + uint4 _292 = select(uint4(1u, 0u, 0u, 1u), uint4(0u), bool4(_281 != 0)); + uint4 v284 = _292; + v295.write(_292, uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000000..d85fa356b80 --- /dev/null +++ b/reference/opt/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,56 @@ +#include +#include + +using namespace metal; + +struct Position; +struct PositionReferences; + +struct Position +{ + float2 positions[1]; +}; + +struct Registers +{ + device PositionReferences* references; + float fract_time; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); + +kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint2 
local_offset = gl_GlobalInvocationID.xy; + uint _19 = local_offset.y; + uint _29 = local_offset.x; + uint _30 = ((_19 * 8u) * gl_NumWorkGroups.x) + _29; + uint local_index = _30; + uint slice = gl_WorkGroupID.z; + device Position* __restrict positions = registers.references->buffers[gl_WorkGroupID.z]; + float _66 = float(gl_WorkGroupID.z); + float _70 = fract(fma(_66, 0.100000001490116119384765625, registers.fract_time)); + float _71 = 6.283125400543212890625 * _70; + float offset = _71; + float2 pos = float2(local_offset); + float _83 = sin(fma(2.2000000476837158203125, pos.x, _71)); + pos.x = fma(0.20000000298023223876953125, _83, pos.x); + float _97 = sin(fma(2.25, pos.y, _70 * 12.56625080108642578125)); + pos.y = fma(0.20000000298023223876953125, _97, pos.y); + float _111 = cos(fma(1.7999999523162841796875, pos.y, _70 * 18.849376678466796875)); + pos.x = fma(0.20000000298023223876953125, _111, pos.x); + float _125 = cos(fma(2.849999904632568359375, pos.x, _70 * 25.1325016021728515625)); + pos.y = fma(0.20000000298023223876953125, _125, pos.y); + float _133 = sin(_71); + pos.x = fma(0.5, _133, pos.x); + float _142 = sin(fma(6.283125400543212890625, _70, 0.300000011920928955078125)); + pos.y = fma(0.5, _142, pos.y); + float2 _155 = float2(gl_NumWorkGroups.xy); + registers.references->buffers[gl_WorkGroupID.z]->positions[_30] = (pos / fma(float2(8.0), _155, float2(-1.0))) - float2(0.5); +} + diff --git a/reference/opt/shaders-msl/comp/coherent-block.comp b/reference/opt/shaders-msl/comp/coherent-block.comp index bec9b218c7b..58bbacb7f0c 100644 --- a/reference/opt/shaders-msl/comp/coherent-block.comp +++ b/reference/opt/shaders-msl/comp/coherent-block.comp @@ -8,7 +8,9 @@ struct SSBO float4 value; }; -kernel void main0(device SSBO& _10 [[buffer(0)]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(volatile device SSBO& _10 [[buffer(0)]]) { _10.value = float4(20.0); } diff --git a/reference/opt/shaders-msl/comp/coherent-image.comp 
b/reference/opt/shaders-msl/comp/coherent-image.comp index 0fe044fb9ae..5090484464d 100644 --- a/reference/opt/shaders-msl/comp/coherent-image.comp +++ b/reference/opt/shaders-msl/comp/coherent-image.comp @@ -8,7 +8,9 @@ struct SSBO int4 value; }; -kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) { _10.value = uImage.read(uint2(int2(10))); } diff --git a/reference/opt/shaders-msl/comp/complex-composite-constant-array.comp b/reference/opt/shaders-msl/comp/complex-composite-constant-array.comp new file mode 100644 index 00000000000..bd58c95a006 --- /dev/null +++ b/reference/opt/shaders-msl/comp/complex-composite-constant-array.comp @@ -0,0 +1,59 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct SSBO +{ + float4x4 a; + uint index; +}; + +kernel void main0(device SSBO& _14 [[buffer(0)]]) +{ + spvUnsafeArray _32 = spvUnsafeArray({ float4x4(float4(1.0, 0.0, 0.0, 0.0), float4(0.0, 1.0, 0.0, 0.0), float4(0.0, 0.0, 1.0, 0.0), float4(0.0, 0.0, 0.0, 1.0)), float4x4(float4(2.0, 0.0, 0.0, 0.0), float4(0.0, 2.0, 0.0, 0.0), float4(0.0, 0.0, 2.0, 0.0), float4(0.0, 0.0, 0.0, 2.0)) }); + + _14.a = _32[_14.index]; +} + diff --git a/reference/opt/shaders-msl/comp/composite-array-initialization.comp b/reference/opt/shaders-msl/comp/composite-array-initialization.comp index 8dec8bddb31..6181ae69b11 100644 --- a/reference/opt/shaders-msl/comp/composite-array-initialization.comp +++ b/reference/opt/shaders-msl/comp/composite-array-initialization.comp @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Data { float a; @@ -27,26 +66,13 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); -constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; - Data data2[2]; - spvArrayCopyFromStack1(data2, _31); + spvUnsafeArray _25 = spvUnsafeArray({ Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }); + + spvUnsafeArray _31 = spvUnsafeArray({ Data{ X, 2.0 }, Data{ 3.0, 5.0 } }); + spvUnsafeArray data2; + data2 = _31; _53.outdata[gl_WorkGroupID.x].a = _25[gl_LocalInvocationID.x].a + data2[gl_LocalInvocationID.x].a; _53.outdata[gl_WorkGroupID.x].b = _25[gl_LocalInvocationID.x].b + data2[gl_LocalInvocationID.x].b; } diff --git 
a/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp new file mode 100644 index 00000000000..536a6e30747 --- /dev/null +++ b/reference/opt/shaders-msl/comp/composite-array-initialization.force-native-array.comp @@ -0,0 +1,148 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + 
+template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Data +{ + float a; + float b; +}; + +constant float X_tmp [[function_constant(0)]]; +constant float X = is_function_constant_defined(X_tmp) ? X_tmp : 4.0; + +struct Data_1 +{ + float a; + float b; +}; + +struct SSBO +{ + Data_1 outdata[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); + +kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; + + Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; + Data data2[2]; + spvArrayCopyFromStackToStack1(data2, _31); + _53.outdata[gl_WorkGroupID.x].a = _25[gl_LocalInvocationID.x].a + data2[gl_LocalInvocationID.x].a; + _53.outdata[gl_WorkGroupID.x].b = _25[gl_LocalInvocationID.x].b + data2[gl_LocalInvocationID.x].b; +} + diff --git a/reference/opt/shaders-msl/comp/composite-construct.comp b/reference/opt/shaders-msl/comp/composite-construct.comp index 6d44fc57b23..09e6fc7d959 100644 --- a/reference/opt/shaders-msl/comp/composite-construct.comp +++ b/reference/opt/shaders-msl/comp/composite-construct.comp @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct SSBO0 { float4 as[1]; @@ -15,24 +54,13 @@ struct SSBO1 float4 bs[1]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { - float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }; - float4 values[2]; - spvArrayCopyFromStack1(values, _37); + spvUnsafeArray _37 = spvUnsafeArray({ _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }); + spvUnsafeArray values; + values = _37; _16.as[0] = values[gl_LocalInvocationIndex]; _32.bs[1] = float4(40.0); } diff --git a/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp b/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp index ea9693ce474..cb396cff20e 100644 --- a/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp 
+++ b/reference/opt/shaders-msl/comp/copy-array-of-arrays.comp @@ -10,6 +10,8 @@ struct BUF float c; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device BUF& o [[buffer(0)]]) { o.a = 4; diff --git a/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp new file mode 100644 index 00000000000..cb396cff20e --- /dev/null +++ b/reference/opt/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct BUF +{ + int a; + float b; + float c; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device BUF& o [[buffer(0)]]) +{ + o.a = 4; + o.b = o.c; +} + diff --git a/reference/opt/shaders-msl/comp/culling.comp b/reference/opt/shaders-msl/comp/culling.comp index 95ffff8393b..55735475a7c 100644 --- a/reference/opt/shaders-msl/comp/culling.comp +++ b/reference/opt/shaders-msl/comp/culling.comp @@ -28,7 +28,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff float _28 = _22.in_data[gl_GlobalInvocationID.x]; if (_28 > 12.0) { - uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed); + uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed); _38.out_data[_45] = _28; } } diff --git a/reference/opt/shaders-msl/comp/defer-parens.comp b/reference/opt/shaders-msl/comp/defer-parens.comp index 69a8aab92dd..8c130e3a0c0 100644 --- a/reference/opt/shaders-msl/comp/defer-parens.comp +++ b/reference/opt/shaders-msl/comp/defer-parens.comp @@ -9,6 +9,8 @@ struct SSBO int index; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _13 [[buffer(0)]]) { float4 _17 = _13.data; diff --git a/reference/opt/shaders-msl/comp/dowhile.comp b/reference/opt/shaders-msl/comp/dowhile.comp index 
3ebafe0fdeb..b503c948ad8 100644 --- a/reference/opt/shaders-msl/comp/dowhile.comp +++ b/reference/opt/shaders-msl/comp/dowhile.comp @@ -14,21 +14,23 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _57; - int _58; - _58 = 0; - _57 = _28.in_data[gl_GlobalInvocationID.x]; + float4 _59; + int _60; + _60 = 0; + _59 = _28.in_data[gl_GlobalInvocationID.x]; float4 _42; for (;;) { - _42 = _28.mvp * _57; - int _44 = _58 + 1; + _42 = _28.mvp * _59; + int _44 = _60 + 1; if (_44 < 16) { - _58 = _44; - _57 = _42; + _60 = _44; + _59 = _42; } else { diff --git a/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp b/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp index 7cb8913dabf..fbf4c4f7fc4 100644 --- a/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp +++ b/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp @@ -5,17 +5,6 @@ using namespace metal; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -28,6 +17,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -65,66 +65,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) { constant uint& fooSwzl = spvSwizzleConstants[0]; diff --git a/reference/opt/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp b/reference/opt/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp index fe0212ec3ff..333485a256a 100644 --- a/reference/opt/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp +++ b/reference/opt/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/comp/global-invocation-id.comp b/reference/opt/shaders-msl/comp/global-invocation-id.comp index fe0212ec3ff..333485a256a 100644 --- a/reference/opt/shaders-msl/comp/global-invocation-id.comp +++ b/reference/opt/shaders-msl/comp/global-invocation-id.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x 
- y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp b/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp new file mode 100644 index 00000000000..05dc38746a8 --- /dev/null +++ b/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct SSBO +{ + float4 outdata; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +struct spvDescriptorSetBuffer0 +{ + texture2d uImage [[id(0)]]; + device atomic_uint* uImage_atomic [[id(1)]]; + device SSBO* m_31 [[id(2)]]; + texture2d uTexture [[id(3)]]; + sampler uTextureSmplr [[id(4)]]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), spvDescriptorSet0.uImage)], 10u, memory_order_relaxed); + (*spvDescriptorSet0.m_31).outdata = spvDescriptorSet0.uTexture.sample(spvDescriptorSet0.uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(_26)); +} + diff --git a/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.comp b/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.comp new file mode 100644 index 00000000000..7b0a129a488 --- /dev/null +++ b/reference/opt/shaders-msl/comp/image-atomic-automatic-bindings.comp @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct SSBO +{ + float4 outdata; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _31 [[buffer(1)]], texture2d uImage [[texture(0)]], device atomic_uint* uImage_atomic [[buffer(0)]], texture2d uTexture [[texture(1)]], sampler uTextureSmplr [[sampler(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), uImage)], 10u, memory_order_relaxed); + _31.outdata = uTexture.sample(uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(_26)); +} + diff --git a/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp b/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp index 1eeaf87cf44..41c4dfc1802 100644 --- a/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp +++ b/reference/opt/shaders-msl/comp/image-cube-array-load-store.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texturecube_array uImageIn [[texture(0)]], texturecube_array uImageOut [[texture(1)]]) { uImageOut.write(uImageIn.read(uint2(int3(9, 7, 11).xy), uint(int3(9, 7, 11).z) % 6u, uint(int3(9, 7, 11).z) / 6u), uint2(int3(9, 7, 11).xy), uint(int3(9, 7, 11).z) % 6u, uint(int3(9, 7, 11).z) / 6u); diff --git a/reference/opt/shaders-msl/comp/image.comp b/reference/opt/shaders-msl/comp/image.comp index 447732dd235..c875e78de02 100644 --- a/reference/opt/shaders-msl/comp/image.comp +++ b/reference/opt/shaders-msl/comp/image.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize 
[[maybe_unused]] = uint3(1u); + kernel void main0(texture2d uImageIn [[texture(0)]], texture2d uImageOut [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { int2 _23 = int2(gl_GlobalInvocationID.xy); diff --git a/reference/opt/shaders-msl/comp/insert.comp b/reference/opt/shaders-msl/comp/insert.comp index 1418ce35b5c..437b7f32898 100644 --- a/reference/opt/shaders-msl/comp/insert.comp +++ b/reference/opt/shaders-msl/comp/insert.comp @@ -8,19 +8,11 @@ struct SSBO float4 out_data[1]; }; -constant float4 _52 = {}; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _45 = _52; - _45.x = 10.0; - float4 _47 = _45; - _47.y = 30.0; - float4 _49 = _47; - _49.z = 70.0; - float4 _51 = _49; - _51.w = 90.0; - _27.out_data[gl_GlobalInvocationID.x] = _51; - _27.out_data[gl_GlobalInvocationID.x].y = 20.0; + _27.out_data[gl_GlobalInvocationID.x] = float4(10.0, 30.0, 70.0, 90.0); + ((device float*)&_27.out_data[gl_GlobalInvocationID.x])[1u] = 20.0; } diff --git a/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp b/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp deleted file mode 100644 index 13304bd0e81..00000000000 --- a/reference/opt/shaders-msl/comp/int64.invalid.msl22.comp +++ /dev/null @@ -1,24 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO -{ - int s32; - uint u32; -}; - -constant long _162 = {}; - -kernel void main0(device SSBO& _96 [[buffer(0)]]) -{ - long4 _137; - ulong4 _141; - _137 = abs((_137 + long4(30l, 40l, 50l, 60l)) + long4(_141 + ulong4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul))); - _141 += ulong4(long4(999999999999999999l, 8888888888888888l, 77777777777777777l, 6666666666666666l)); - ulong _109 = ulong(_162); - _96.s32 = int(uint(((ulong(_137.x) + _141.y) + _109) + _109)); - _96.u32 = uint(((ulong(_137.y) + _141.z) + ulong(_162 + 1l)) + 
_109); -} - diff --git a/reference/opt/shaders-msl/comp/inverse.comp b/reference/opt/shaders-msl/comp/inverse.comp index f2f499b91eb..0a1d298b0da 100644 --- a/reference/opt/shaders-msl/comp/inverse.comp +++ b/reference/opt/shaders-msl/comp/inverse.comp @@ -5,34 +5,23 @@ using namespace metal; -struct MatrixOut -{ - float2x2 m2out; - float3x3 m3out; - float4x4 m4out; -}; - -struct MatrixIn -{ - float2x2 m2in; - float3x3 m3in; - float4x4 m4in; -}; - // Returns the determinant of a 2x2 matrix. -inline float spvDet2x2(float a1, float a2, float b1, float b2) +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the determinant of a 3x3 matrix. -inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) float4x4 spvInverse4x4(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -68,6 +57,7 @@ float4x4 spvInverse4x4(float4x4 m) // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. 
+static inline __attribute__((always_inline)) float3x3 spvInverse3x3(float3x3 m) { float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -95,6 +85,7 @@ float3x3 spvInverse3x3(float3x3 m) // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) float2x2 spvInverse2x2(float2x2 m) { float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -114,6 +105,22 @@ float2x2 spvInverse2x2(float2x2 m) return (det != 0.0f) ? (adj * (1.0f / det)) : m; } +struct MatrixOut +{ + float2x2 m2out; + float3x3 m3out; + float4x4 m4out; +}; + +struct MatrixIn +{ + float2x2 m2in; + float3x3 m3in; + float4x4 m4in; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device MatrixOut& _15 [[buffer(0)]], const device MatrixIn& _20 [[buffer(1)]]) { _15.m2out = spvInverse2x2(_20.m2in); diff --git a/reference/opt/shaders-msl/comp/local-invocation-id.comp b/reference/opt/shaders-msl/comp/local-invocation-id.comp index 772e5e0d867..45059905881 100644 --- a/reference/opt/shaders-msl/comp/local-invocation-id.comp +++ b/reference/opt/shaders-msl/comp/local-invocation-id.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/comp/local-invocation-index.comp b/reference/opt/shaders-msl/comp/local-invocation-index.comp index 41adbdca5cf..67426dd3f6b 100644 --- 
a/reference/opt/shaders-msl/comp/local-invocation-index.comp +++ b/reference/opt/shaders-msl/comp/local-invocation-index.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp new file mode 100644 index 00000000000..cf26178ee87 --- /dev/null +++ b/reference/opt/shaders-msl/comp/mat3-row-maj-read-write-const.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct model_t +{ + float3x3 mtx_rm; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device model_t& model [[buffer(0)]]) +{ + if ((transpose(model.mtx_rm) * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)))[0].x != 0.0) + { + model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0))); + } +} + diff --git a/reference/opt/shaders-msl/comp/mat3.comp b/reference/opt/shaders-msl/comp/mat3.comp index 72f08dd85ed..31351ba57be 100644 --- a/reference/opt/shaders-msl/comp/mat3.comp +++ b/reference/opt/shaders-msl/comp/mat3.comp @@ -8,6 +8,8 @@ struct SSBO2 float3x3 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _22.out_data[gl_GlobalInvocationID.x] = float3x3(float3(10.0), float3(20.0), float3(40.0)); diff --git a/reference/opt/shaders-msl/comp/mod.comp 
b/reference/opt/shaders-msl/comp/mod.comp index 8574f87b7e2..94d739fe6c3 100644 --- a/reference/opt/shaders-msl/comp/mod.comp +++ b/reference/opt/shaders-msl/comp/mod.comp @@ -5,6 +5,13 @@ using namespace metal; +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct SSBO { float4 in_data[1]; @@ -15,12 +22,7 @@ struct SSBO2 float4 out_data[1]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/opt/shaders-msl/comp/modf.comp b/reference/opt/shaders-msl/comp/modf.comp index 39e402337f8..df19cae502f 100644 --- a/reference/opt/shaders-msl/comp/modf.comp +++ b/reference/opt/shaders-msl/comp/modf.comp @@ -13,6 +13,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 i; diff --git a/reference/opt/shaders-msl/comp/outer-product.comp b/reference/opt/shaders-msl/comp/outer-product.comp index 8e32db392ea..e589642dbda 100644 --- a/reference/opt/shaders-msl/comp/outer-product.comp +++ b/reference/opt/shaders-msl/comp/outer-product.comp @@ -23,6 +23,8 @@ struct ReadSSBO float4 v4; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]]) { _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y); diff --git 
a/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp b/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp new file mode 100644 index 00000000000..c119186663d --- /dev/null +++ b/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp @@ -0,0 +1,116 @@ +#include +#include + +using namespace metal; + +struct SSBO_A +{ + float data[1]; +}; + +struct UBO_C +{ + float4 data[1024]; +}; + +struct Registers +{ + float reg; +}; + +struct SSBO_B +{ + uint2 data[1]; +}; + +struct UBO_D +{ + uint4 data[1024]; +}; + +struct SSBO_BRO +{ + uint2 data[1]; +}; + +struct SSBO_As +{ + float data[1]; +}; + +struct UBO_Cs +{ + float4 data[1024]; +}; + +struct SSBO_Bs +{ + uint2 data[1024]; +}; + +struct UBO_Ds +{ + uint4 data[1024]; +}; + +struct SSBO_BsRO +{ + uint2 data[1024]; +}; + +struct SSBO_E +{ + float data[1]; +}; + +struct UBO_G +{ + float4 data[1024]; +}; + +struct SSBO_F +{ + uint2 data[1]; +}; + +struct UBO_H +{ + uint4 data[1024]; +}; + +struct SSBO_I +{ + uint2 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct spvDescriptorSetBuffer0 +{ + device SSBO_A* ssbo_a [[id(0)]]; + constant UBO_C* ubo_c [[id(1)]]; + device SSBO_As* ssbo_as [[id(2)]][4]; + constant UBO_Cs* ubo_cs [[id(6)]][4]; +}; + +kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0; + constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1; + device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0; + 
constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1; + const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0; + device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a); + constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c); + const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a); + const device auto& ssbo_bs = (device SSBO_Bs* const device (&)[4])spvDescriptorSet0.ssbo_as; + const device auto& ubo_ds = (constant UBO_Ds* const device (&)[4])spvDescriptorSet0.ubo_cs; + const device auto& ssbo_bs_readonly = (const device SSBO_BsRO* const device (&)[4])spvDescriptorSet0.ssbo_as; + (*spvDescriptorSet0.ssbo_a).data[gl_GlobalInvocationID.x] = (*spvDescriptorSet0.ubo_c).data[gl_WorkGroupID.x].x + _42.reg; + ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; + spvDescriptorSet0.ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = spvDescriptorSet0.ubo_cs[gl_WorkGroupID.x]->data[0].x; + ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_ds[gl_WorkGroupID.x]->data[0].xy + ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x]; + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x].x; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x]; +} + diff --git a/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp b/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp new file mode 100644 index 00000000000..9cef6b208f4 --- /dev/null +++ b/reference/opt/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp @@ -0,0 +1,116 @@ +#include +#include + +using namespace metal; + +struct SSBO_A +{ + float data[1]; +}; + +struct UBO_C +{ + float4 data[1024]; +}; + +struct Registers +{ + float reg; +}; + +struct SSBO_B +{ + uint2 
data[1]; +}; + +struct UBO_D +{ + uint4 data[1024]; +}; + +struct SSBO_BRO +{ + uint2 data[1]; +}; + +struct SSBO_As +{ + float data[1]; +}; + +struct UBO_Cs +{ + float4 data[1024]; +}; + +struct SSBO_Bs +{ + uint2 data[1024]; +}; + +struct UBO_Ds +{ + uint4 data[1024]; +}; + +struct SSBO_BsRO +{ + uint2 data[1024]; +}; + +struct SSBO_E +{ + float data[1]; +}; + +struct UBO_G +{ + float4 data[1024]; +}; + +struct SSBO_F +{ + uint2 data[1]; +}; + +struct UBO_H +{ + uint4 data[1024]; +}; + +struct SSBO_I +{ + uint2 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct spvDescriptorSetBuffer0 +{ + device SSBO_A* ssbo_a [[id(0)]]; + constant UBO_C* ubo_c [[id(1)]]; + device SSBO_As* ssbo_as [[id(2)]][4]; + constant UBO_Cs* ubo_cs [[id(6)]][4]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& _42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0; + constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1; + device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0; + constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1; + const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0; + device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a); + constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c); + const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a); + constant auto& ssbo_bs = (device SSBO_Bs* constant (&)[4])spvDescriptorSet0.ssbo_as; + constant auto& ubo_ds = (constant UBO_Ds* constant (&)[4])spvDescriptorSet0.ubo_cs; + constant auto& ssbo_bs_readonly = (const device SSBO_BsRO* constant 
(&)[4])spvDescriptorSet0.ssbo_as; + (*spvDescriptorSet0.ssbo_a).data[gl_GlobalInvocationID.x] = (*spvDescriptorSet0.ubo_c).data[gl_WorkGroupID.x].x + _42.reg; + ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; + spvDescriptorSet0.ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = spvDescriptorSet0.ubo_cs[gl_WorkGroupID.x]->data[0].x; + ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_ds[gl_WorkGroupID.x]->data[0].xy + ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x]; + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x].x; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x]; +} + diff --git a/reference/opt/shaders-msl/comp/ray-query.nocompat.spv14.vk.comp b/reference/opt/shaders-msl/comp/ray-query.nocompat.spv14.vk.comp new file mode 100644 index 00000000000..b03d524c25a --- /dev/null +++ b/reference/opt/shaders-msl/comp/ray-query.nocompat.spv14.vk.comp @@ -0,0 +1,91 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include +using namespace metal::raytracing; + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Params +{ + uint ray_flags; + uint cull_mask; + char _m2_pad[8]; + packed_float3 origin; + float tmin; + packed_float3 dir; + float tmax; + float thit; +}; + +kernel void main0(constant Params& _18 [[buffer(1)]], acceleration_structure AS0 [[buffer(0)]], acceleration_structure AS1 [[buffer(2)]]) +{ + intersection_query q; + intersection_params _intersection_params_; + q.reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS0, _intersection_params_); + spvUnsafeArray, 2> q2; + intersection_params _intersection_params_; + q2[1].reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS1, _intersection_params_); + bool _63 = q.next(); + q2[0].abort(); + q.commit_bounding_box_intersection(_18.thit); + _14.commit_triangle_intersection(); + float _71 = q.get_ray_min_distance(); + float3 _74 = q.get_world_space_ray_origin(); + float3 _75 = q.get_world_space_ray_direction(); + uint _80 = (uint)q2[1].get_committed_intersection_type(); + uint _83 = (uint)q2[0].get_committed_intersection_type(); + bool _85 = q2[1].is_candidate_non_opaque_bounding_box(); + float _87 = q2[1].get_committed_distance(); + float _89 = q2[1].get_committed_distance(); + int _92 = q.get_committed_user_instance_id(); + int _94 = q2[0].get_committed_instance_id(); + int _96 = q2[1].get_committed_geometry_id(); + int _97 = 
q.get_committed_primitive_id(); + float2 _100 = q2[0].get_committed_triangle_barycentric_coord(); + bool _103 = q.is_committed_triangle_front_facing(); + float3 _104 = q.get_committed_ray_direction(); + float3 _106 = q2[0].get_committed_ray_origin(); + float4x3 _110 = q.get_committed_object_to_world_transform(); + float4x3 _112 = q2[1].get_committed_world_to_object_transform(); +} + diff --git a/reference/opt/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp b/reference/opt/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp new file mode 100644 index 00000000000..dde7f47b085 --- /dev/null +++ b/reference/opt/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp @@ -0,0 +1,71 @@ +#include +#include +#if __METAL_VERSION__ >= 230 +#include +using namespace metal::raytracing; +#endif + +using namespace metal; + +struct Params +{ + uint ray_flags; + uint cull_mask; + char _m2_pad[8]; + packed_float3 origin; + float tmin; + packed_float3 dir; + float tmax; + float thit; +}; + +kernel void main0(constant Params& _18 [[buffer(1)]], raytracing::acceleration_structure AS0 [[buffer(0)]], raytracing::acceleration_structure AS1 [[buffer(2)]]) +{ + raytracing::intersection_query q; + q.reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS0, intersection_params()); + raytracing::intersection_query q2[2]; + q2[1].reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS1, intersection_params()); + bool _63 = q.next(); + bool res = _63; + q2[0].abort(); + q.commit_bounding_box_intersection(_18.thit); + q2[1].commit_triangle_intersection(); + float _71 = q.get_ray_min_distance(); + float fval = _71; + float3 _74 = q.get_world_space_ray_direction(); + float3 fvals = _74; + float3 _75 = q.get_world_space_ray_origin(); + fvals = _75; + uint _80 = uint(q2[1].get_committed_intersection_type()); + uint type = _80; + uint _83 = uint(q2[0].get_candidate_intersection_type()) - 1; + type = _83; + bool _85 = q2[1].is_candidate_non_opaque_bounding_box(); + res = _85; + 
float _87 = q2[1].get_committed_distance(); + fval = _87; + float _89 = q2[1].get_candidate_triangle_distance(); + fval = _89; + int _92 = q.get_committed_user_instance_id(); + int ival = _92; + int _94 = q2[0].get_candidate_instance_id(); + ival = _94; + int _96 = q2[1].get_candidate_geometry_id(); + ival = _96; + int _97 = q.get_committed_primitive_id(); + ival = _97; + float2 _100 = q2[0].get_candidate_triangle_barycentric_coord(); + fvals.x = _100.x; + fvals.y = _100.y; + bool _107 = q.is_committed_triangle_front_facing(); + res = _107; + float3 _108 = q.get_candidate_ray_direction(); + fvals = _108; + float3 _110 = q2[0].get_committed_ray_origin(); + fvals = _110; + float4x3 _114 = q.get_candidate_object_to_world_transform(); + float4x3 matrices = _114; + float4x3 _116 = q2[1].get_committed_world_to_object_transform(); + matrices = _116; +} + diff --git a/reference/opt/shaders-msl/comp/read-write-only.comp b/reference/opt/shaders-msl/comp/read-write-only.comp index 7547b417d8f..0cf8d8e3215 100644 --- a/reference/opt/shaders-msl/comp/read-write-only.comp +++ b/reference/opt/shaders-msl/comp/read-write-only.comp @@ -21,6 +21,8 @@ struct SSBO1 float4 data3; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], const device SSBO0& _15 [[buffer(1)]], device SSBO1& _21 [[buffer(2)]]) { _10.data4 = _15.data0 + _21.data2; diff --git a/reference/opt/shaders-msl/comp/rmw-matrix.comp b/reference/opt/shaders-msl/comp/rmw-matrix.comp index 150db7ede98..b53a3a75c27 100644 --- a/reference/opt/shaders-msl/comp/rmw-matrix.comp +++ b/reference/opt/shaders-msl/comp/rmw-matrix.comp @@ -13,6 +13,8 @@ struct SSBO float4x4 c1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.a *= _11.a1; diff --git a/reference/opt/shaders-msl/comp/rmw-opt.comp b/reference/opt/shaders-msl/comp/rmw-opt.comp index 05e1f6f283c..f93967da538 100644 --- 
a/reference/opt/shaders-msl/comp/rmw-opt.comp +++ b/reference/opt/shaders-msl/comp/rmw-opt.comp @@ -8,6 +8,8 @@ struct SSBO int a; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.a += 10; diff --git a/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp index 312a6f9453a..9bf87817747 100644 --- a/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -10,12 +10,16 @@ struct SSBO float c; float d; float e; + float f; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.c = abs(_9.a - _9.b); _9.d = abs(_9.a); _9.e = sign(_9.a); + _9.f = abs((_9.a - 1.0) - (_9.b - 2.0)); } diff --git a/reference/opt/shaders-msl/comp/shared-matrix-array-of-array.comp b/reference/opt/shaders-msl/comp/shared-matrix-array-of-array.comp new file mode 100644 index 00000000000..0e17f95cb85 --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-matrix-array-of-array.comp @@ -0,0 +1,1353 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data 
spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + 
spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + 
+ spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator 
matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) 
threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const 
object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + 
threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data 
spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant 
matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < 
Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + 
{ + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& 
operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + 
spvStorage_float4x3 a[2]; + float b; + spvUnsafeArray c; +}; + +struct S2 +{ + int4 a; + spvUnsafeArray, 1>, 3> b; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _383 [[buffer(0)]]) +{ + threadgroup S1 s1; + threadgroup S2 s2; + s1.a[0] = spvStorage_float4x3(float4x3(float3(0.0, 2.0, -8.0), float3(6.0, 7.0, 5.0), float3(-6.0, 1.0, 9.0), float3(-4.0, -3.0, 4.0))); + s1.a[1] = spvStorage_float4x3(float4x3(float3(4.0, 9.0, -9.0), float3(-8.0, -9.0, 8.0), float3(0.0, 4.0, -4.0), float3(7.0, 2.0, -1.0))); + s1.b = 7.0; + s1.c[0] = float2(-5.0, -4.0); + s1.c[1] = float2(3.0, -5.0); + s1.c[2] = float2(-3.0, -1.0); + s2.a = int4(1, 0, -3, 1); + s2.b[0][0][0] = short(true); + s2.b[0][0][1] = short(false); + s2.b[0][0][2] = short(false); + s2.b[1][0][0] = short(true); + s2.b[1][0][1] = short(false); + s2.b[1][0][2] = short(true); + s2.b[2][0][0] = short(false); + s2.b[2][0][1] = short(true); + s2.b[2][0][2] = short(true); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _464 = abs(-float4x3(s1.a[0])[0].x) < 0.0500000007450580596923828125; + bool _449; + if (_464) + { + _449 = abs(2.0 - float4x3(s1.a[0])[0].y) < 0.0500000007450580596923828125; + } + else + { + _449 = _464; + } + bool _457; + if (_449) + { + _457 = abs((-8.0) - float4x3(s1.a[0])[0].z) < 0.0500000007450580596923828125; + } + else + { + _457 = _449; + } + bool _412; + if (_457) + { + bool _514 = abs(6.0 - float4x3(s1.a[0])[1].x) < 0.0500000007450580596923828125; + bool _499; + if (_514) + { + _499 = abs(7.0 - float4x3(s1.a[0])[1].y) < 0.0500000007450580596923828125; + } + else + { + _499 = _514; + } + bool _507; + if (_499) + { + _507 = abs(5.0 - float4x3(s1.a[0])[1].z) < 0.0500000007450580596923828125; + } + else + { + _507 = _499; + } + _412 = _507; + } + else + { + _412 = _457; + } + bool _420; + if (_412) + 
{ + bool _564 = abs((-6.0) - float4x3(s1.a[0])[2].x) < 0.0500000007450580596923828125; + bool _549; + if (_564) + { + _549 = abs(1.0 - float4x3(s1.a[0])[2].y) < 0.0500000007450580596923828125; + } + else + { + _549 = _564; + } + bool _557; + if (_549) + { + _557 = abs(9.0 - float4x3(s1.a[0])[2].z) < 0.0500000007450580596923828125; + } + else + { + _557 = _549; + } + _420 = _557; + } + else + { + _420 = _412; + } + bool _428; + if (_420) + { + bool _614 = abs((-4.0) - float4x3(s1.a[0])[3].x) < 0.0500000007450580596923828125; + bool _599; + if (_614) + { + _599 = abs((-3.0) - float4x3(s1.a[0])[3].y) < 0.0500000007450580596923828125; + } + else + { + _599 = _614; + } + bool _607; + if (_599) + { + _607 = abs(4.0 - float4x3(s1.a[0])[3].z) < 0.0500000007450580596923828125; + } + else + { + _607 = _599; + } + _428 = _607; + } + else + { + _428 = _420; + } + bool _251; + if (_428) + { + bool _703 = abs(4.0 - float4x3(s1.a[1])[0].x) < 0.0500000007450580596923828125; + bool _688; + if (_703) + { + _688 = abs(9.0 - float4x3(s1.a[1])[0].y) < 0.0500000007450580596923828125; + } + else + { + _688 = _703; + } + bool _696; + if (_688) + { + _696 = abs((-9.0) - float4x3(s1.a[1])[0].z) < 0.0500000007450580596923828125; + } + else + { + _696 = _688; + } + bool _651; + if (_696) + { + bool _753 = abs((-8.0) - float4x3(s1.a[1])[1].x) < 0.0500000007450580596923828125; + bool _738; + if (_753) + { + _738 = abs((-9.0) - float4x3(s1.a[1])[1].y) < 0.0500000007450580596923828125; + } + else + { + _738 = _753; + } + bool _746; + if (_738) + { + _746 = abs(8.0 - float4x3(s1.a[1])[1].z) < 0.0500000007450580596923828125; + } + else + { + _746 = _738; + } + _651 = _746; + } + else + { + _651 = _696; + } + bool _659; + if (_651) + { + bool _803 = abs(-float4x3(s1.a[1])[2].x) < 0.0500000007450580596923828125; + bool _788; + if (_803) + { + _788 = abs(4.0 - float4x3(s1.a[1])[2].y) < 0.0500000007450580596923828125; + } + else + { + _788 = _803; + } + bool _796; + if (_788) + { + _796 = abs((-4.0) - 
float4x3(s1.a[1])[2].z) < 0.0500000007450580596923828125; + } + else + { + _796 = _788; + } + _659 = _796; + } + else + { + _659 = _651; + } + bool _667; + if (_659) + { + bool _853 = abs(7.0 - float4x3(s1.a[1])[3].x) < 0.0500000007450580596923828125; + bool _838; + if (_853) + { + _838 = abs(2.0 - float4x3(s1.a[1])[3].y) < 0.0500000007450580596923828125; + } + else + { + _838 = _853; + } + bool _846; + if (_838) + { + _846 = abs((-1.0) - float4x3(s1.a[1])[3].z) < 0.0500000007450580596923828125; + } + else + { + _846 = _838; + } + _667 = _846; + } + else + { + _667 = _659; + } + _251 = _667; + } + else + { + _251 = _428; + } + bool _260; + if (_251) + { + _260 = abs(7.0 - s1.b) < 0.0500000007450580596923828125; + } + else + { + _260 = _251; + } + bool _269; + if (_260) + { + bool _900 = abs((-5.0) - s1.c[0].x) < 0.0500000007450580596923828125; + bool _893; + if (_900) + { + _893 = abs((-4.0) - s1.c[0].y) < 0.0500000007450580596923828125; + } + else + { + _893 = _900; + } + _269 = _893; + } + else + { + _269 = _260; + } + bool _278; + if (_269) + { + bool _933 = abs(3.0 - s1.c[1].x) < 0.0500000007450580596923828125; + bool _926; + if (_933) + { + _926 = abs((-5.0) - s1.c[1].y) < 0.0500000007450580596923828125; + } + else + { + _926 = _933; + } + _278 = _926; + } + else + { + _278 = _269; + } + bool _287; + if (_278) + { + bool _966 = abs((-3.0) - s1.c[2].x) < 0.0500000007450580596923828125; + bool _959; + if (_966) + { + _959 = abs((-1.0) - s1.c[2].y) < 0.0500000007450580596923828125; + } + else + { + _959 = _966; + } + _287 = _959; + } + else + { + _287 = _278; + } + bool _296; + if (_287) + { + _296 = all(int4(1, 0, -3, 1) == s2.a); + } + else + { + _296 = _287; + } + bool _305; + if (_296) + { + _305 = true == bool(s2.b[0][0][0]); + } + else + { + _305 = _296; + } + bool _314; + if (_305) + { + _314 = false == bool(s2.b[0][0][1]); + } + else + { + _314 = _305; + } + bool _323; + if (_314) + { + _323 = false == bool(s2.b[0][0][2]); + } + else + { + _323 = _314; + 
} + bool _332; + if (_323) + { + _332 = true == bool(s2.b[1][0][0]); + } + else + { + _332 = _323; + } + bool _341; + if (_332) + { + _341 = false == bool(s2.b[1][0][1]); + } + else + { + _341 = _332; + } + bool _350; + if (_341) + { + _350 = true == bool(s2.b[1][0][2]); + } + else + { + _350 = _341; + } + bool _359; + if (_350) + { + _359 = false == bool(s2.b[2][0][0]); + } + else + { + _359 = _350; + } + bool _368; + if (_359) + { + _368 = true == bool(s2.b[2][0][1]); + } + else + { + _368 = _359; + } + bool _377; + if (_368) + { + _377 = true == bool(s2.b[2][0][2]); + } + else + { + _377 = _368; + } + if (_377) + { + _383.passed++; + } +} + diff --git a/reference/opt/shaders-msl/comp/shared-matrix-cast.comp b/reference/opt/shaders-msl/comp/shared-matrix-cast.comp new file mode 100644 index 00000000000..32c8e823d4e --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-matrix-cast.comp @@ -0,0 +1,1017 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& 
operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < 
Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const 
threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + 
columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device 
spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i 
= 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + 
threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data 
spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) 
ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread 
spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const 
threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; 
+typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + float4 a; + spvStorage_float3x2 b; + short4 c; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _212 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a = float4(1.0, -5.0, -9.0, -5.0); + s1.b = spvStorage_float3x2(float3x2(float2(1.0, -7.0), float2(1.0, 2.0), float2(8.0, 7.0))); + s1.c = short4(bool4(false, true, false, false)); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _264 = abs(1.0 - s1.a.x) < 0.0500000007450580596923828125; + bool _241; + if (_264) + { + _241 = abs((-5.0) - s1.a.y) < 0.0500000007450580596923828125; + } + else + { + _241 = _264; + } + bool _249; + if (_241) + { + _249 = abs((-9.0) - s1.a.z) < 0.0500000007450580596923828125; + } + else + { + _249 = _241; + } + bool _257; + if (_249) + { + _257 = abs((-5.0) - s1.a.w) < 0.0500000007450580596923828125; + } + else + { + _257 = _249; + } + bool _197; + if (_257) + { + bool _340 = abs(1.0 - float3x2(s1.b)[0].x) < 0.0500000007450580596923828125; + bool _333; + if (_340) + { + _333 = abs((-7.0) - float3x2(s1.b)[0].y) < 0.0500000007450580596923828125; + } + else + { + _333 = _340; + } + bool _306; + if (_333) + { + bool _373 = abs(1.0 - float3x2(s1.b)[1].x) < 0.0500000007450580596923828125; + bool _366; + if (_373) + { + _366 = abs(2.0 - float3x2(s1.b)[1].y) < 0.0500000007450580596923828125; + } + else + { + _366 = _373; + } + _306 = _366; + } + else + { + _306 = _333; + } + bool _314; + if (_306) + { + bool _406 = abs(8.0 - float3x2(s1.b)[2].x) < 
0.0500000007450580596923828125; + bool _399; + if (_406) + { + _399 = abs(7.0 - float3x2(s1.b)[2].y) < 0.0500000007450580596923828125; + } + else + { + _399 = _406; + } + _314 = _399; + } + else + { + _314 = _306; + } + _197 = _314; + } + else + { + _197 = _257; + } + bool _206; + if (_197) + { + _206 = all(bool4(false, true, false, false) == bool4(s1.c)); + } + else + { + _206 = _197; + } + if (_206) + { + _212.passed++; + } +} + diff --git a/reference/opt/shaders-msl/comp/shared-matrix-nested-struct-array.comp b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct-array.comp new file mode 100644 index 00000000000..dfbd7a76664 --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct-array.comp @@ -0,0 +1,1369 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for 
(size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + 
thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + 
spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& 
operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + 
spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& 
m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef 
__HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 
0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ 
+ spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + 
ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data 
spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return 
*this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } 
+ #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct sA +{ + spvStorage_float2x3 mA; +}; + +struct sB +{ + spvStorage_float2x2 mA; + spvStorage_float3x2 mB; + uint3 mC; +}; + +struct sC +{ + sA mA; + sB mB; +}; + +struct sD +{ + sC mA; +}; + +struct sE +{ + spvStorage_float3x2 mA; + spvStorage_float4x3 mB; +}; + +struct sF +{ + sE mA; +}; + +struct sG +{ + sF mA; +}; + +struct sH +{ + spvUnsafeArray mA; +}; + +struct S1 +{ + sD a; + sG b; + spvUnsafeArray c; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _424 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a.mA.mA.mA = spvStorage_float2x3(float2x3(float3(6.0, 8.0, 8.0), float3(0.0, -4.0, -5.0))); + s1.a.mA.mB.mA = spvStorage_float2x2(float2x2(float2(9.0, -4.0), float2(-6.0, -1.0))); + s1.a.mA.mB.mB = spvStorage_float3x2(float3x2(float2(-1.0, -2.0), float2(1.0, 6.0), float2(5.0, 7.0))); + s1.a.mA.mB.mC = uint3(3u, 1u, 5u); + s1.b.mA.mA.mA = spvStorage_float3x2(float3x2(float2(8.0, 3.0), float2(0.0, 2.0), float2(1.0, 8.0))); + s1.b.mA.mA.mB = 
spvStorage_float4x3(float4x3(float3(0.0, 9.0, -1.0), float3(-1.0, -7.0, 7.0), float3(-4.0, -3.0, 1.0), float3(-4.0, -9.0, 1.0))); + s1.c[0].mA[0] = short3(bool3(true, false, false)); + s1.c[0].mA[1] = short3(bool3(true, false, false)); + s1.c[1].mA[0] = short3(bool3(false)); + s1.c[1].mA[1] = short3(bool3(false)); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _484 = abs(6.0 - float2x3(s1.a.mA.mA.mA)[0].x) < 0.0500000007450580596923828125; + bool _469; + if (_484) + { + _469 = abs(8.0 - float2x3(s1.a.mA.mA.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _469 = _484; + } + bool _477; + if (_469) + { + _477 = abs(8.0 - float2x3(s1.a.mA.mA.mA)[0].z) < 0.0500000007450580596923828125; + } + else + { + _477 = _469; + } + bool _448; + if (_477) + { + bool _534 = abs(-float2x3(s1.a.mA.mA.mA)[1].x) < 0.0500000007450580596923828125; + bool _519; + if (_534) + { + _519 = abs((-4.0) - float2x3(s1.a.mA.mA.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _519 = _534; + } + bool _527; + if (_519) + { + _527 = abs((-5.0) - float2x3(s1.a.mA.mA.mA)[1].z) < 0.0500000007450580596923828125; + } + else + { + _527 = _519; + } + _448 = _527; + } + else + { + _448 = _477; + } + bool _346; + if (_448) + { + bool _593 = abs(9.0 - float2x2(s1.a.mA.mB.mA)[0].x) < 0.0500000007450580596923828125; + bool _586; + if (_593) + { + _586 = abs((-4.0) - float2x2(s1.a.mA.mB.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _586 = _593; + } + bool _567; + if (_586) + { + bool _626 = abs((-6.0) - float2x2(s1.a.mA.mB.mA)[1].x) < 0.0500000007450580596923828125; + bool _619; + if (_626) + { + _619 = abs((-1.0) - float2x2(s1.a.mA.mB.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _619 = _626; + } + _567 = _619; + } + else + { + _567 = _586; + } + _346 = _567; + } + else + { + _346 = _448; + } + bool _355; + if (_346) + { + bool _688 = abs((-1.0) - 
float3x2(s1.a.mA.mB.mB)[0].x) < 0.0500000007450580596923828125; + bool _681; + if (_688) + { + _681 = abs((-2.0) - float3x2(s1.a.mA.mB.mB)[0].y) < 0.0500000007450580596923828125; + } + else + { + _681 = _688; + } + bool _654; + if (_681) + { + bool _721 = abs(1.0 - float3x2(s1.a.mA.mB.mB)[1].x) < 0.0500000007450580596923828125; + bool _714; + if (_721) + { + _714 = abs(6.0 - float3x2(s1.a.mA.mB.mB)[1].y) < 0.0500000007450580596923828125; + } + else + { + _714 = _721; + } + _654 = _714; + } + else + { + _654 = _681; + } + bool _662; + if (_654) + { + bool _754 = abs(5.0 - float3x2(s1.a.mA.mB.mB)[2].x) < 0.0500000007450580596923828125; + bool _747; + if (_754) + { + _747 = abs(7.0 - float3x2(s1.a.mA.mB.mB)[2].y) < 0.0500000007450580596923828125; + } + else + { + _747 = _754; + } + _662 = _747; + } + else + { + _662 = _654; + } + _355 = _662; + } + else + { + _355 = _346; + } + bool _364; + if (_355) + { + _364 = all(uint3(3u, 1u, 5u) == s1.a.mA.mB.mC); + } + else + { + _364 = _355; + } + bool _373; + if (_364) + { + bool _822 = abs(8.0 - float3x2(s1.b.mA.mA.mA)[0].x) < 0.0500000007450580596923828125; + bool _815; + if (_822) + { + _815 = abs(3.0 - float3x2(s1.b.mA.mA.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _815 = _822; + } + bool _788; + if (_815) + { + bool _855 = abs(-float3x2(s1.b.mA.mA.mA)[1].x) < 0.0500000007450580596923828125; + bool _848; + if (_855) + { + _848 = abs(2.0 - float3x2(s1.b.mA.mA.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _848 = _855; + } + _788 = _848; + } + else + { + _788 = _815; + } + bool _796; + if (_788) + { + bool _888 = abs(1.0 - float3x2(s1.b.mA.mA.mA)[2].x) < 0.0500000007450580596923828125; + bool _881; + if (_888) + { + _881 = abs(8.0 - float3x2(s1.b.mA.mA.mA)[2].y) < 0.0500000007450580596923828125; + } + else + { + _881 = _888; + } + _796 = _881; + } + else + { + _796 = _788; + } + _373 = _796; + } + else + { + _373 = _364; + } + bool _382; + if (_373) + { + bool _970 = 
abs(-float4x3(s1.b.mA.mA.mB)[0].x) < 0.0500000007450580596923828125; + bool _955; + if (_970) + { + _955 = abs(9.0 - float4x3(s1.b.mA.mA.mB)[0].y) < 0.0500000007450580596923828125; + } + else + { + _955 = _970; + } + bool _963; + if (_955) + { + _963 = abs((-1.0) - float4x3(s1.b.mA.mA.mB)[0].z) < 0.0500000007450580596923828125; + } + else + { + _963 = _955; + } + bool _918; + if (_963) + { + bool _1020 = abs((-1.0) - float4x3(s1.b.mA.mA.mB)[1].x) < 0.0500000007450580596923828125; + bool _1005; + if (_1020) + { + _1005 = abs((-7.0) - float4x3(s1.b.mA.mA.mB)[1].y) < 0.0500000007450580596923828125; + } + else + { + _1005 = _1020; + } + bool _1013; + if (_1005) + { + _1013 = abs(7.0 - float4x3(s1.b.mA.mA.mB)[1].z) < 0.0500000007450580596923828125; + } + else + { + _1013 = _1005; + } + _918 = _1013; + } + else + { + _918 = _963; + } + bool _926; + if (_918) + { + bool _1070 = abs((-4.0) - float4x3(s1.b.mA.mA.mB)[2].x) < 0.0500000007450580596923828125; + bool _1055; + if (_1070) + { + _1055 = abs((-3.0) - float4x3(s1.b.mA.mA.mB)[2].y) < 0.0500000007450580596923828125; + } + else + { + _1055 = _1070; + } + bool _1063; + if (_1055) + { + _1063 = abs(1.0 - float4x3(s1.b.mA.mA.mB)[2].z) < 0.0500000007450580596923828125; + } + else + { + _1063 = _1055; + } + _926 = _1063; + } + else + { + _926 = _918; + } + bool _934; + if (_926) + { + bool _1120 = abs((-4.0) - float4x3(s1.b.mA.mA.mB)[3].x) < 0.0500000007450580596923828125; + bool _1105; + if (_1120) + { + _1105 = abs((-9.0) - float4x3(s1.b.mA.mA.mB)[3].y) < 0.0500000007450580596923828125; + } + else + { + _1105 = _1120; + } + bool _1113; + if (_1105) + { + _1113 = abs(1.0 - float4x3(s1.b.mA.mA.mB)[3].z) < 0.0500000007450580596923828125; + } + else + { + _1113 = _1105; + } + _934 = _1113; + } + else + { + _934 = _926; + } + _382 = _934; + } + else + { + _382 = _373; + } + bool _391; + if (_382) + { + _391 = all(bool3(true, false, false) == bool3(s1.c[0].mA[0])); + } + else + { + _391 = _382; + } + bool _400; + if (_391) + { + 
_400 = all(bool3(true, false, false) == bool3(s1.c[0].mA[1])); + } + else + { + _400 = _391; + } + bool _409; + if (_400) + { + _409 = all(bool3(false) == bool3(s1.c[1].mA[0])); + } + else + { + _409 = _400; + } + bool _418; + if (_409) + { + _418 = all(bool3(false) == bool3(s1.c[1].mA[1])); + } + else + { + _418 = _409; + } + if (_418) + { + _424.passed++; + } +} + diff --git a/reference/opt/shaders-msl/comp/shared-matrix-nested-struct.comp b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct.comp new file mode 100644 index 00000000000..6565536651d --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-matrix-nested-struct.comp @@ -0,0 +1,1443 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device 
matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for 
(size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const 
ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + 
device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& 
operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device 
spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + 
columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; 
+ } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + 
return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant 
spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data 
spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data 
= default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; 
+typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + uint a; + float4 b; +}; + +struct sA +{ + spvStorage_float4x4 mA; + short3 mB; + short4 mC; +}; + +struct sB +{ + short2 mA; +}; + +struct sC +{ + float mA; + uint4 mB; + float mC; +}; + +struct sD +{ + sA mA; + sB mB; + sC mC; +}; + +struct sE +{ + sD mA; +}; + +struct sF +{ + uint3 mA; + short mB; +}; + +struct sG +{ + sF mA; + spvStorage_float3x2 mB; +}; + +struct sH +{ + sG mA; + float2 mB; +}; + +struct sI +{ + spvStorage_float2x2 mA; + short3 mB; + short4 mC; +}; + +struct sJ +{ + sI mA; + short3 mB; +}; + +struct sK +{ + short2 mA; + sJ mB; + int2 mC; +}; + +struct S2 +{ + sE a; + int3 b; + sH c; + sK d; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _612 [[buffer(0)]]) +{ + threadgroup S1 s1; + threadgroup S2 s2; + s1.a = 0u; + s1.b = float4(8.0, 8.0, 0.0, -4.0); + s2.a.mA.mA.mA = spvStorage_float4x4(float4x4(float4(-5.0, 9.0, -4.0, -6.0), float4(-1.0, -1.0, -2.0, 1.0), float4(6.0, 5.0, 7.0, -2.0), float4(-4.0, -9.0, 8.0, 3.0))); + s2.a.mA.mA.mB = short3(bool3(true, false, false)); + s2.a.mA.mA.mC = short4(bool4(true, true, true, false)); + s2.a.mA.mB.mA = short2(bool2(true)); + s2.a.mA.mC.mA = 7.0; + s2.a.mA.mC.mB = uint4(8u, 6u, 2u, 0u); + s2.a.mA.mC.mC = -9.0; + s2.b = int3(1, -4, 0); + s2.c.mA.mA.mA = uint3(4u, 9u, 1u); + s2.c.mA.mA.mB = short(false); + s2.c.mA.mB = spvStorage_float3x2(float3x2(float2(3.0, -5.0), float2(-1.0, -5.0), float2(-1.0, -9.0))); + s2.c.mB = float2(-6.0, -9.0); + s2.d.mA = short2(bool2(true, false)); + s2.d.mB.mA.mA = spvStorage_float2x2(float2x2(float2(-2.0, 3.0), float2(7.0, 2.0))); + s2.d.mB.mA.mB = short3(bool3(false)); + s2.d.mB.mA.mC = short4(bool4(false, false, false, true)); + s2.d.mB.mB = short3(bool3(true, false, false)); + s2.d.mC = int2(-9, 0); + threadgroup_barrier(mem_flags::mem_threadgroup); + 
threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _622 = 0u == s1.a; + bool _444; + if (_622) + { + bool _668 = abs(8.0 - s1.b.x) < 0.0500000007450580596923828125; + bool _645; + if (_668) + { + _645 = abs(8.0 - s1.b.y) < 0.0500000007450580596923828125; + } + else + { + _645 = _668; + } + bool _653; + if (_645) + { + _653 = abs(-s1.b.z) < 0.0500000007450580596923828125; + } + else + { + _653 = _645; + } + bool _661; + if (_653) + { + _661 = abs((-4.0) - s1.b.w) < 0.0500000007450580596923828125; + } + else + { + _661 = _653; + } + _444 = _661; + } + else + { + _444 = _622; + } + bool _453; + if (_444) + { + bool _774 = abs((-5.0) - float4x4(s2.a.mA.mA.mA)[0].x) < 0.0500000007450580596923828125; + bool _751; + if (_774) + { + _751 = abs(9.0 - float4x4(s2.a.mA.mA.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _751 = _774; + } + bool _759; + if (_751) + { + _759 = abs((-4.0) - float4x4(s2.a.mA.mA.mA)[0].z) < 0.0500000007450580596923828125; + } + else + { + _759 = _751; + } + bool _767; + if (_759) + { + _767 = abs((-6.0) - float4x4(s2.a.mA.mA.mA)[0].w) < 0.0500000007450580596923828125; + } + else + { + _767 = _759; + } + bool _712; + if (_767) + { + bool _841 = abs((-1.0) - float4x4(s2.a.mA.mA.mA)[1].x) < 0.0500000007450580596923828125; + bool _818; + if (_841) + { + _818 = abs((-1.0) - float4x4(s2.a.mA.mA.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _818 = _841; + } + bool _826; + if (_818) + { + _826 = abs((-2.0) - float4x4(s2.a.mA.mA.mA)[1].z) < 0.0500000007450580596923828125; + } + else + { + _826 = _818; + } + bool _834; + if (_826) + { + _834 = abs(1.0 - float4x4(s2.a.mA.mA.mA)[1].w) < 0.0500000007450580596923828125; + } + else + { + _834 = _826; + } + _712 = _834; + } + else + { + _712 = _767; + } + bool _720; + if (_712) + { + bool _908 = abs(6.0 - float4x4(s2.a.mA.mA.mA)[2].x) < 0.0500000007450580596923828125; + bool _885; + if (_908) + { + _885 = abs(5.0 - 
float4x4(s2.a.mA.mA.mA)[2].y) < 0.0500000007450580596923828125; + } + else + { + _885 = _908; + } + bool _893; + if (_885) + { + _893 = abs(7.0 - float4x4(s2.a.mA.mA.mA)[2].z) < 0.0500000007450580596923828125; + } + else + { + _893 = _885; + } + bool _901; + if (_893) + { + _901 = abs((-2.0) - float4x4(s2.a.mA.mA.mA)[2].w) < 0.0500000007450580596923828125; + } + else + { + _901 = _893; + } + _720 = _901; + } + else + { + _720 = _712; + } + bool _728; + if (_720) + { + bool _975 = abs((-4.0) - float4x4(s2.a.mA.mA.mA)[3].x) < 0.0500000007450580596923828125; + bool _952; + if (_975) + { + _952 = abs((-9.0) - float4x4(s2.a.mA.mA.mA)[3].y) < 0.0500000007450580596923828125; + } + else + { + _952 = _975; + } + bool _960; + if (_952) + { + _960 = abs(8.0 - float4x4(s2.a.mA.mA.mA)[3].z) < 0.0500000007450580596923828125; + } + else + { + _960 = _952; + } + bool _968; + if (_960) + { + _968 = abs(3.0 - float4x4(s2.a.mA.mA.mA)[3].w) < 0.0500000007450580596923828125; + } + else + { + _968 = _960; + } + _728 = _968; + } + else + { + _728 = _720; + } + _453 = _728; + } + else + { + _453 = _444; + } + bool _462; + if (_453) + { + _462 = all(bool3(true, false, false) == bool3(s2.a.mA.mA.mB)); + } + else + { + _462 = _453; + } + bool _471; + if (_462) + { + _471 = all(bool4(true, true, true, false) == bool4(s2.a.mA.mA.mC)); + } + else + { + _471 = _462; + } + bool _480; + if (_471) + { + _480 = all(bool2(true) == bool2(s2.a.mA.mB.mA)); + } + else + { + _480 = _471; + } + bool _489; + if (_480) + { + _489 = abs(7.0 - s2.a.mA.mC.mA) < 0.0500000007450580596923828125; + } + else + { + _489 = _480; + } + bool _498; + if (_489) + { + _498 = all(uint4(8u, 6u, 2u, 0u) == s2.a.mA.mC.mB); + } + else + { + _498 = _489; + } + bool _507; + if (_498) + { + _507 = abs((-9.0) - s2.a.mA.mC.mC) < 0.0500000007450580596923828125; + } + else + { + _507 = _498; + } + bool _516; + if (_507) + { + _516 = all(int3(1, -4, 0) == s2.b); + } + else + { + _516 = _507; + } + bool _525; + if (_516) + { + _525 = 
all(uint3(4u, 9u, 1u) == s2.c.mA.mA.mA); + } + else + { + _525 = _516; + } + bool _534; + if (_525) + { + _534 = false == bool(s2.c.mA.mA.mB); + } + else + { + _534 = _525; + } + bool _543; + if (_534) + { + bool _1106 = abs(3.0 - float3x2(s2.c.mA.mB)[0].x) < 0.0500000007450580596923828125; + bool _1099; + if (_1106) + { + _1099 = abs((-5.0) - float3x2(s2.c.mA.mB)[0].y) < 0.0500000007450580596923828125; + } + else + { + _1099 = _1106; + } + bool _1072; + if (_1099) + { + bool _1139 = abs((-1.0) - float3x2(s2.c.mA.mB)[1].x) < 0.0500000007450580596923828125; + bool _1132; + if (_1139) + { + _1132 = abs((-5.0) - float3x2(s2.c.mA.mB)[1].y) < 0.0500000007450580596923828125; + } + else + { + _1132 = _1139; + } + _1072 = _1132; + } + else + { + _1072 = _1099; + } + bool _1080; + if (_1072) + { + bool _1172 = abs((-1.0) - float3x2(s2.c.mA.mB)[2].x) < 0.0500000007450580596923828125; + bool _1165; + if (_1172) + { + _1165 = abs((-9.0) - float3x2(s2.c.mA.mB)[2].y) < 0.0500000007450580596923828125; + } + else + { + _1165 = _1172; + } + _1080 = _1165; + } + else + { + _1080 = _1072; + } + _543 = _1080; + } + else + { + _543 = _534; + } + bool _552; + if (_543) + { + bool _1205 = abs((-6.0) - s2.c.mB.x) < 0.0500000007450580596923828125; + bool _1198; + if (_1205) + { + _1198 = abs((-9.0) - s2.c.mB.y) < 0.0500000007450580596923828125; + } + else + { + _1198 = _1205; + } + _552 = _1198; + } + else + { + _552 = _543; + } + bool _561; + if (_552) + { + _561 = all(bool2(true, false) == bool2(s2.d.mA)); + } + else + { + _561 = _552; + } + bool _570; + if (_561) + { + bool _1263 = abs((-2.0) - float2x2(s2.d.mB.mA.mA)[0].x) < 0.0500000007450580596923828125; + bool _1256; + if (_1263) + { + _1256 = abs(3.0 - float2x2(s2.d.mB.mA.mA)[0].y) < 0.0500000007450580596923828125; + } + else + { + _1256 = _1263; + } + bool _1237; + if (_1256) + { + bool _1296 = abs(7.0 - float2x2(s2.d.mB.mA.mA)[1].x) < 0.0500000007450580596923828125; + bool _1289; + if (_1296) + { + _1289 = abs(2.0 - 
float2x2(s2.d.mB.mA.mA)[1].y) < 0.0500000007450580596923828125; + } + else + { + _1289 = _1296; + } + _1237 = _1289; + } + else + { + _1237 = _1256; + } + _570 = _1237; + } + else + { + _570 = _561; + } + bool _579; + if (_570) + { + _579 = all(bool3(false) == bool3(s2.d.mB.mA.mB)); + } + else + { + _579 = _570; + } + bool _588; + if (_579) + { + _588 = all(bool4(false, false, false, true) == bool4(s2.d.mB.mA.mC)); + } + else + { + _588 = _579; + } + bool _597; + if (_588) + { + _597 = all(bool3(true, false, false) == bool3(s2.d.mB.mB)); + } + else + { + _597 = _588; + } + bool _606; + if (_597) + { + _606 = all(int2(-9, 0) == s2.d.mC); + } + else + { + _606 = _597; + } + if (_606) + { + _612.passed++; + } +} + diff --git a/reference/opt/shaders-msl/comp/shared-struct-bool-cast.comp b/reference/opt/shaders-msl/comp/shared-struct-bool-cast.comp new file mode 100644 index 00000000000..538ab0bd69c --- /dev/null +++ b/reference/opt/shaders-msl/comp/shared-struct-bool-cast.comp @@ -0,0 +1,63 @@ +#include +#include + +using namespace metal; + +struct S1 +{ + int3 a; + uint2 b; + short4 c; + uint d; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device block& _132 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a = int3(6, 8, 8); + s1.b = uint2(4u); + s1.c = short4(bool4(false, false, false, true)); + s1.d = 6u; + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool _144 = all(int3(6, 8, 8) == s1.a); + bool _108; + if (_144) + { + _108 = all(uint2(4u) == s1.b); + } + else + { + _108 = _144; + } + bool _117; + if (_108) + { + _117 = all(bool4(false, false, false, true) == bool4(s1.c)); + } + else + { + _117 = _108; + } + bool _126; + if (_117) + { + _126 = 6u == s1.d; + } + else + { + _126 = _117; + } + if (_126) + { + _132.passed++; + } +} + diff --git 
a/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp b/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp index d3c8b7dc4a3..8f54f0528dc 100644 --- a/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp +++ b/reference/opt/shaders-msl/comp/spec-constant-op-member-array.comp @@ -40,6 +40,7 @@ struct SSBO constant int e_tmp [[function_constant(3)]]; constant int e = is_function_constant_defined(e_tmp) ? e_tmp : 400; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/opt/shaders-msl/comp/spec-constant-work-group-size.comp b/reference/opt/shaders-msl/comp/spec-constant-work-group-size.comp index bb796ab95d7..de30edec155 100644 --- a/reference/opt/shaders-msl/comp/spec-constant-work-group-size.comp +++ b/reference/opt/shaders-msl/comp/spec-constant-work-group-size.comp @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + #ifndef SPIRV_CROSS_CONSTANT_ID_1 #define SPIRV_CROSS_CONSTANT_ID_1 2 #endif @@ -27,7 +68,7 @@ constant int _32 = (1 - a); kernel void main0(device SSBO& _17 [[buffer(0)]]) { - int spec_const_array_size[b]; + spvUnsafeArray spec_const_array_size; spec_const_array_size[a] = a; _17.v[_30] = b + spec_const_array_size[_32]; } diff --git a/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp b/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp index 905222d398d..b584f307ef2 100644 --- a/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp +++ b/reference/opt/shaders-msl/comp/storage-buffer-std140-vector-array.comp @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Sub { float4 f[2]; @@ -16,33 +57,35 @@ struct SSBO Sub sub[2]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float _153[2]; - _153[0] = _27.sub[gl_WorkGroupID.x].f[0].x; - _153[1] = _27.sub[gl_WorkGroupID.x].f[1].x; - float2 _154[2]; - _154[0] = _27.sub[gl_WorkGroupID.x].f2[0].xy; - _154[1] = _27.sub[gl_WorkGroupID.x].f2[1].xy; - float3 _155[2]; - _155[0] = _27.sub[gl_WorkGroupID.x].f3[0]; - _155[1] = _27.sub[gl_WorkGroupID.x].f3[1]; - float4 _156[2]; - _156[0] = _27.sub[gl_WorkGroupID.x].f4[0]; - _156[1] = _27.sub[gl_WorkGroupID.x].f4[1]; - _153[gl_GlobalInvocationID.x] += 1.0; - _154[gl_GlobalInvocationID.x] += float2(2.0); - _155[gl_GlobalInvocationID.x] += float3(3.0); - _156[gl_GlobalInvocationID.x] += float4(4.0); - _27.sub[gl_WorkGroupID.x].f[0].x = _153[0]; - _27.sub[gl_WorkGroupID.x].f[1].x = _153[1]; - _27.sub[gl_WorkGroupID.x].f2[0].xy = _154[0]; - _27.sub[gl_WorkGroupID.x].f2[1].xy = _154[1]; - _27.sub[gl_WorkGroupID.x].f3[0] = _155[0]; - _27.sub[gl_WorkGroupID.x].f3[1] = _155[1]; - _27.sub[gl_WorkGroupID.x].f4[0] = _156[0]; - _27.sub[gl_WorkGroupID.x].f4[1] = _156[1]; - _27.sub[0].f[0].x += 5.0; - 
_27.sub[0].f2[1].xy += float2(5.0); + spvUnsafeArray _155; + _155[0] = _27.sub[gl_WorkGroupID.x].f[0].x; + _155[1] = _27.sub[gl_WorkGroupID.x].f[1].x; + spvUnsafeArray _156; + _156[0] = _27.sub[gl_WorkGroupID.x].f2[0].xy; + _156[1] = _27.sub[gl_WorkGroupID.x].f2[1].xy; + spvUnsafeArray _157; + _157[0] = _27.sub[gl_WorkGroupID.x].f3[0]; + _157[1] = _27.sub[gl_WorkGroupID.x].f3[1]; + spvUnsafeArray _158; + _158[0] = _27.sub[gl_WorkGroupID.x].f4[0]; + _158[1] = _27.sub[gl_WorkGroupID.x].f4[1]; + _155[gl_GlobalInvocationID.x] += 1.0; + _156[gl_GlobalInvocationID.x] += float2(2.0); + _157[gl_GlobalInvocationID.x] += float3(3.0); + _158[gl_GlobalInvocationID.x] += float4(4.0); + (device float&)_27.sub[gl_WorkGroupID.x].f[0] = _155[0]; + (device float&)_27.sub[gl_WorkGroupID.x].f[1] = _155[1]; + (device float2&)_27.sub[gl_WorkGroupID.x].f2[0] = _156[0]; + (device float2&)_27.sub[gl_WorkGroupID.x].f2[1] = _156[1]; + _27.sub[gl_WorkGroupID.x].f3[0] = _157[0]; + _27.sub[gl_WorkGroupID.x].f3[1] = _157[1]; + _27.sub[gl_WorkGroupID.x].f4[0] = _158[0]; + _27.sub[gl_WorkGroupID.x].f4[1] = _158[1]; + (device float&)_27.sub[0].f[0] = _27.sub[0].f[0].x + 5.0; + (device float2&)_27.sub[0].f2[1] = _27.sub[0].f2[1].xy + float2(5.0); } diff --git a/reference/opt/shaders-msl/comp/struct-layout.comp b/reference/opt/shaders-msl/comp/struct-layout.comp index d4413625271..0445f5aef9c 100644 --- a/reference/opt/shaders-msl/comp/struct-layout.comp +++ b/reference/opt/shaders-msl/comp/struct-layout.comp @@ -18,6 +18,8 @@ struct SSBO Foo in_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _23 [[buffer(0)]], const device SSBO& _30 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _23.out_data[gl_GlobalInvocationID.x].m = _30.in_data[gl_GlobalInvocationID.x].m * _30.in_data[gl_GlobalInvocationID.x].m; diff --git a/reference/opt/shaders-msl/comp/struct-nested.comp b/reference/opt/shaders-msl/comp/struct-nested.comp 
index 6a1419cecbf..ad706c59095 100644 --- a/reference/opt/shaders-msl/comp/struct-nested.comp +++ b/reference/opt/shaders-msl/comp/struct-nested.comp @@ -18,6 +18,8 @@ struct dstbuffer s2 test[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device dstbuffer& _19 [[buffer(0)]]) { _19.test[0].b.a = 0; diff --git a/reference/opt/shaders-msl/comp/struct-packing.comp b/reference/opt/shaders-msl/comp/struct-packing.comp index 35cf1b22cb4..dc1654399d3 100644 --- a/reference/opt/shaders-msl/comp/struct-packing.comp +++ b/reference/opt/shaders-msl/comp/struct-packing.comp @@ -3,12 +3,11 @@ using namespace metal; -typedef packed_float2 packed_rm_float2x3[3]; - struct S0 { float2 a[1]; float b; + char _m0_final_padding[4]; }; struct S1 @@ -21,6 +20,7 @@ struct S2 { float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3 @@ -45,6 +45,7 @@ struct Content S3 m3; float m4; S4 m3s[8]; + char _m0_final_padding[8]; }; struct SSBO1 @@ -58,17 +59,17 @@ struct SSBO1 float3x2 m3; float2x2 m4; float2x2 m5[9]; - packed_rm_float2x3 m6[4][2]; - char _m10_pad[8]; - float3x2 m7; - char _m11_pad[8]; + float3x2 m6[4][2]; + float2x3 m7; float array[1]; }; struct S0_1 { - float4 a[1]; + float2 a[1]; + char _m1_pad[8]; float b; + char _m0_final_padding[12]; }; struct S1_1 @@ -81,6 +82,7 @@ struct S2_1 { float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3_1 @@ -92,6 +94,7 @@ struct S3_1 struct S4_1 { float2 c; + char _m0_final_padding[8]; }; struct Content_1 @@ -104,8 +107,8 @@ struct Content_1 S2_1 m2; S3_1 m3; float m4; - char _m8_pad[12]; - /* FIXME: A padded struct is needed here. If you see this message, file a bug! 
*/ S4_1 m3s[8]; + char _m8_pad[8]; + S4_1 m3s[8]; }; struct SSBO0 @@ -116,16 +119,18 @@ struct SSBO0 float4 array[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]]) { Content_1 _60 = ssbo_140.content; - ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy; + ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0]; ssbo_430.content.m0s[0].b = _60.m0s[0].b; ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a); ssbo_430.content.m1s[0].b = _60.m1s[0].b; ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0]; ssbo_430.content.m2s[0].b = _60.m2s[0].b; - ssbo_430.content.m0.a[0] = _60.m0.a[0].xy; + ssbo_430.content.m0.a[0] = _60.m0.a[0]; ssbo_430.content.m0.b = _60.m0.b; ssbo_430.content.m1.a = float3(_60.m1.a); ssbo_430.content.m1.b = _60.m1.b; @@ -142,6 +147,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [ ssbo_430.content.m3s[5].c = _60.m3s[5].c; ssbo_430.content.m3s[6].c = _60.m3s[6].c; ssbo_430.content.m3s[7].c = _60.m3s[7].c; - ssbo_430.content.m1.a = ssbo_430.content.m3.a * float3x2(float2(ssbo_430.m6[1][1][0]), float2(ssbo_430.m6[1][1][1]), float2(ssbo_430.m6[1][1][2])); + ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1]; } diff --git a/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp new file mode 100644 index 00000000000..c1eccf27805 --- /dev/null +++ b/reference/opt/shaders-msl/comp/threadgroup-boolean-workaround.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 values[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u); + +kernel void main0(device SSBO& _23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + threadgroup short4 foo[4]; + foo[gl_LocalInvocationIndex] = 
short4(_23.values[gl_GlobalInvocationID.x] != float4(10.0)); + threadgroup_barrier(mem_flags::mem_threadgroup); + _23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u])); +} + diff --git a/reference/opt/shaders-msl/comp/torture-loop.comp b/reference/opt/shaders-msl/comp/torture-loop.comp index 4c367d3e6da..ff7e02e2022 100644 --- a/reference/opt/shaders-msl/comp/torture-loop.comp +++ b/reference/opt/shaders-msl/comp/torture-loop.comp @@ -14,29 +14,31 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - float4 _99; - _99 = _24.in_data[gl_GlobalInvocationID.x]; - for (int _93 = 0; (_93 + 1) < 10; ) + float4 _101; + _101 = _24.in_data[gl_GlobalInvocationID.x]; + for (int _95 = 0; (_95 + 1) < 10; ) { - _99 *= 2.0; - _93 += 2; + _101 *= 2.0; + _95 += 2; continue; } - float4 _98; - _98 = _99; - float4 _103; - for (uint _94 = 0u; _94 < 16u; _98 = _103, _94++) + float4 _100; + _100 = _101; + float4 _105; + for (uint _96 = 0u; _96 < 16u; _100 = _105, _96++) { - _103 = _98; - for (uint _100 = 0u; _100 < 30u; ) + _105 = _100; + for (uint _102 = 0u; _102 < 30u; ) { - _103 = _24.mvp * _103; - _100++; + _105 = _24.mvp * _105; + _102++; continue; } } - _89.out_data[gl_GlobalInvocationID.x] = _98; + _89.out_data[gl_GlobalInvocationID.x] = _100; } diff --git a/reference/opt/shaders-msl/comp/type-alias.comp b/reference/opt/shaders-msl/comp/type-alias.comp index 8a68933d82d..2f6a0b7ba8b 100644 --- a/reference/opt/shaders-msl/comp/type-alias.comp +++ b/reference/opt/shaders-msl/comp/type-alias.comp @@ -28,6 +28,8 @@ struct SSBO2 float4 outputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO0& _36 [[buffer(0)]], device SSBO1& _55 [[buffer(1)]], device SSBO2& _66 [[buffer(2)]], 
uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _66.outputs[gl_GlobalInvocationID.x] = _36.s0s[gl_GlobalInvocationID.x].a + _55.s1s[gl_GlobalInvocationID.x].a; diff --git a/reference/opt/shaders-msl/comp/type_casting_i64.msl22.comp b/reference/opt/shaders-msl/comp/type_casting_i64.msl22.comp new file mode 100644 index 00000000000..6820b077a1a --- /dev/null +++ b/reference/opt/shaders-msl/comp/type_casting_i64.msl22.comp @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +struct dst_buff_t +{ + int m0[1]; +}; + +struct src_buff_t +{ + int m0[1]; +}; + +constant int base_val_tmp [[function_constant(0)]]; +constant int base_val = is_function_constant_defined(base_val_tmp) ? base_val_tmp : 0; +constant long shift_val_tmp [[function_constant(1)]]; +constant long shift_val = is_function_constant_defined(shift_val_tmp) ? shift_val_tmp : 0l; +constant int offset = (base_val >> int(shift_val)); +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device dst_buff_t& dst_buff [[buffer(0)]], device src_buff_t& src_buff [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + dst_buff.m0[gl_GlobalInvocationID.x] = src_buff.m0[gl_GlobalInvocationID.x] + offset; +} + diff --git a/reference/opt/shaders-msl/comp/udiv.comp b/reference/opt/shaders-msl/comp/udiv.comp index 32874ad7879..7f7315b882a 100644 --- a/reference/opt/shaders-msl/comp/udiv.comp +++ b/reference/opt/shaders-msl/comp/udiv.comp @@ -13,6 +13,8 @@ struct SSBO uint inputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u; diff --git a/reference/opt/shaders-msl/comp/writable-ssbo.comp b/reference/opt/shaders-msl/comp/writable-ssbo.comp index 9dc53b6dd5d..310cda7fef9 100644 --- 
a/reference/opt/shaders-msl/comp/writable-ssbo.comp +++ b/reference/opt/shaders-msl/comp/writable-ssbo.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp b/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp index a37fe519a55..cea12980c67 100644 --- a/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp +++ b/reference/opt/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp @@ -91,6 +91,8 @@ struct ResType_7 int4 _m1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1)]]) { ResType _25; diff --git a/reference/opt/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc b/reference/opt/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc index a5e30b6de1a..01fceeb6c7b 100644 --- a/reference/opt/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc +++ b/reference/opt/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float3 vVertex; @@ -10,7 +51,7 @@ struct main0_out struct main0_patchOut { - float3 vPatch[2]; + spvUnsafeArray vPatch; }; struct main0_in @@ -28,7 +69,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ if (gl_InvocationID >= 4) return; gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput; - threadgroup_barrier(mem_flags::mem_device); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); if (gl_InvocationID == 0) { patchOut.vPatch[0] = float3(10.0); diff --git a/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc b/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..8ebde9d9d76 --- /dev/null +++ b/reference/opt/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc @@ -0,0 +1,39 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + float3 vFoo; +}; + +struct main0_in +{ + uint3 m_86; + ushort2 m_90; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* 
spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 1]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 1; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); + gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; +} + diff --git a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..184a4a6f9b3 --- /dev/null +++ b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc @@ -0,0 +1,36 @@ +#include +#include + +using namespace metal; + +struct Boo +{ + float3 a; + uint3 b; +}; + +struct main0_out +{ + Boo vVertex; +}; + +struct main0_in +{ + Boo vInput; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device 
main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0); +} + diff --git a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc index cd4d8d80e52..f5fd60a9f71 100644 --- a/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc +++ b/reference/opt/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc @@ -16,8 +16,8 @@ struct main0_out struct main0_in { - float3 Boo_a [[attribute(0)]]; - float3 Boo_b [[attribute(1)]]; + float3 vInput_a [[attribute(0)]]; + float3 vInput_b [[attribute(1)]]; }; kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) @@ -28,10 +28,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ threadgroup_barrier(mem_flags::mem_threadgroup); if (gl_InvocationID >= 4) return; - Boo vInput_24; - 
vInput_24.a = gl_in[gl_InvocationID].Boo_a; - vInput_24.b = gl_in[gl_InvocationID].Boo_b; - gl_out[gl_InvocationID].vVertex = vInput_24; + Boo _25 = Boo{ gl_in[gl_InvocationID].vInput_a, gl_in[gl_InvocationID].vInput_b }; + gl_out[gl_InvocationID].vVertex = _25; spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); diff --git a/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert b/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert new file mode 100644 index 00000000000..a414c98542c --- /dev/null +++ b/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_Position = float4(10.0); + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 4.0; + return out; +} + diff --git a/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert b/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert index a414c98542c..2d98929051b 100644 --- a/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert +++ b/reference/opt/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert @@ -7,6 +7,8 @@ struct main0_out { float4 gl_Position [[position]]; float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; }; vertex main0_out main0() @@ -15,6 +17,8 @@ vertex main0_out main0() out.gl_Position = float4(10.0); out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 4.0; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = 
out.gl_ClipDistance[1]; return out; } diff --git a/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert b/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert new file mode 100644 index 00000000000..b3c8b6bb278 --- /dev/null +++ b/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], uint3 spvDispatchBase [[grid_origin]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + uint gl_BaseVertex = spvDispatchBase.x; + uint gl_BaseInstance = spvDispatchBase.y; + out.gl_Position = float4(float(int(gl_BaseVertex)), float(int(gl_BaseInstance)), 0.0, 1.0); +} + diff --git a/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert b/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert index 1d203ba98bc..a32c1948f88 100644 --- a/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert +++ b/reference/opt/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert @@ -11,7 +11,7 @@ struct main0_out vertex main0_out main0(uint gl_BaseVertex [[base_vertex]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; - out.gl_Position = float4(float(gl_BaseVertex), float(gl_BaseInstance), 0.0, 1.0); + out.gl_Position = float4(float(int(gl_BaseVertex)), float(int(gl_BaseInstance)), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert b/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert index 387fe0a8353..b40528115f0 100644 --- 
a/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert +++ b/reference/opt/shaders-msl/flatten/rowmajor.flatten.vert @@ -7,7 +7,7 @@ struct UBO { float4x4 uMVPR; float4x4 uMVPC; - float2x4 uMVP; + float4x4 uMVP; }; struct main0_out diff --git a/reference/opt/shaders-msl/flatten/struct.flatten.vert b/reference/opt/shaders-msl/flatten/struct.flatten.vert index d97a34a859f..dc96ceae3ee 100644 --- a/reference/opt/shaders-msl/flatten/struct.flatten.vert +++ b/reference/opt/shaders-msl/flatten/struct.flatten.vert @@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.gl_Position = _18.uMVP * in.aVertex; out.vColor = float4(0.0); float3 _39 = in.aVertex.xyz - float3(_18.light.Position); - out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(_39) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_39))); + out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(_39) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(_39))); return out; } diff --git a/reference/opt/shaders-msl/frag/array-component-io.frag b/reference/opt/shaders-msl/frag/array-component-io.frag new file mode 100644 index 00000000000..9b4c5b5204f --- /dev/null +++ b/reference/opt/shaders-msl/frag/array-component-io.frag @@ -0,0 +1,99 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 m_location_0 [[color(0)]]; + float4 m_location_1 [[color(1)]]; + float4 m_location_2 [[color(2)]]; +}; + +struct main0_in +{ + float InC_0 [[user(locn0_1), flat]]; + float InA_0 [[user(locn1), flat]]; + float InC_1 [[user(locn1_1), flat]]; + float2 InB_0 [[user(locn1_2), flat]]; + float InA_1 [[user(locn2), flat]]; + float InC_2 [[user(locn2_1), flat]]; + float2 InB_1 [[user(locn2_2), flat]]; + float InD [[user(locn3_1), sample_perspective]]; + float InE [[user(locn4_2), center_no_perspective]]; + float InF [[user(locn5_3), centroid_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + float D = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + InA[0] = in.InA_0; + InA[1] = in.InA_1; + InB[0] = in.InB_0; + InB[1] = in.InB_1; + InC[0] = in.InC_0; + InC[1] = in.InC_1; + InC[2] = in.InC_2; + A = InA; + B = InB; + C = InC; + D = (in.InD + in.InE) + in.InF; + out.m_location_1.x = A[0]; + out.m_location_2.x = A[1]; + out.m_location_1.zw = B[0]; + out.m_location_2.zw = B[1]; + out.m_location_0.y = C[0]; + out.m_location_1.y = C[1]; + out.m_location_2.y = C[2]; + out.m_location_0.w = D; + return out; +} + diff --git 
a/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag index 9b757b6a31c..79f9025a78d 100644 --- a/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag +++ b/reference/opt/shaders-msl/frag/array-lut-no-loop-variable.frag @@ -1,9 +1,50 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; -constant float _17[5] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _17 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 5.0 }); struct main0_out { diff --git a/reference/opt/shaders-msl/frag/array-of-array-lut.frag b/reference/opt/shaders-msl/frag/array-of-array-lut.frag new file mode 100644 index 00000000000..ba553824e79 --- /dev/null +++ b/reference/opt/shaders-msl/frag/array-of-array-lut.frag @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _17 = spvUnsafeArray({ 1.0, 2.0, 3.0 }); +constant spvUnsafeArray _21 = spvUnsafeArray({ 4.0, 5.0, 6.0 }); +constant spvUnsafeArray, 2> _22 = spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0, 3.0 }), spvUnsafeArray({ 4.0, 5.0, 6.0 }) }); + +struct main0_out +{ + float vOutput [[color(0)]]; +}; + +struct main0_in +{ + int vIndex1 [[user(locn0)]]; + int vIndex2 [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.vOutput = _22[in.vIndex1][in.vIndex2]; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag new file mode 100644 index 00000000000..936b11dc853 --- /dev/null +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag @@ -0,0 +1,105 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread 
typename spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + +template +inline T spvGetSwizzle(vec x, T c, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::none: + return c; + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + } +} + +// Wrapper function that swizzles texture samples and fetches. +template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +struct UBO +{ + uint index; +}; + +struct UBO2 +{ + uint index2; +}; + +struct spvDescriptorSetBuffer0 +{ + array, 4> uSampler [[id(0)]]; + array uSamplerSmplr [[id(4)]]; + constant UBO* uUBO [[id(8)]]; + constant UBO2* m_50 [[id(9)]]; + constant uint* spvSwizzleConstants [[id(10)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]]) +{ + main0_out out = {}; + constant uint* spvDescriptorSet0_uSamplerSwzl = &spvDescriptorSet0.spvSwizzleConstants[0]; + out.FragColor = spvTextureSwizzle(spvDescriptorSet0.uSampler[(*spvDescriptorSet0.uUBO).index].sample(spvDescriptorSet0.uSamplerSmplr[(*spvDescriptorSet0.uUBO).index], 
in.vUV), spvDescriptorSet0_uSamplerSwzl[(*spvDescriptorSet0.uUBO).index]); + out.FragColor += spvTextureSwizzle(spvDescriptorSet0.uSampler[(*spvDescriptorSet0.m_50).index2].sample(spvDescriptorSet0.uSamplerSmplr[(*spvDescriptorSet0.m_50).index2], in.vUV), spvDescriptorSet0_uSamplerSwzl[(*spvDescriptorSet0.m_50).index2]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag new file mode 100644 index 00000000000..c680f04b573 --- /dev/null +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag @@ -0,0 +1,96 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + +template +inline T spvGetSwizzle(vec x, T c, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::none: + return c; + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + } +} + +// Wrapper function that swizzles texture samples and fetches. 
+template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +struct UBO +{ + uint index; +}; + +struct UBO2 +{ + uint index2; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], constant UBO& uUBO [[buffer(0)]], constant UBO2& _50 [[buffer(1)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) +{ + main0_out out = {}; + constant uint* uSamplerSwzl = &spvSwizzleConstants[0]; + out.FragColor = spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], in.vUV), uSamplerSwzl[uUBO.index]); + out.FragColor += spvTextureSwizzle(uSampler[_50.index2].sample(uSamplerSmplr[_50.index2], in.vUV), uSamplerSwzl[_50.index2]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag index 2160e0be61e..cc503e86957 100644 --- a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag @@ -5,22 +5,17 @@ using namespace metal; -struct spvDescriptorSetBuffer0 -{ - array, 4> uSampler0 [[id(0)]]; - array uSampler0Smplr [[id(4)]]; - constant uint* spvSwizzleConstants [[id(8)]]; -}; - -struct main0_out +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct 
spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { - float4 FragColor [[color(0)]]; -}; - -struct main0_in + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { - float2 vUV [[user(locn0)]]; -}; + return static_cast(x); +} enum class spvSwizzle : uint { @@ -33,18 +28,6 @@ enum class spvSwizzle : uint alpha }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) -{ - return static_cast(x); -} -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) -{ - return static_cast(x); -} - template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -82,65 +65,22 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +struct spvDescriptorSetBuffer0 { - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} + array, 4> uSampler0 [[id(0)]]; + array uSampler0Smplr [[id(4)]]; + constant uint* spvSwizzleConstants [[id(8)]]; +}; -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) +struct main0_out { - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]], texture2d uSampler1 [[texture(0)]], sampler uSampler1Smplr [[sampler(0)]]) { @@ -148,9 +88,10 @@ fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuff constant uint* spvDescriptorSet0_uSampler0Swzl = &spvDescriptorSet0.spvSwizzleConstants[0]; constant uint& uSampler1Swzl = spvSwizzleConstants[0]; out.FragColor = spvTextureSwizzle(spvDescriptorSet0.uSampler0[2].sample(spvDescriptorSet0.uSampler0Smplr[2], in.vUV), spvDescriptorSet0_uSampler0Swzl[2]); - out.FragColor += spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl); + float4 _73 = spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl); + out.FragColor += _73; out.FragColor += spvTextureSwizzle(spvDescriptorSet0.uSampler0[1].sample(spvDescriptorSet0.uSampler0Smplr[1], in.vUV), spvDescriptorSet0_uSampler0Swzl[1]); - out.FragColor += spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, in.vUV), uSampler1Swzl); + out.FragColor += _73; return out; } diff --git a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag index 337abb99d8d..5b1d17c56cc 100644 --- a/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag +++ b/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag @@ -5,15 
+5,17 @@ using namespace metal; -struct main0_out +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { - float4 FragColor [[color(0)]]; -}; - -struct main0_in + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { - float2 vUV [[user(locn0)]]; -}; + return static_cast(x); +} enum class spvSwizzle : uint { @@ -26,18 +28,6 @@ enum class spvSwizzle : uint alpha }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) -{ - return static_cast(x); -} -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) -{ - return static_cast(x); -} - template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -75,65 +65,15 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +struct main0_out { - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} + float4 FragColor [[color(0)]]; +}; -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) +struct main0_in { - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} + float2 vUV [[user(locn0)]]; +}; fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) { diff --git a/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag b/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag index 53b8a74388b..012d99b5b76 100644 --- a/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag +++ b/reference/opt/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag @@ -15,14 +15,14 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNoPerspNV [[barycentric_coord, center_no_perspective]]; + float3 gl_BaryCoordNoPerspEXT [[barycentric_coord, center_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) { main0_out out = {}; int _23 = 3 * int(gl_PrimitiveID); - out.value = ((_19.uvs[_23] * in.gl_BaryCoordNoPerspNV.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordNoPerspNV.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordNoPerspNV.z); + out.value = ((_19.uvs[_23] * in.gl_BaryCoordNoPerspEXT.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordNoPerspEXT.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordNoPerspEXT.z); return out; } diff --git a/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag b/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag index ae2c704d055..d6e9dcdbf96 100644 --- a/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag +++ b/reference/opt/shaders-msl/frag/barycentric-nv.msl22.frag @@ -15,14 +15,14 @@ struct main0_out struct 
main0_in { - float3 gl_BaryCoordNV [[barycentric_coord, center_perspective]]; + float3 gl_BaryCoordEXT [[barycentric_coord, center_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) { main0_out out = {}; int _23 = 3 * int(gl_PrimitiveID); - out.value = ((_19.uvs[_23] * in.gl_BaryCoordNV.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordNV.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordNV.z); + out.value = ((_19.uvs[_23] * in.gl_BaryCoordEXT.x) + (_19.uvs[_23 + 1] * in.gl_BaryCoordEXT.y)) + (_19.uvs[_23 + 2] * in.gl_BaryCoordEXT.z); return out; } diff --git a/reference/opt/shaders-msl/frag/basic.force-sample.frag b/reference/opt/shaders-msl/frag/basic.force-sample.frag new file mode 100644 index 00000000000..b9706b73f56 --- /dev/null +++ b/reference/opt/shaders-msl/frag/basic.force-sample.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + out.FragColor = in.vColor * uTex.sample(uTexSmplr, in.vTex); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/bitcasting.1d-as-2d.frag b/reference/opt/shaders-msl/frag/bitcasting.1d-as-2d.frag new file mode 100644 index 00000000000..d341397f4c0 --- /dev/null +++ b/reference/opt/shaders-msl/frag/bitcasting.1d-as-2d.frag @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor0 [[color(0)]]; + float4 FragColor1 [[color(1)]]; +}; + +struct main0_in +{ + float4 VertGeom [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d TextureBase [[texture(0)]], texture2d TextureDetail [[texture(1)]], sampler TextureBaseSmplr 
[[sampler(0)]], sampler TextureDetailSmplr [[sampler(1)]]) +{ + main0_out out = {}; + float4 _22 = TextureBase.sample(TextureBaseSmplr, float2(in.VertGeom.x, 0.5)); + float4 _30 = TextureDetail.sample(TextureDetailSmplr, float2(in.VertGeom.x, 0.5), int2(3, 0)); + out.FragColor0 = as_type(as_type(_22)) * as_type(as_type(_30)); + out.FragColor1 = as_type(as_type(_22)) * as_type(as_type(_30)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/buffer-read-write.frag b/reference/opt/shaders-msl/frag/buffer-read-write.frag index 2b2ac7f0608..4f114ed7247 100644 --- a/reference/opt/shaders-msl/frag/buffer-read-write.frag +++ b/reference/opt/shaders-msl/frag/buffer-read-write.frag @@ -5,17 +5,18 @@ using namespace metal; -struct main0_out -{ - float4 FragColor [[color(0)]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + fragment main0_out main0(texture2d buf [[texture(0)]], texture2d bufOut [[texture(1)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; diff --git a/reference/opt/shaders-msl/frag/clip-distance-varying.frag b/reference/opt/shaders-msl/frag/clip-distance-varying.frag new file mode 100644 index 00000000000..9a72d5ba39f --- /dev/null +++ b/reference/opt/shaders-msl/frag/clip-distance-varying.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_ClipDistance = {}; + gl_ClipDistance[0] = in.gl_ClipDistance_0; + gl_ClipDistance[1] = in.gl_ClipDistance_1; + out.FragColor = float4((1.0 - gl_ClipDistance[0]) - gl_ClipDistance[1]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/constant-array.frag b/reference/opt/shaders-msl/frag/constant-array.frag index a0b830daae3..ca7efc5341d 100644 --- a/reference/opt/shaders-msl/frag/constant-array.frag +++ b/reference/opt/shaders-msl/frag/constant-array.frag @@ -1,19 +1,59 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Foobar { float a; float b; }; -constant float4 _37[3] = { float4(1.0), float4(2.0), float4(3.0) }; -constant float4 _49[2] = { float4(1.0), float4(2.0) }; -constant float4 _54[2] = { float4(8.0), float4(10.0) }; -constant float4 _55[2][2] = { { float4(1.0), float4(2.0) }, { float4(8.0), float4(10.0) } }; -constant Foobar _75[2] = { Foobar{ 10.0, 40.0 }, Foobar{ 90.0, 70.0 } }; +constant spvUnsafeArray _37 = spvUnsafeArray({ float4(1.0), float4(2.0), float4(3.0) }); +constant spvUnsafeArray _49 = spvUnsafeArray({ float4(1.0), float4(2.0) }); +constant spvUnsafeArray _54 = spvUnsafeArray({ float4(8.0), float4(10.0) }); +constant spvUnsafeArray, 2> _55 = spvUnsafeArray, 2>({ spvUnsafeArray({ float4(1.0), float4(2.0) }), spvUnsafeArray({ float4(8.0), float4(10.0) }) }); struct main0_out { @@ -27,6 +67,8 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { + spvUnsafeArray _75 = spvUnsafeArray({ Foobar{ 10.0, 40.0 }, Foobar{ 90.0, 70.0 } }); + main0_out out = {}; out.FragColor = ((_37[in.index] + _55[in.index][in.index + 1]) + float4(30.0)) + float4(_75[in.index].a + _75[in.index].b); return out; diff --git a/reference/opt/shaders-msl/frag/constant-composites.frag b/reference/opt/shaders-msl/frag/constant-composites.frag index 335cbcd796c..e0fa980fb85 100644 --- 
a/reference/opt/shaders-msl/frag/constant-composites.frag +++ b/reference/opt/shaders-msl/frag/constant-composites.frag @@ -1,16 +1,56 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Foo { float a; float b; }; -constant float _16[4] = { 1.0, 4.0, 3.0, 2.0 }; -constant Foo _28[2] = { Foo{ 10.0, 20.0 }, Foo{ 30.0, 40.0 } }; +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 4.0, 3.0, 2.0 }); struct main0_out { @@ -24,6 +64,8 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { + spvUnsafeArray _28 = spvUnsafeArray({ Foo{ 10.0, 20.0 }, Foo{ 30.0, 40.0 } }); + main0_out out = {}; out.FragColor = float4(_16[in.line]); out.FragColor += float4(_28[in.line].a * _28[1 - in.line].a); diff --git a/reference/opt/shaders-msl/frag/cull-distance-varying.frag b/reference/opt/shaders-msl/frag/cull-distance-varying.frag new file mode 100644 index 00000000000..708a295710d --- /dev/null +++ b/reference/opt/shaders-msl/frag/cull-distance-varying.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct 
spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_CullDistance_0 [[user(cull0)]]; + float gl_CullDistance_1 [[user(cull1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_CullDistance = {}; + gl_CullDistance[0] = in.gl_CullDistance_0; + gl_CullDistance[1] = in.gl_CullDistance_1; + out.FragColor = float4((1.0 - gl_CullDistance[0]) - gl_CullDistance[1]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/depth-out-early-frag-tests.frag b/reference/opt/shaders-msl/frag/depth-out-early-frag-tests.frag new file mode 100644 index 00000000000..21884d81c5b --- /dev/null +++ b/reference/opt/shaders-msl/frag/depth-out-early-frag-tests.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color_out [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0() +{ + float gl_FragDepth; + main0_out out = {}; + out.color_out = float4(1.0, 0.0, 0.0, 1.0); + gl_FragDepth = 0.699999988079071044921875; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/depth-out-no-early-frag-tests.frag b/reference/opt/shaders-msl/frag/depth-out-no-early-frag-tests.frag new file mode 100644 index 00000000000..57d810fafcb --- 
/dev/null +++ b/reference/opt/shaders-msl/frag/depth-out-no-early-frag-tests.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color_out [[color(0)]]; + float gl_FragDepth [[depth(less)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.color_out = float4(1.0, 0.0, 0.0, 1.0); + out.gl_FragDepth = 0.699999988079071044921875; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/disable-frag-output.frag-output.frag b/reference/opt/shaders-msl/frag/disable-frag-output.frag-output.frag new file mode 100644 index 00000000000..63bc45b8af7 --- /dev/null +++ b/reference/opt/shaders-msl/frag/disable-frag-output.frag-output.frag @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 buf1 [[color(1)]]; + float4 buf3 [[color(3)]]; + float4 buf6 [[color(6)]]; + float4 buf7 [[color(7)]]; +}; + +fragment main0_out main0() +{ + float4 buf0; + float4 buf2; + float4 buf4; + float4 buf5; + float gl_FragDepth; + int gl_FragStencilRefARB; + main0_out out = {}; + buf0 = float4(0.0, 0.0, 0.0, 1.0); + out.buf1 = float4(1.0, 0.0, 0.0, 1.0); + buf2 = float4(0.0, 1.0, 0.0, 1.0); + out.buf3 = float4(0.0, 0.0, 1.0, 1.0); + buf4 = float4(1.0, 0.0, 1.0, 0.5); + buf5 = float4(0.25); + out.buf6 = float4(0.75); + out.buf7 = float4(1.0); + gl_FragDepth = 0.89999997615814208984375; + gl_FragStencilRefARB = uint(127); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/for-loop-init.frag b/reference/opt/shaders-msl/frag/for-loop-init.frag index cef6e11d37e..1a42f6e7bec 100644 --- a/reference/opt/shaders-msl/frag/for-loop-init.frag +++ b/reference/opt/shaders-msl/frag/for-loop-init.frag @@ -11,63 +11,61 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - int _145; - for (;;) + do { out.FragColor = 16; - _145 = 0; - for (; _145 < 25; ) + for (int _143 = 0; _143 < 25; ) { out.FragColor += 10; - _145++; + _143++; continue; } - for (int _146 = 1; _146 < 30; ) + 
for (int _144 = 1; _144 < 30; ) { out.FragColor += 11; - _146++; + _144++; continue; } - int _147; - _147 = 0; - for (; _147 < 20; ) + int _145; + _145 = 0; + for (; _145 < 20; ) { out.FragColor += 12; - _147++; + _145++; continue; } - int _62 = _147 + 3; + int _62 = _145 + 3; out.FragColor += _62; if (_62 == 40) { - for (int _151 = 0; _151 < 40; ) + for (int _149 = 0; _149 < 40; ) { out.FragColor += 13; - _151++; + _149++; continue; } break; } out.FragColor += _62; - int2 _148; - _148 = int2(0); - for (; _148.x < 10; ) + int2 _146; + _146 = int2(0); + for (; _146.x < 10; ) { - out.FragColor += _148.y; - int2 _144 = _148; - _144.x = _148.x + 4; - _148 = _144; + out.FragColor += _146.y; + int2 _142 = _146; + _142.x = _146.x + 4; + _146 = _142; continue; } - for (int _150 = _62; _150 < 40; ) + for (int _148 = _62; _148 < 40; ) { - out.FragColor += _150; - _150++; + out.FragColor += _148; + _148++; continue; } out.FragColor += _62; break; - } + } while(false); return out; } diff --git a/reference/opt/shaders-msl/frag/fp16.desktop.invalid.frag b/reference/opt/shaders-msl/frag/fp16.desktop.invalid.frag deleted file mode 100644 index d9a0390e1f2..00000000000 --- a/reference/opt/shaders-msl/frag/fp16.desktop.invalid.frag +++ /dev/null @@ -1,16 +0,0 @@ -#include -#include - -using namespace metal; - -struct main0_in -{ - half4 v4 [[user(locn3)]]; -}; - -fragment void main0(main0_in in [[stage_in]]) -{ - half4 _491; - half4 _563 = modf(in.v4, _491); -} - diff --git a/reference/opt/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag b/reference/opt/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..25c62448943 --- /dev/null +++ b/reference/opt/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag @@ -0,0 +1,65 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment 
of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct foo_t +{ + float x; + uint y; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d bar [[texture(0)]], device atomic_uint* bar_atomic [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + if (!gl_HelperInvocation) + { + foo.x = 1.0; + } + uint _91 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + if (int(gl_FragCoord.x) == 3) + { + gl_HelperInvocation = true, discard_fragment(); + } + int2 _101 = int2(gl_FragCoord.xy); + (gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(_101))); + uint _103 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _108 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], memory_order_relaxed)); + uint _110 = (!gl_HelperInvocation ? 
atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _112 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _114 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _119 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], memory_order_relaxed)); + uint _124; + if (!gl_HelperInvocation) + { + do + { + _124 = 100u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], &_124, 42u, memory_order_relaxed, memory_order_relaxed) && _124 == 100u); + } + else + { + _124 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_101, bar)], memory_order_relaxed); + } + bool _125 = gl_HelperInvocation; + out.fragColor = float4(1.0, float(_125), 0.0, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag b/reference/opt/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag new file mode 100644 index 00000000000..541096a1fa3 --- /dev/null +++ b/reference/opt/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag @@ -0,0 +1,40 @@ +#include +#include + +using namespace metal; + +struct foo +{ + int x; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +fragment main0_out main0(device foo& _24 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool 
gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + if (gl_FragCoord.y == 7.0) + { + gl_HelperInvocation = true, discard_fragment(); + } + if (!gl_HelperInvocation) + { + _24.x = 0; + } + for (; float(_24.x) < gl_FragCoord.x; ) + { + if (!gl_HelperInvocation) + { + _24.x++; + } + continue; + } + out.fragColor = float4(float(_24.x), 0.0, 0.0, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag b/reference/opt/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..2b2e0853897 --- /dev/null +++ b/reference/opt/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag @@ -0,0 +1,64 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct foo_t +{ + float x; + uint y; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d bar [[texture(0)]], device atomic_uint* bar_atomic [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + if (!gl_HelperInvocation) + { + foo.x = 1.0; + } + uint _90 = (!gl_HelperInvocation ? 
atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + if (int(gl_FragCoord.x) == 3) + { + gl_HelperInvocation = true, discard_fragment(); + } + int2 _100 = int2(gl_FragCoord.xy); + (gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(_100))); + uint _102 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _107 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], memory_order_relaxed)); + uint _109 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _111 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _113 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _118 = (!gl_HelperInvocation ? 
atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], memory_order_relaxed)); + uint _123; + if (!gl_HelperInvocation) + { + do + { + _123 = 100u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], &_123, 42u, memory_order_relaxed, memory_order_relaxed) && _123 == 100u); + } + else + { + _123 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(_100, bar)], memory_order_relaxed); + } + out.fragColor = float4(1.0, 0.0, 0.0, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag b/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag index 53aafa5f7f2..19840fa434f 100644 --- a/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag +++ b/reference/opt/shaders-msl/frag/fragment-component-padding.pad-fragment.frag @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 FragColors_0 [[color(0)]]; @@ -19,7 +60,7 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float FragColors[2] = {}; + spvUnsafeArray FragColors = {}; float2 FragColor2 = {}; float3 FragColor3 = {}; FragColors[0] = in.vColor.x; @@ -28,8 +69,8 @@ fragment main0_out main0(main0_in in [[stage_in]]) FragColor3 = in.vColor.zzz; out.FragColors_0 = float4(FragColors[0]); out.FragColors_1 = float4(FragColors[1]); - out.FragColor2 = FragColor2.xyyy; - out.FragColor3 = FragColor3.xyzz; + out.FragColor2.xy = FragColor2; + out.FragColor3.xyz = FragColor3; return out; } diff --git a/reference/opt/shaders-msl/frag/helper-invocation.msl21.frag b/reference/opt/shaders-msl/frag/helper-invocation.msl21.frag index bacf6fa12a0..9d876df1a23 100644 --- a/reference/opt/shaders-msl/frag/helper-invocation.msl21.frag +++ b/reference/opt/shaders-msl/frag/helper-invocation.msl21.frag @@ -16,17 +16,16 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]]) { main0_out out = {}; - bool gl_HelperInvocation = simd_is_helper_thread(); - float4 _51; - if (!gl_HelperInvocation) + float4 _52; + if (!simd_is_helper_thread()) { - _51 = uSampler.sample(uSamplerSmplr, in.vUV, level(0.0)); + _52 
= uSampler.sample(uSamplerSmplr, in.vUV, level(0.0)); } else { - _51 = float4(1.0); + _52 = float4(1.0); } - out.FragColor = _51; + out.FragColor = _52; return out; } diff --git a/reference/opt/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag b/reference/opt/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag new file mode 100644 index 00000000000..0c6e6f49915 --- /dev/null +++ b/reference/opt/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4 v; +}; + +struct spvDescriptorSetBuffer0 +{ + array, 10000> uSamplers [[id(0)]]; + array uSamplersSmplr [[id(10000)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + constant UBO* vs [[id(0)]][10000]; +}; + +struct spvDescriptorSetBuffer2 +{ + texture2d uSampler [[id(0)]]; + sampler uSamplerSmplr [[id(1)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], const device spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant spvDescriptorSetBuffer2& spvDescriptorSet2 [[buffer(2)]]) +{ + main0_out out = {}; + out.FragColor = (spvDescriptorSet0.uSamplers[9999].sample(spvDescriptorSet0.uSamplersSmplr[9999], in.vUV) + spvDescriptorSet1.vs[5000]->v) + spvDescriptorSet2.uSampler.sample(spvDescriptorSet2.uSamplerSmplr, in.vUV); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/image-query-lod.msl22.frag b/reference/opt/shaders-msl/frag/image-query-lod.msl22.frag index a2b8262e20e..de7a60cc5a2 100644 --- a/reference/opt/shaders-msl/frag/image-query-lod.msl22.frag +++ b/reference/opt/shaders-msl/frag/image-query-lod.msl22.frag @@ -41,30 +41,30 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler2D [ _111.x = 
uTextureCube.calculate_clamped_lod(uSampler, in.vUV); _111.y = uTextureCube.calculate_unclamped_lod(uSampler, in.vUV); out.FragColor += _111; - float2 _118; - _118.x = uSampler2D.calculate_clamped_lod(uSampler2DSmplr, in.vUV.xy); - _118.y = uSampler2D.calculate_unclamped_lod(uSampler2DSmplr, in.vUV.xy); - out.FragColor += _118; - float2 _123; - _123.x = uSampler3D.calculate_clamped_lod(uSampler3DSmplr, in.vUV); - _123.y = uSampler3D.calculate_unclamped_lod(uSampler3DSmplr, in.vUV); - out.FragColor += _123; - float2 _128; - _128.x = uSamplerCube.calculate_clamped_lod(uSamplerCubeSmplr, in.vUV); - _128.y = uSamplerCube.calculate_unclamped_lod(uSamplerCubeSmplr, in.vUV); - out.FragColor += _128; - float2 _136; - _136.x = uTexture2D.calculate_clamped_lod(uSampler, in.vUV.xy); - _136.y = uTexture2D.calculate_unclamped_lod(uSampler, in.vUV.xy); - out.FragColor += _136; - float2 _143; - _143.x = uTexture3D.calculate_clamped_lod(uSampler, in.vUV); - _143.y = uTexture3D.calculate_unclamped_lod(uSampler, in.vUV); - out.FragColor += _143; - float2 _150; - _150.x = uTextureCube.calculate_clamped_lod(uSampler, in.vUV); - _150.y = uTextureCube.calculate_unclamped_lod(uSampler, in.vUV); - out.FragColor += _150; + float2 _119; + _119.x = uSampler2D.calculate_clamped_lod(uSampler2DSmplr, in.vUV.xy); + _119.y = uSampler2D.calculate_unclamped_lod(uSampler2DSmplr, in.vUV.xy); + out.FragColor += _119; + float2 _124; + _124.x = uSampler3D.calculate_clamped_lod(uSampler3DSmplr, in.vUV); + _124.y = uSampler3D.calculate_unclamped_lod(uSampler3DSmplr, in.vUV); + out.FragColor += _124; + float2 _129; + _129.x = uSamplerCube.calculate_clamped_lod(uSamplerCubeSmplr, in.vUV); + _129.y = uSamplerCube.calculate_unclamped_lod(uSamplerCubeSmplr, in.vUV); + out.FragColor += _129; + float2 _137; + _137.x = uTexture2D.calculate_clamped_lod(uSampler, in.vUV.xy); + _137.y = uTexture2D.calculate_unclamped_lod(uSampler, in.vUV.xy); + out.FragColor += _137; + float2 _144; + _144.x = 
uTexture3D.calculate_clamped_lod(uSampler, in.vUV); + _144.y = uTexture3D.calculate_unclamped_lod(uSampler, in.vUV); + out.FragColor += _144; + float2 _151; + _151.x = uTextureCube.calculate_clamped_lod(uSampler, in.vUV); + _151.y = uTextureCube.calculate_unclamped_lod(uSampler, in.vUV); + out.FragColor += _151; return out; } diff --git a/reference/opt/shaders-msl/frag/in_block.frag b/reference/opt/shaders-msl/frag/in_block.frag index 8178c9a4ed6..efb0cbd4296 100644 --- a/reference/opt/shaders-msl/frag/in_block.frag +++ b/reference/opt/shaders-msl/frag/in_block.frag @@ -16,16 +16,16 @@ struct main0_out struct main0_in { - float4 VertexOut_color [[user(locn2)]]; - float4 VertexOut_color2 [[user(locn3)]]; + float4 inputs_color [[user(locn2)]]; + float4 inputs_color2 [[user(locn3)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; VertexOut inputs = {}; - inputs.color = in.VertexOut_color; - inputs.color2 = in.VertexOut_color2; + inputs.color = in.inputs_color; + inputs.color2 = in.inputs_color2; out.FragColor = inputs.color + inputs.color2; return out; } diff --git a/reference/opt/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag b/reference/opt/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag new file mode 100644 index 00000000000..7b011ffb580 --- /dev/null +++ b/reference/opt/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag @@ -0,0 +1,105 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Foo +{ + float a; + float b; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float foos_0_a [[user(locn1)]]; + float foos_0_b [[user(locn2)]]; + float foos_1_a [[user(locn3)]]; + float foos_1_b [[user(locn4)]]; + float foos_2_a [[user(locn5)]]; + float foos_2_b [[user(locn6)]]; + float foos_3_a [[user(locn7)]]; + float foos_3_b [[user(locn8)]]; + float bars_0_a [[user(locn10)]]; + float bars_0_b [[user(locn11)]]; + float bars_1_a [[user(locn12)]]; + float bars_1_b [[user(locn13)]]; + float bars_2_a [[user(locn14)]]; + float bars_2_b [[user(locn15)]]; + float bars_3_a [[user(locn16)]]; + float bars_3_b [[user(locn17)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray foos = {}; + spvUnsafeArray bars = {}; + foos[0].a = in.foos_0_a; + foos[0].b = in.foos_0_b; + foos[1].a = in.foos_1_a; + foos[1].b = in.foos_1_b; + foos[2].a = in.foos_2_a; + foos[2].b = in.foos_2_b; + foos[3].a = in.foos_3_a; + foos[3].b = in.foos_3_b; + bars[0].a = in.bars_0_a; + bars[0].b = in.bars_0_b; + bars[1].a = in.bars_1_a; + bars[1].b = in.bars_1_b; + bars[2].a = in.bars_2_a; + bars[2].b = in.bars_2_b; + bars[3].a = in.bars_3_a; + bars[3].b = in.bars_3_b; + out.FragColor.x = foos[0].a; + out.FragColor.y = foos[1].b; + out.FragColor.z = 
foos[2].a; + out.FragColor.w = bars[3].b; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/in_mat.frag b/reference/opt/shaders-msl/frag/in_mat.frag index 83ed9b5ea32..5d0b44eb525 100644 --- a/reference/opt/shaders-msl/frag/in_mat.frag +++ b/reference/opt/shaders-msl/frag/in_mat.frag @@ -27,11 +27,11 @@ fragment main0_out main0(main0_in in [[stage_in]], texturecube samplerCol inInvModelView[1] = in.inInvModelView_1; inInvModelView[2] = in.inInvModelView_2; inInvModelView[3] = in.inInvModelView_3; - float4 _31 = inInvModelView * float4(reflect(normalize(in.inPos), normalize(in.inNormal)), 0.0); + float4 _31 = inInvModelView * float4(reflect(fast::normalize(in.inPos), fast::normalize(in.inNormal)), 0.0); float _33 = _31.x; - float3 _59 = float3(_33, _31.yz); - _59.x = _33 * (-1.0); - out.outFragColor = samplerColor.sample(samplerColorSmplr, _59, bias(in.inLodBias)); + float3 _36 = float3(_33, _31.yz); + _36.x = _33 * (-1.0); + out.outFragColor = samplerColor.sample(samplerColorSmplr, _36, bias(in.inLodBias)); return out; } diff --git a/reference/opt/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag b/reference/opt/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag new file mode 100644 index 00000000000..52a78cf93ac --- /dev/null +++ b/reference/opt/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d_ms_array uSubpass0 [[texture(0)]], texture2d_ms_array uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer, 1) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_Layer, 2)) + uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer, gl_SampleID); 
+ return out; +} + diff --git a/reference/opt/shaders-msl/frag/input-attachment-ms.frag b/reference/opt/shaders-msl/frag/input-attachment-ms.frag index 906cabbf474..0c47348d6e8 100644 --- a/reference/opt/shaders-msl/frag/input-attachment-ms.frag +++ b/reference/opt/shaders-msl/frag/input-attachment-ms.frag @@ -11,6 +11,7 @@ struct main0_out fragment main0_out main0(texture2d_ms uSubpass0 [[texture(0)]], texture2d_ms uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]]) { main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), 1) + uSubpass1.read(uint2(gl_FragCoord.xy), 2)) + uSubpass0.read(uint2(gl_FragCoord.xy), gl_SampleID); return out; } diff --git a/reference/opt/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag b/reference/opt/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag new file mode 100644 index 00000000000..e27b24adf39 --- /dev/null +++ b/reference/opt/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant uint* spvViewMask [[buffer(24)]], texture2d_ms_array uSubpass0 [[texture(0)]], texture2d_ms_array uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex, 1) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_ViewIndex, 2)) + uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex, gl_SampleID); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/input-attachment.arrayed-subpass.frag b/reference/opt/shaders-msl/frag/input-attachment.arrayed-subpass.frag new file mode 100644 index 
00000000000..5d5ee43104d --- /dev/null +++ b/reference/opt/shaders-msl/frag/input-attachment.arrayed-subpass.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d_array uSubpass0 [[texture(0)]], texture2d_array uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_Layer); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/input-attachment.frag b/reference/opt/shaders-msl/frag/input-attachment.frag index 122190648a2..790dce3c1bf 100644 --- a/reference/opt/shaders-msl/frag/input-attachment.frag +++ b/reference/opt/shaders-msl/frag/input-attachment.frag @@ -11,7 +11,7 @@ struct main0_out fragment main0_out main0(texture2d uSubpass0 [[texture(0)]], texture2d uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), 0) + uSubpass1.read(uint2(gl_FragCoord.xy), 0); + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy)) + uSubpass1.read(uint2(gl_FragCoord.xy)); return out; } diff --git a/reference/opt/shaders-msl/frag/input-attachment.multiview.frag b/reference/opt/shaders-msl/frag/input-attachment.multiview.frag new file mode 100644 index 00000000000..7cf06d2d6b9 --- /dev/null +++ b/reference/opt/shaders-msl/frag/input-attachment.multiview.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant uint* spvViewMask [[buffer(24)]], texture2d_array uSubpass0 [[texture(0)]], texture2d_array uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = 
uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_ViewIndex); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag b/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag index 2b420195ffc..b3bab04319e 100644 --- a/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag +++ b/reference/opt/shaders-msl/frag/interpolation-qualifiers-block.frag @@ -21,27 +21,27 @@ struct main0_out struct main0_in { - float2 Input_v0 [[user(locn0), centroid_no_perspective]]; - float2 Input_v1 [[user(locn1), centroid_no_perspective]]; - float3 Input_v2 [[user(locn2), centroid_no_perspective]]; - float4 Input_v3 [[user(locn3), centroid_no_perspective]]; - float Input_v4 [[user(locn4), centroid_no_perspective]]; - float Input_v5 [[user(locn5), centroid_no_perspective]]; - float Input_v6 [[user(locn6), centroid_no_perspective]]; + float2 inp_v0 [[user(locn0), centroid_no_perspective]]; + float2 inp_v1 [[user(locn1), centroid_no_perspective]]; + float3 inp_v2 [[user(locn2), centroid_no_perspective]]; + float4 inp_v3 [[user(locn3), centroid_no_perspective]]; + float inp_v4 [[user(locn4), centroid_no_perspective]]; + float inp_v5 [[user(locn5), centroid_no_perspective]]; + float inp_v6 [[user(locn6), centroid_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; Input inp = {}; - inp.v0 = in.Input_v0; - inp.v1 = in.Input_v1; - inp.v2 = in.Input_v2; - inp.v3 = in.Input_v3; - inp.v4 = in.Input_v4; - inp.v5 = in.Input_v5; - inp.v6 = in.Input_v6; - out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); + inp.v0 = in.inp_v0; + inp.v1 = in.inp_v1; + inp.v2 = in.inp_v2; + inp.v3 = in.inp_v3; + inp.v4 = in.inp_v4; + inp.v5 = in.inp_v5; + inp.v6 = in.inp_v6; + out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, fma(inp.v3.w, inp.v4, inp.v5) - inp.v6); return out; } diff --git 
a/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag b/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag index aff6e1b0f70..208e8806b24 100644 --- a/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag +++ b/reference/opt/shaders-msl/frag/interpolation-qualifiers.frag @@ -22,7 +22,7 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - out.FragColor = float4(in.v0.x + in.v1.y, in.v2.xy, ((in.v3.w * in.v4) + in.v5) - in.v6); + out.FragColor = float4(in.v0.x + in.v1.y, in.v2.xy, fma(in.v3.w, in.v4, in.v5) - in.v6); return out; } diff --git a/reference/opt/shaders-msl/frag/lut-promotion.frag b/reference/opt/shaders-msl/frag/lut-promotion.frag index c9169b790d3..e24bcd6d111 100644 --- a/reference/opt/shaders-msl/frag/lut-promotion.frag +++ b/reference/opt/shaders-msl/frag/lut-promotion.frag @@ -1,13 +1,52 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float _16[16] = { 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }; -constant float4 _60[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; -constant float4 _104[4] = { float4(20.0), float4(30.0), float4(50.0), float4(60.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }); +constant spvUnsafeArray _60 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); +constant spvUnsafeArray _104 = spvUnsafeArray({ float4(20.0), float4(30.0), float4(50.0), float4(60.0) }); struct main0_out { @@ -19,19 +58,6 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; @@ -53,7 +79,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) { out.FragColor += _60[in.index & 1].x; } - float4 foobar[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; + spvUnsafeArray foobar = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); if (_63) { foobar[1].z = 20.0; diff --git a/reference/opt/shaders-msl/frag/modf-access-tracking-function.frag b/reference/opt/shaders-msl/frag/modf-access-tracking-function.frag new file mode 100644 index 00000000000..612dd4e92d1 --- /dev/null +++ b/reference/opt/shaders-msl/frag/modf-access-tracking-function.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 vo0 [[color(0)]]; + float4 vo1 [[color(1)]]; +}; + +struct main0_in +{ + float4 v [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _25 = modf(in.v, out.vo1); + out.vo0 = _25; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/mrt-array.frag b/reference/opt/shaders-msl/frag/mrt-array.frag index d7cea6baf94..d7fccdedc86 100644 --- a/reference/opt/shaders-msl/frag/mrt-array.frag +++ b/reference/opt/shaders-msl/frag/mrt-array.frag @@ -1,10 +1,56 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct main0_out { float4 FragColor_0 [[color(0)]]; @@ -19,17 +65,10 @@ struct main0_in float4 vB [[user(locn1)]]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} - fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 FragColor[4] = {}; + spvUnsafeArray FragColor = {}; FragColor[0] = mod(in.vA, in.vB); FragColor[1] = in.vA + in.vB; FragColor[2] = in.vA - in.vB; diff --git a/reference/opt/shaders-msl/frag/nonuniform-qualifier.msl2.frag b/reference/opt/shaders-msl/frag/nonuniform-qualifier.msl2.frag index 14a6999f339..bdd8d1419be 100644 --- a/reference/opt/shaders-msl/frag/nonuniform-qualifier.msl2.frag +++ b/reference/opt/shaders-msl/frag/nonuniform-qualifier.msl2.frag @@ -39,12 +39,15 @@ fragment main0_out main0(main0_in in [[stage_in]], constant UBO* ubos_0 [[buffer }; main0_out out = {}; - int _24 = in.vIndex + 10; - int _35 = in.vIndex + 40; - out.FragColor = uSamplers[_24].sample(uSamps[_35], in.vUV); - out.FragColor = uCombinedSamplers[_24].sample(uCombinedSamplersSmplr[_24], in.vUV); - 
out.FragColor += ubos[(in.vIndex + 20)]->v[_35]; - out.FragColor += ssbos[(in.vIndex + 50)]->v[in.vIndex + 60]; + int _25 = in.vIndex + 10; + int _37 = in.vIndex + 40; + out.FragColor = uSamplers[_25].sample(uSamps[_37], in.vUV); + out.FragColor = uCombinedSamplers[_25].sample(uCombinedSamplersSmplr[_25], in.vUV); + int _69 = in.vIndex + 20; + out.FragColor += ubos[_69]->v[_37]; + int _87 = in.vIndex + 50; + int _91 = in.vIndex + 60; + out.FragColor += ssbos[_87]->v[_91]; return out; } diff --git a/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag b/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag index dd319af5552..a12c44912f7 100644 --- a/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag +++ b/reference/opt/shaders-msl/frag/packed-expression-vector-shuffle.frag @@ -17,7 +17,11 @@ struct main0_out fragment main0_out main0(constant UBO& _15 [[buffer(0)]]) { main0_out out = {}; - out.FragColor = float4(_15.color[0], _15.color[1], _15.color[2], float4(1.0).w); + float4 _36 = float4(1.0); + _36.x = _15.color[0]; + _36.y = _15.color[1]; + _36.z = _15.color[2]; + out.FragColor = _36; return out; } diff --git a/reference/opt/shaders-msl/frag/packing-test-3.frag b/reference/opt/shaders-msl/frag/packing-test-3.frag index 8cc5f5c75d7..1f696e94ef0 100644 --- a/reference/opt/shaders-msl/frag/packing-test-3.frag +++ b/reference/opt/shaders-msl/frag/packing-test-3.frag @@ -19,10 +19,10 @@ struct main0_out float4 _entryPointOutput [[color(0)]]; }; -fragment main0_out main0(constant CB0& _26 [[buffer(0)]]) +fragment main0_out main0(constant CB0& _RESERVED_IDENTIFIER_FIXUP_24 [[buffer(0)]]) { main0_out out = {}; - out._entryPointOutput = float4(_26.CB0[1].position[0], _26.CB0[1].position[1], _26.CB0[1].position[2], _26.CB0[1].radius); + out._entryPointOutput = float4(_RESERVED_IDENTIFIER_FIXUP_24.CB0[1].position[0], _RESERVED_IDENTIFIER_FIXUP_24.CB0[1].position[1], _RESERVED_IDENTIFIER_FIXUP_24.CB0[1].position[2], 
_RESERVED_IDENTIFIER_FIXUP_24.CB0[1].radius); return out; } diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 00000000000..1bfaff53bf8 --- /dev/null +++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,53 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +struct spvDescriptorSetBuffer0 +{ + device Buffer3* m_9 [[id(0)]]; + texture2d img4 [[id(1)]]; + texture2d img [[id(2), raster_order_group(0)]]; + texture2d img3 [[id(3), raster_order_group(0)]]; + texture2d img2 [[id(4), raster_order_group(0)]]; + device atomic_uint* img2_atomic [[id(5), raster_order_group(0)]]; + volatile device Buffer* m_42 [[id(6), raster_order_group(0)]]; + device Buffer2* m_52 [[id(7), raster_order_group(0)]]; +}; + +fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + (*spvDescriptorSet0.m_9).baz = 0; + spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); + uint _39 = 
atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.img2_atomic[spvImage2DAtomicCoord(int2(0), spvDescriptorSet0.img2)], 1u, memory_order_relaxed); + (*spvDescriptorSet0.m_42).foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_42).bar, (*spvDescriptorSet0.m_52).quux, memory_order_relaxed); +} + diff --git a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 00000000000..6a300e8c589 --- /dev/null +++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,41 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _42 [[buffer(2), raster_order_group(0)]], device Buffer2& _52 [[buffer(3), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]], texture2d img2 [[texture(3), raster_order_group(0)]], device atomic_uint* img2_atomic [[buffer(1), raster_order_group(0)]]) +{ + _9.baz = 0; + img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + img.write(img3.read(uint2(int2(0))), uint2(int2(0))); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&img2_atomic[spvImage2DAtomicCoord(int2(0), img2)], 1u, memory_order_relaxed); + _42.foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_42.bar, _52.quux, memory_order_relaxed); +} + diff --git a/reference/opt/shaders-msl/frag/post-depth-coverage.ios.msl2.frag b/reference/opt/shaders-msl/frag/post-depth-coverage.ios.msl2.frag new file mode 100644 index 00000000000..3b2885e2e2a --- /dev/null +++ b/reference/opt/shaders-msl/frag/post-depth-coverage.ios.msl2.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask, post_depth_coverage]]) +{ + main0_out out = {}; + out.FragColor = float4(float(gl_SampleMaskIn)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/post-depth-coverage.msl23.frag 
b/reference/opt/shaders-msl/frag/post-depth-coverage.msl23.frag new file mode 100644 index 00000000000..3b2885e2e2a --- /dev/null +++ b/reference/opt/shaders-msl/frag/post-depth-coverage.msl23.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask, post_depth_coverage]]) +{ + main0_out out = {}; + out.FragColor = float4(float(gl_SampleMaskIn)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag b/reference/opt/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag new file mode 100644 index 00000000000..859ace2cd48 --- /dev/null +++ b/reference/opt/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag @@ -0,0 +1,45 @@ +#include +#include +#if __METAL_VERSION__ >= 230 +#include +using namespace metal::raytracing; +#endif + +using namespace metal; + +struct main0_out +{ + float4 outColor [[color(0)]]; +}; + +struct main0_in +{ + float4 inPos [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], raytracing::acceleration_structure topLevelAS [[buffer(0)]]) +{ + main0_out out = {}; + raytracing::intersection_query rayQuery; + rayQuery.reset(ray(float3((in.inPos.xy * 4.0) - float2(2.0), 1.0), float3(0.0, 0.0, -1.0), 0.001000000047497451305389404296875, 2.0), topLevelAS, intersection_params()); + for (;;) + { + bool _88 = rayQuery.next(); + if (_88) + { + continue; + } + else + { + break; + } + } + uint _92 = uint(rayQuery.get_committed_intersection_type()); + if (_92 == 0u) + { + discard_fragment(); + } + out.outColor = in.inPos; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/read-cull-clip-distance-in-function.frag b/reference/opt/shaders-msl/frag/read-cull-clip-distance-in-function.frag new file mode 100644 index 00000000000..3c9757ebd26 --- /dev/null +++ 
b/reference/opt/shaders-msl/frag/read-cull-clip-distance-in-function.frag @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; + float gl_CullDistance_0 [[user(cull0)]]; + float gl_CullDistance_1 [[user(cull1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_CullDistance = {}; + spvUnsafeArray gl_ClipDistance = {}; + gl_CullDistance[0] = in.gl_CullDistance_0; + gl_CullDistance[1] = in.gl_CullDistance_1; + gl_ClipDistance[0] = in.gl_ClipDistance_0; + gl_ClipDistance[1] = in.gl_ClipDistance_1; + out.FragColor = float4(gl_CullDistance[0], gl_CullDistance[1], gl_ClipDistance[0], gl_ClipDistance[1]); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/return-value-after-discard-terminator.frag b/reference/opt/shaders-msl/frag/return-value-after-discard-terminator.frag new file mode 100644 index 00000000000..92097dfa4ad --- /dev/null +++ 
b/reference/opt/shaders-msl/frag/return-value-after-discard-terminator.frag @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct buff_t +{ + int m0[1024]; +}; + +struct main0_out +{ + float4 frag_clr [[color(0)]]; +}; + +fragment main0_out main0(device buff_t& buff [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + int4 _16 = int4(gl_FragCoord); + out.frag_clr = float4(0.0, 0.0, 1.0, 1.0); + buff.m0[(_16.y * 32) + _16.x] = 1; + discard_fragment(); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag b/reference/opt/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag new file mode 100644 index 00000000000..4f7e9b53b24 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d uTexture [[texture(0)]], sampler uSampler [[sampler(0)]], sampler uSamplerShadow [[sampler(1)]]) +{ + main0_out out = {}; + out.FragColor = float4(uTexture.sample(uSampler, in.vUV.xy)).x; + out.FragColor += uTexture.sample_compare(uSamplerShadow, in.vUV.xy, in.vUV.z); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag b/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag new file mode 100644 index 00000000000..626fe4c79c2 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask]], uint gl_SampleID [[sample_id]]) +{ + main0_out 
out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = (gl_SampleMaskIn & 0x22 & (1 << gl_SampleID)); + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag b/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag new file mode 100644 index 00000000000..f478901b6be --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask]]) +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = (gl_SampleMaskIn & 0x22); + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/shaders-msl/frag/16bit-constants.frag b/reference/opt/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag similarity index 53% rename from reference/shaders-msl/frag/16bit-constants.frag rename to reference/opt/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag index 56c7ea5df4a..d04f2033bb5 100644 --- a/reference/shaders-msl/frag/16bit-constants.frag +++ b/reference/opt/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag @@ -5,17 +5,15 @@ using namespace metal; struct main0_out { - half foo [[color(0)]]; - short bar [[color(1)]]; - ushort baz [[color(2)]]; + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; }; fragment main0_out main0() { main0_out out = {}; - out.foo = half(1.0); - out.bar = 2; - out.baz = 3u; + out.FragColor = float4(1.0); + out.gl_SampleMask = 0x22; return out; } diff --git a/reference/opt/shaders-msl/frag/sample-mask.fixed-sample-mask.frag b/reference/opt/shaders-msl/frag/sample-mask.fixed-sample-mask.frag new file mode 100644 index 00000000000..76306b5ade8 --- /dev/null +++ 
b/reference/opt/shaders-msl/frag/sample-mask.fixed-sample-mask.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = 0; + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag new file mode 100644 index 00000000000..5df60f909e5 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d_array tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float3 _28 = float3(gl_FragCoord.xy, float(gl_SampleID)); + out.FragColor = tex.sample(texSmplr, _28.xy, uint(round(_28.z))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag new file mode 100644 index 00000000000..386230ef0c3 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float foo [[user(locn0), sample_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d_array tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float3 _26 = 
float3(gl_FragCoord.xy, in.foo); + out.FragColor = tex.sample(texSmplr, _26.xy, uint(round(_26.z))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag new file mode 100644 index 00000000000..f8f357fe7c9 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float2 gl_SamplePosition = get_sample_position(gl_SampleID); + out.FragColor = tex.sample(texSmplr, (gl_FragCoord.xy - gl_SamplePosition)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag b/reference/opt/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag new file mode 100644 index 00000000000..1ed8148d4c0 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + out.FragColor = tex.sample(texSmplr, gl_FragCoord.xy); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag b/reference/opt/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag new file mode 100644 index 00000000000..70278b12907 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag @@ -0,0 +1,22 @@ +#include +#include + 
+using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float vTex [[user(locn0), flat]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor += ((uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), bias(2.0)) + uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), level(3.0))) + uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), gradient2d(5.0, 8.0))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag b/reference/opt/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag new file mode 100644 index 00000000000..6aaffe532ba --- /dev/null +++ b/reference/opt/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d_array uTex [[texture(0)]], sampler uShadow [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex.sample_compare(uShadow, float2(in.vUV.x, 0.5), uint(round(in.vUV.y)), in.vUV.z, bias(1.0)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag b/reference/opt/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag new file mode 100644 index 00000000000..07845691942 --- /dev/null +++ b/reference/opt/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d_array uTex [[texture(0)]], sampler uShadow [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex.sample_compare(uShadow, 
in.vUV.xy, uint(round(in.vUV.z)), in.vUV.w, level(0)) + uTex.sample_compare(uShadow, in.vUV.xy, uint(round(in.vUV.z)), in.vUV.w, gradient2d(float2(1.0), float2(1.0))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag index 592d445810f..fc908cb3e18 100644 --- a/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag +++ b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag @@ -5,18 +5,8 @@ using namespace metal; -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -struct main0_in -{ - float3 vRefract [[user(locn0)]]; -}; - template -inline T spvReflect(T i, T n) +[[clang::optnone]] T spvReflect(T i, T n) { return i - T(2) * i * n * n; } @@ -37,6 +27,16 @@ inline T spvRefract(T i, T n, T eta) } } +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vRefract [[user(locn0)]]; +}; + fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; diff --git a/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag b/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag index 30b28d21311..fff932c0371 100644 --- a/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag +++ b/reference/opt/shaders-msl/frag/shader-arithmetic-8bit.frag @@ -35,43 +35,19 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], device SSBO& ssbo [[buffer(0)]], constant Push& registers [[buffer(1)]], constant UBO& ubo [[buffer(2)]]) { main0_out out = {}; - short _196 = 10; - int _197 = 20; - char2 _198 = as_type(_196); - char4 _199 = as_type(_197); - _196 = as_type(_198); - _197 = as_type(_199); - ssbo.i8[0] = _199.x; - ssbo.i8[1] = _199.y; - ssbo.i8[2] = _199.z; - ssbo.i8[3] = _199.w; - ushort _220 = 10u; - uint _221 = 20u; - uchar2 _222 = as_type(_220); - uchar4 _223 = as_type(_221); - _220 = as_type(_222); - _221 = as_type(_223); - ssbo.u8[0] = _223.x; - ssbo.u8[1] = _223.y; - ssbo.u8[2] = _223.z; - 
ssbo.u8[3] = _223.w; - char4 _246 = char4(in.vColor); - char4 _244 = _246; - _244 += char4(registers.i8); - _244 += char4(-40); - _244 += char4(-50); - _244 += char4(char(10), char(20), char(30), char(40)); - _244 += char4(ssbo.i8[4]); - _244 += char4(ubo.i8); - out.FragColorInt = int4(_244); - uchar4 _271 = uchar4(_246); - _271 += uchar4(registers.u8); - _271 += uchar4(216); - _271 += uchar4(206); - _271 += uchar4(uchar(10), uchar(20), uchar(30), uchar(40)); - _271 += uchar4(ssbo.u8[4]); - _271 += uchar4(ubo.u8); - out.FragColorUint = uint4(_271); + char4 _204 = as_type(20); + ssbo.i8[0] = _204.x; + ssbo.i8[1] = _204.y; + ssbo.i8[2] = _204.z; + ssbo.i8[3] = _204.w; + uchar4 _229 = as_type(20u); + ssbo.u8[0] = _229.x; + ssbo.u8[1] = _229.y; + ssbo.u8[2] = _229.z; + ssbo.u8[3] = _229.w; + char4 _249 = char4(in.vColor); + out.FragColorInt = int4((((((_249 + char4(registers.i8)) + char4(-40)) + char4(-50)) + char4(char(10), char(20), char(30), char(40))) + char4(ssbo.i8[4])) + char4(ubo.i8)); + out.FragColorUint = uint4((((((uchar4(_249) + uchar4(registers.u8)) + uchar4(216)) + uchar4(206)) + uchar4(uchar(10), uchar(20), uchar(30), uchar(40))) + uchar4(ssbo.u8[4])) + uchar4(ubo.u8)); return out; } diff --git a/reference/opt/shaders-msl/frag/shadow-compare-global-alias.invalid.frag b/reference/opt/shaders-msl/frag/shadow-compare-global-alias.invalid.frag deleted file mode 100644 index a58f13fc402..00000000000 --- a/reference/opt/shaders-msl/frag/shadow-compare-global-alias.invalid.frag +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include - -using namespace metal; - -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -struct main0_in -{ - float3 vUV [[user(locn0)]]; -}; - -fragment main0_out main0(main0_in in [[stage_in]], depth2d uTex [[texture(0)]], depth2d uSampler [[texture(1)]], sampler uSamp [[sampler(0)]], sampler uSamplerSmplr [[sampler(1)]]) -{ - main0_out out = {}; - out.FragColor = uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z); - 
out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z); - out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z); - out.FragColor += uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z); - out.FragColor += uTex.sample_compare(uSamp, in.vUV.xy, in.vUV.z); - out.FragColor += uSampler.sample_compare(uSamplerSmplr, in.vUV.xy, in.vUV.z); - return out; -} - diff --git a/reference/opt/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/reference/opt/shaders-msl/frag/subgroup-globals-extract.msl22.frag new file mode 100644 index 00000000000..b2cfddf6429 --- /dev/null +++ b/reference/opt/shaders-msl/frag/subgroup-globals-extract.msl22.frag @@ -0,0 +1,56 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 
32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +struct main0_out +{ + uint2 FragColor [[color(0)]]; +}; + +fragment main0_out main0(uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]], uint gl_SubgroupSize [[threads_per_simdgroup]]) +{ + main0_out out = {}; + out.FragColor.x = (((spvSubgroupBallotFindLSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize) + spvSubgroupBallotFindMSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize)) + spvSubgroupBallotBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize)) + spvSubgroupBallotInclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID)) + spvSubgroupBallotExclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/switch-unreachable-break.frag b/reference/opt/shaders-msl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..8d7903b79b4 --- /dev/null +++ b/reference/opt/shaders-msl/frag/switch-unreachable-break.frag @@ -0,0 +1,43 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + int cond; + int cond2; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant UBO& _15 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = float4(10.0); + switch (_15.cond) + { + case 1: + { + if 
(_15.cond2 < 50) + { + break; + } + else + { + discard_fragment(); + } + break; // unreachable workaround + } + default: + { + out.FragColor = float4(20.0); + break; + } + } + return out; +} + diff --git a/reference/opt/shaders-msl/frag/swizzle.frag b/reference/opt/shaders-msl/frag/swizzle.frag index 7a0494e064a..cda23096c54 100644 --- a/reference/opt/shaders-msl/frag/swizzle.frag +++ b/reference/opt/shaders-msl/frag/swizzle.frag @@ -17,9 +17,11 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], texture2d samp [[texture(0)]], sampler sampSmplr [[sampler(0)]]) { main0_out out = {}; - out.FragColor = float4(samp.sample(sampSmplr, in.vUV).xyz, 1.0); - out.FragColor = float4(samp.sample(sampSmplr, in.vUV).xz, 1.0, 4.0); - out.FragColor = float4(samp.sample(sampSmplr, in.vUV).xx, samp.sample(sampSmplr, (in.vUV + float2(0.100000001490116119384765625))).yy); + float4 _19 = samp.sample(sampSmplr, in.vUV); + float _23 = _19.x; + out.FragColor = float4(_23, _19.yz, 1.0); + out.FragColor = float4(_23, _19.z, 1.0, 4.0); + out.FragColor = float4(_23, _23, samp.sample(sampSmplr, (in.vUV + float2(0.100000001490116119384765625))).yy); out.FragColor = float4(in.vNormal, 1.0); out.FragColor = float4(in.vNormal + float3(1.7999999523162841796875), 1.0); out.FragColor = float4(in.vUV, in.vUV + float2(1.7999999523162841796875)); diff --git a/reference/opt/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag b/reference/opt/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag new file mode 100644 index 00000000000..98b9bb7ef80 --- /dev/null +++ b/reference/opt/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d uTexture [[texture(0)]], texture2d uTexture2 [[texture(1)]], sampler uTextureSmplr [[sampler(0)]], sampler uTexture2Smplr [[sampler(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + 
out.FragColor = uTexture.read(uint2(int2(gl_FragCoord.xy)) + uint2(int2(1)), 0); + out.FragColor += uTexture2.read(uint2(uint(int(gl_FragCoord.x)), 0) + uint2(uint(-1), 0), 0); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/texture-cube-array.frag b/reference/opt/shaders-msl/frag/texture-cube-array.frag new file mode 100644 index 00000000000..0af8a047b3f --- /dev/null +++ b/reference/opt/shaders-msl/frag/texture-cube-array.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texturecube cubeSampler [[texture(0)]], texturecube_array cubeArraySampler [[texture(1)]], texture2d_array texArraySampler [[texture(2)]], sampler cubeSamplerSmplr [[sampler(0)]], sampler cubeArraySamplerSmplr [[sampler(1)]], sampler texArraySamplerSmplr [[sampler(2)]]) +{ + main0_out out = {}; + out.FragColor = (cubeSampler.sample(cubeSamplerSmplr, in.vUV.xyz) + cubeArraySampler.sample(cubeArraySamplerSmplr, in.vUV.xyz, uint(round(in.vUV.w)))) + texArraySampler.sample(texArraySamplerSmplr, in.vUV.xyz.xy, uint(round(in.vUV.xyz.z))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag b/reference/opt/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag new file mode 100644 index 00000000000..c057e217e21 --- /dev/null +++ b/reference/opt/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag @@ -0,0 +1,58 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +static inline __attribute__((always_inline)) +float3 spvCubemapTo2DArrayFace(float3 P) +{ + float3 Coords = abs(P.xyz); + float CubeFace = 0; + float ProjectionAxis = 0; + float u = 0; + float v = 0; + if (Coords.x >= Coords.y && Coords.x >= Coords.z) + { + CubeFace = P.x >= 0 ? 
0 : 1; + ProjectionAxis = Coords.x; + u = P.x >= 0 ? -P.z : P.z; + v = -P.y; + } + else if (Coords.y >= Coords.x && Coords.y >= Coords.z) + { + CubeFace = P.y >= 0 ? 2 : 3; + ProjectionAxis = Coords.y; + u = P.x; + v = P.y >= 0 ? P.z : -P.z; + } + else + { + CubeFace = P.z >= 0 ? 4 : 5; + ProjectionAxis = Coords.z; + u = P.z >= 0 ? P.x : -P.x; + v = -P.y; + } + u = 0.5 * (u/ProjectionAxis + 1); + v = 0.5 * (v/ProjectionAxis + 1); + return float3(u, v, CubeFace); +} + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texturecube cubeSampler [[texture(0)]], texture2d_array cubeArraySampler [[texture(1)]], texture2d_array texArraySampler [[texture(2)]], sampler cubeSamplerSmplr [[sampler(0)]], sampler cubeArraySamplerSmplr [[sampler(1)]], sampler texArraySamplerSmplr [[sampler(2)]]) +{ + main0_out out = {}; + out.FragColor = (cubeSampler.sample(cubeSamplerSmplr, in.vUV.xyz) + cubeArraySampler.sample(cubeArraySamplerSmplr, spvCubemapTo2DArrayFace(in.vUV.xyz).xy, uint(spvCubemapTo2DArrayFace(in.vUV.xyz).z) + (uint(round(in.vUV.w)) * 6u))) + texArraySampler.sample(texArraySamplerSmplr, in.vUV.xyz.xy, uint(round(in.vUV.xyz.z))); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/texture-proj-shadow.frag b/reference/opt/shaders-msl/frag/texture-proj-shadow.frag index 52d4a026d2d..6d465ce9f9f 100644 --- a/reference/opt/shaders-msl/frag/texture-proj-shadow.frag +++ b/reference/opt/shaders-msl/frag/texture-proj-shadow.frag @@ -18,12 +18,13 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], depth2d uShadow2D [[texture(0)]], texture1d uSampler1D [[texture(1)]], texture2d uSampler2D [[texture(2)]], texture3d uSampler3D [[texture(3)]], sampler uShadow2DSmplr [[sampler(0)]], sampler uSampler1DSmplr [[sampler(1)]], sampler uSampler2DSmplr [[sampler(2)]], sampler uSampler3DSmplr [[sampler(3)]]) { main0_out out = {}; - float4 _20 = 
in.vClip4; - _20.z = in.vClip4.w; - out.FragColor = uShadow2D.sample_compare(uShadow2DSmplr, _20.xy / _20.z, in.vClip4.z / _20.z); + float4 _17 = in.vClip4; + float4 _20 = _17; + _20.z = _17.w; + out.FragColor = uShadow2D.sample_compare(uShadow2DSmplr, _20.xy / _20.z, _17.z / _20.z); out.FragColor = uSampler1D.sample(uSampler1DSmplr, in.vClip2.x / in.vClip2.y).x; out.FragColor = uSampler2D.sample(uSampler2DSmplr, in.vClip3.xy / in.vClip3.z).x; - out.FragColor = uSampler3D.sample(uSampler3DSmplr, in.vClip4.xyz / in.vClip4.w).x; + out.FragColor = uSampler3D.sample(uSampler3DSmplr, _17.xyz / _17.w).x; return out; } diff --git a/reference/opt/shaders-msl/frag/ubo_layout.frag b/reference/opt/shaders-msl/frag/ubo_layout.frag index 0bc27462b2e..4ca603d6431 100644 --- a/reference/opt/shaders-msl/frag/ubo_layout.frag +++ b/reference/opt/shaders-msl/frag/ubo_layout.frag @@ -31,7 +31,7 @@ struct main0_out fragment main0_out main0(constant UBO1& ubo1 [[buffer(0)]], constant UBO2& ubo0 [[buffer(1)]]) { main0_out out = {}; - out.FragColor = transpose(ubo1.foo.foo)[0] + ubo0.foo.foo[0]; + out.FragColor = float4(ubo1.foo.foo[0][0], ubo1.foo.foo[1][0], ubo1.foo.foo[2][0], ubo1.foo.foo[3][0]) + ubo0.foo.foo[0]; return out; } diff --git a/reference/opt/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag b/reference/opt/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag new file mode 100644 index 00000000000..1cb7aa70328 --- /dev/null +++ b/reference/opt/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag @@ -0,0 +1,75 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + ushort2 a [[user(locn0)]]; + uint3 b [[user(locn1)]]; + ushort c_0 [[user(locn2)]]; + ushort c_1 [[user(locn3)]]; + uint4 e_0 [[user(locn4)]]; + uint4 e_1 [[user(locn5)]]; + float4 d [[user(locn6)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray c = {}; + spvUnsafeArray e = {}; + c[0] = in.c_0; + c[1] = in.c_1; + e[0] = in.e_0; + e[1] = in.e_1; + out.FragColor = float4(float(int(short(in.a.x))), float(int(in.b.x)), float2(float(uint(c[1])), float(e[0].w)) + in.d.xy); + return out; +} + diff --git a/reference/opt/shaders-msl/intel/shader-integer-functions2.asm.comp b/reference/opt/shaders-msl/intel/shader-integer-functions2.asm.comp new file mode 100644 index 00000000000..1e5d889d462 --- /dev/null +++ b/reference/opt/shaders-msl/intel/shader-integer-functions2.asm.comp @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct foo +{ + uint a; + uint b; + int c; + int d; +}; + +kernel void main0(device foo& _4 [[buffer(0)]]) +{ + _4.a = clz(_4.a); + _4.a = ctz(_4.a); + _4.a = absdiff(_4.c, _4.d); + _4.a = absdiff(_4.a, _4.b); + _4.c = addsat(_4.c, _4.d); + _4.a = addsat(_4.a, _4.b); + _4.c = hadd(_4.c, _4.d); + _4.a = hadd(_4.a, _4.b); + _4.c = rhadd(_4.c, _4.d); 
+ _4.a = rhadd(_4.a, _4.b); + _4.c = subsat(_4.c, _4.d); + _4.a = subsat(_4.a, _4.b); + _4.c = int(short(_4.c)) * int(short(_4.d)); + _4.a = uint(ushort(_4.a)) * uint(ushort(_4.b)); +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc new file mode 100644 index 00000000000..24928da01df --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc @@ -0,0 +1,188 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void 
spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device 
main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStorageFoo[8][4][2]; + threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..a08364e2b34 --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc @@ -0,0 +1,191 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; 
i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup float4 Foo[4][2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ 
gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc new file mode 100644 index 00000000000..abc95ca899e --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + 
device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..3da1d18c61d --- /dev/null +++ b/reference/opt/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + gl_out[gl_InvocationID].Foo = _38; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git 
a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..2bf5c257d6b --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_b; + float4 m_22_c; + float4 m_22_d; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + V _22 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_b = _22.b; + out.m_22_c = _22.c; + out.m_22_d = _22.d; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..18596302eb3 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc @@ -0,0 +1,41 @@ +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_a; + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_b; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + 
threadgroup P _11; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + _11.a = 1.0; + patchOut.m_11_b = 2.0; + gl_out[gl_InvocationID].c_a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc new file mode 100644 index 00000000000..f2f17bad37e --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_a; + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_b; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup P spvStorage_11[8]; + threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + _11.a = 1.0; + patchOut.m_11_b = 2.0; + gl_out[gl_InvocationID].c_a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.vert b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.vert new file 
mode 100644 index 00000000000..ad6079061ec --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-0.vert @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_b [[user(locn1)]]; + float4 m_22_c [[user(locn2)]]; + float4 m_22_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + V _22 = {}; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_b = _22.b; + out.m_22_c = _22.c; + out.m_22_d = _22.d; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..2b535c312ef --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_a; + float4 m_22_c; + float4 m_22_d; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + V _22 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_a = _22.a; + out.m_22_c = _22.c; + out.m_22_d = _22.d; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..7db78a9ba5a --- /dev/null +++ 
b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc @@ -0,0 +1,41 @@ +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_a; + float m_11_b; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup C c[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.m_11_a = 1.0; + patchOut.m_11_b = 2.0; + c[gl_InvocationID].a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc new file mode 100644 index 00000000000..ad793918e2f --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_a; + float m_11_b; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x 
- gl_GlobalInvocationID.x % 4]; + threadgroup C spvStoragec[8][4]; + threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + patchOut.m_11_a = 1.0; + patchOut.m_11_b = 2.0; + c[gl_InvocationID].a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.vert b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.vert new file mode 100644 index 00000000000..3b830290f7c --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs-block.mask-location-1.vert @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_a [[user(locn0)]]; + float4 m_22_c [[user(locn2)]]; + float4 m_22_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + V _22 = {}; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_a = _22.a; + out.m_22_c = _22.c; + out.m_22_d = _22.d; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-clip-distance.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-clip-distance.vert new file mode 100644 index 00000000000..1f56f34a76d --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-clip-distance.vert @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + spvUnsafeArray gl_ClipDistance = {}; + out.v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..1c0aab5037b --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v1; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float4 v0 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..7c8e3878248 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v1; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams 
[[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v0[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].x = 2.0; + if (gl_InvocationID == 0) + { + patchOut.v1 = float4(2.0); + ((device float*)&patchOut.v1)[3u] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position.z = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc new file mode 100644 index 00000000000..7a5e183a26d --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc @@ -0,0 +1,81 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStoragev0[8][4]; + threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git 
a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.tesc new file mode 100644 index 00000000000..ef3ff9c2210 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.tesc @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v0[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + 
patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.vert new file mode 100644 index 00000000000..88c6bb6facc --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-0.vert @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + float4 v0 = {}; + v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..d558b7aed7c --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float4 v1 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.v0 = float4(1.0); + v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..bb87ced750d --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams 
[[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v1; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.x = 2.0; + if (gl_InvocationID == 0) + { + v1 = float4(2.0); + ((threadgroup float*)&v1)[3u] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position.z = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc new file mode 100644 index 00000000000..28ec0be0c65 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc @@ -0,0 +1,40 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v3; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup float4 spvStoragev1[8][2]; + threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if 
(gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((threadgroup float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((threadgroup float*)&v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.tesc new file mode 100644 index 00000000000..1673d523298 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.tesc @@ -0,0 +1,37 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v3; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v1[2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((threadgroup float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((threadgroup float*)&v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.vert new file mode 100644 index 00000000000..cc7d41794d2 --- /dev/null +++ 
b/reference/opt/shaders-msl/masking/write-outputs.mask-location-1.vert @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + float4 v1 = {}; + out.v0 = float4(1.0); + v1 = float4(2.0); + out.gl_Position = float4(3.0); + out.gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert new file mode 100644 index 00000000000..463ecc87b56 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0; + float4 v1; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float gl_PointSize = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc new file mode 100644 index 00000000000..694cdbb7ff4 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc @@ -0,0 +1,89 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = 
float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out_masked[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.tesc new file mode 100644 index 00000000000..da976c9a8a2 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.tesc @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup 
gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out_masked[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.vert b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.vert new file mode 100644 index 00000000000..ffdfdaaff4b --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-point-size.vert @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + float gl_PointSize = {}; + out.v0 = float4(1.0); + out.v1 = float4(2.0); + out.gl_Position = float4(3.0); + gl_PointSize = 4.0; + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 0.5; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc new file mode 100644 index 00000000000..c55e3376828 --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc @@ -0,0 +1,89 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" 
+#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = 
float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(10.0); + gl_out_masked[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/masking/write-outputs.mask-position.tesc b/reference/opt/shaders-msl/masking/write-outputs.mask-position.tesc new file mode 100644 index 00000000000..04584de49fa --- /dev/null +++ b/reference/opt/shaders-msl/masking/write-outputs.mask-position.tesc @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams 
[[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + patchOut.v1[0] = float4(2.0); + ((device float*)&patchOut.v1[0])[0u] = 3.0; + patchOut.v1[1] = float4(2.0); + ((device float*)&patchOut.v1[1])[0u] = 5.0; + } + patchOut.v3 = float4(5.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(10.0); + gl_out_masked[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + diff --git a/reference/opt/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc b/reference/opt/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc new file mode 100644 index 00000000000..1618eaa627f --- /dev/null +++ b/reference/opt/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc @@ -0,0 +1,123 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct S +{ + int x; + float4 y; + spvUnsafeArray z; +}; + +struct TheBlock +{ + spvUnsafeArray blockFa; + spvUnsafeArray blockSa; + float blockF; +}; + +struct main0_patchOut +{ + float2 in_te_positionScale; + float2 in_te_positionOffset; + spvUnsafeArray tcBlock; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_196; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 5]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 5, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 5, spvIndirectParams[1] - 1); + int _163; + _163 = 0; + float _111; + for (float _170 = 1.2999999523162841796875; _163 < 2; _170 = _111, _163++) + { + float _169; + _169 = _170; + for (int _164 = 0; _164 < 3; ) + { + patchOut.tcBlock[_163].blockFa[_164] = _169; + _169 += 0.4000000059604644775390625; + _164++; + continue; + } + int _165; + float _168; + _168 = _169; + _165 = 0; + float _174; + for (; _165 < 2; _168 = _174, _165++) + { + 
patchOut.tcBlock[_163].blockSa[_165].x = int(_168); + patchOut.tcBlock[_163].blockSa[_165].y = float4(_168 + 0.4000000059604644775390625, _168 + 1.2000000476837158203125, _168 + 2.0, _168 + 2.80000019073486328125); + _174 = _168 + 0.800000011920928955078125; + for (int _171 = 0; _171 < 2; ) + { + patchOut.tcBlock[_163].blockSa[_165].z[_171] = _174; + _174 += 0.4000000059604644775390625; + _171++; + continue; + } + } + patchOut.tcBlock[_163].blockF = _168; + _111 = _168 + 0.4000000059604644775390625; + } + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(gl_in[0].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(gl_in[1].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(gl_in[2].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(gl_in[3].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(gl_in[4].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(gl_in[5].in_tc_attr.x); + patchOut.in_te_positionScale = float2(gl_in[6].in_tc_attr.x, gl_in[7].in_tc_attr.x); + patchOut.in_te_positionOffset = float2(gl_in[8].in_tc_attr.x, gl_in[9].in_tc_attr.x); +} + diff --git a/reference/opt/shaders-msl/tesc/basic.multi-patch.tesc b/reference/opt/shaders-msl/tesc/basic.multi-patch.tesc new file mode 100644 index 00000000000..fe268316737 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/basic.multi-patch.tesc @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_patchOut +{ + float3 vFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + 
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); +} + diff --git a/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc b/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc new file mode 100644 index 00000000000..1bed1c90585 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc @@ -0,0 +1,128 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct Block_1 +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; + float4 gl_Position; +}; + +struct main0_in +{ + spvUnsafeArray in_a; + float in_b; + float2x2 in_m; + Meep in_meep; + spvUnsafeArray in_meeps; + spvUnsafeArray in_B_a; + float in_B_b; + float2x2 in_B_m; + Meep in_B_meep; + spvUnsafeArray in_B_meeps; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 
1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].a[0] = gl_in[gl_InvocationID].in_a[0]; + gl_out[gl_InvocationID].a[1] = gl_in[gl_InvocationID].in_a[1]; + gl_out[gl_InvocationID].b = gl_in[gl_InvocationID].in_b; + gl_out[gl_InvocationID].m = gl_in[gl_InvocationID].in_m; + gl_out[gl_InvocationID].meep.a = gl_in[gl_InvocationID].in_meep.a; + gl_out[gl_InvocationID].meep.b = gl_in[gl_InvocationID].in_meep.b; + gl_out[gl_InvocationID].meeps[0].a = gl_in[gl_InvocationID].in_meeps[0].a; + gl_out[gl_InvocationID].meeps[0].b = gl_in[gl_InvocationID].in_meeps[0].b; + gl_out[gl_InvocationID].meeps[1].a = gl_in[gl_InvocationID].in_meeps[1].a; + gl_out[gl_InvocationID].meeps[1].b = gl_in[gl_InvocationID].in_meeps[1].b; + gl_out[gl_InvocationID].B_a[0] = gl_in[gl_InvocationID].in_B_a[0]; + gl_out[gl_InvocationID].B_a[1] = gl_in[gl_InvocationID].in_B_a[1]; + gl_out[gl_InvocationID].B_b = gl_in[gl_InvocationID].in_B_b; + gl_out[gl_InvocationID].B_m = gl_in[gl_InvocationID].in_B_m; + gl_out[gl_InvocationID].B_meep.a = gl_in[gl_InvocationID].in_B_meep.a; + gl_out[gl_InvocationID].B_meep.b = gl_in[gl_InvocationID].in_B_meep.b; + gl_out[gl_InvocationID].B_meeps[0].a = gl_in[gl_InvocationID].in_B_meeps[0].a; + gl_out[gl_InvocationID].B_meeps[0].b = gl_in[gl_InvocationID].in_B_meeps[0].b; + gl_out[gl_InvocationID].B_meeps[1].a = gl_in[gl_InvocationID].in_B_meeps[1].a; + gl_out[gl_InvocationID].B_meeps[1].b = gl_in[gl_InvocationID].in_B_meeps[1].b; +} + diff --git a/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.tesc b/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.tesc new file mode 100644 index 00000000000..e785fdfe22d --- /dev/null +++ b/reference/opt/shaders-msl/tesc/complex-control-point-inout-types.tesc @@ -0,0 +1,132 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray 
+{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct Block_1 +{ + spvUnsafeArray a; + float b; + float2x2 m; +}; + +struct main0_out +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; + float4 gl_Position; +}; + +struct main0_in +{ + float in_a_0 [[attribute(0)]]; + float in_a_1 [[attribute(1)]]; + float in_b [[attribute(2)]]; + float2 in_m_0 [[attribute(3)]]; + float2 in_m_1 [[attribute(4)]]; + float in_meep_a [[attribute(5)]]; + float in_meep_b [[attribute(6)]]; + float in_B_a_0 [[attribute(11)]]; + float in_B_a_1 [[attribute(12)]]; + float in_B_b [[attribute(13)]]; + float2 in_B_m_0 [[attribute(14)]]; + float2 in_B_m_1 [[attribute(15)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if 
(gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].a[0] = gl_in[gl_InvocationID].in_a_0; + gl_out[gl_InvocationID].a[1] = gl_in[gl_InvocationID].in_a_1; + gl_out[gl_InvocationID].b = gl_in[gl_InvocationID].in_b; + float2x2 _178 = float2x2(gl_in[gl_InvocationID].in_m_0, gl_in[gl_InvocationID].in_m_1); + gl_out[gl_InvocationID].m = _178; + gl_out[gl_InvocationID].meep.a = gl_in[gl_InvocationID].in_meep_a; + gl_out[gl_InvocationID].meep.b = gl_in[gl_InvocationID].in_meep_b; + gl_out[gl_InvocationID].meeps[0].a = 1.0; + gl_out[gl_InvocationID].meeps[0].b = 2.0; + gl_out[gl_InvocationID].meeps[1].a = 3.0; + gl_out[gl_InvocationID].meeps[1].b = 4.0; + gl_out[gl_InvocationID].B_a[0] = gl_in[gl_InvocationID].in_B_a_0; + gl_out[gl_InvocationID].B_a[1] = gl_in[gl_InvocationID].in_B_a_1; + gl_out[gl_InvocationID].B_b = gl_in[gl_InvocationID].in_B_b; + float2x2 _216 = float2x2(gl_in[gl_InvocationID].in_B_m_0, gl_in[gl_InvocationID].in_B_m_1); + gl_out[gl_InvocationID].B_m = _216; + gl_out[gl_InvocationID].B_meep.a = 10.0; + gl_out[gl_InvocationID].B_meep.b = 20.0; + gl_out[gl_InvocationID].B_meeps[0].a = 5.0; + gl_out[gl_InvocationID].B_meeps[0].b = 6.0; + gl_out[gl_InvocationID].B_meeps[1].a = 7.0; + gl_out[gl_InvocationID].B_meeps[1].b = 8.0; +} + diff --git a/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc b/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000000..c3f7081552c --- /dev/null +++ b/reference/opt/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,107 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + patchOut.a[0] = 1.0; + patchOut.a[1] = 2.0; + patchOut.b = 3.0; + patchOut.m = float2x2(float2(2.0, 0.0), float2(0.0, 2.0)); + patchOut.meep.a = 4.0; + patchOut.meep.b = 5.0; + patchOut.meeps[0].a = 6.0; + patchOut.meeps[0].b = 7.0; + patchOut.meeps[1].a = 8.0; + patchOut.meeps[1].b = 9.0; + patchOut.B_a[0] = 1.0; + patchOut.B_a[1] = 2.0; + patchOut.B_b = 3.0; + patchOut.B_m = float2x2(float2(4.0, 0.0), 
float2(0.0, 4.0)); + patchOut.B_meep.a = 4.0; + patchOut.B_meep.b = 5.0; + patchOut.B_meeps[0].a = 6.0; + patchOut.B_meeps[0].b = 7.0; + patchOut.B_meeps[1].a = 8.0; + patchOut.B_meeps[1].b = 9.0; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc new file mode 100644 index 00000000000..5ea01cad9fa --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4x4 vOutputs; +}; + +struct main0_in +{ + float4x4 vInputs; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].vOutputs = gl_in[gl_InvocationID].vInputs; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.tesc new file mode 100644 index 00000000000..1ed36213b62 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-matrix.tesc @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4x4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs_0 [[attribute(0)]]; + float4 vInputs_1 [[attribute(1)]]; + float4 vInputs_2 [[attribute(2)]]; + float4 vInputs_3 [[attribute(3)]]; +}; + +kernel void 
main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + float4x4 _28 = float4x4(gl_in[gl_InvocationID].vInputs_0, gl_in[gl_InvocationID].vInputs_1, gl_in[gl_InvocationID].vInputs_2, gl_in[gl_InvocationID].vInputs_3); + gl_out[gl_InvocationID].vOutputs = _28; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc new file mode 100644 index 00000000000..de6ba178075 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexData +{ + float4x4 a; + spvUnsafeArray b; + float4 c; +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + VertexData vInputs; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + int _27 = gl_InvocationID ^ 1; + gl_out[gl_InvocationID].vOutputs = ((gl_in[gl_InvocationID].vInputs.a[1] + gl_in[gl_InvocationID].vInputs.b[1]) + gl_in[gl_InvocationID].vInputs.c) + gl_in[_27].vInputs.c; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.tesc new file mode 100644 index 00000000000..9eaaa2e6d50 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array-of-struct.tesc @@ -0,0 +1,81 @@ +#pragma 
clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexData +{ + float4x4 a; + spvUnsafeArray b; + float4 c; +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs_a_0 [[attribute(0)]]; + float4 vInputs_a_1 [[attribute(1)]]; + float4 vInputs_a_2 [[attribute(2)]]; + float4 vInputs_a_3 [[attribute(3)]]; + float4 vInputs_b_0 [[attribute(4)]]; + float4 vInputs_b_1 [[attribute(5)]]; + float4 vInputs_c [[attribute(6)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + int _27 = gl_InvocationID ^ 1; + gl_out[gl_InvocationID].vOutputs = ((gl_in[gl_InvocationID].vInputs_a_1 + 
gl_in[gl_InvocationID].vInputs_b_1) + gl_in[gl_InvocationID].vInputs_c) + gl_in[_27].vInputs_c; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array.multi-patch.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array.multi-patch.tesc new file mode 100644 index 00000000000..ddf142ccba7 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array.multi-patch.tesc @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs; + ushort2 m_44; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].vOutputs = gl_in[gl_InvocationID].vInputs; +} + diff --git a/reference/opt/shaders-msl/tesc/load-control-point-array.tesc b/reference/opt/shaders-msl/tesc/load-control-point-array.tesc new file mode 100644 index 00000000000..6f0da387e29 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/load-control-point-array.tesc @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device 
MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].vOutputs = gl_in[gl_InvocationID].vInputs; +} + diff --git a/reference/opt/shaders-msl/tesc/matrix-output.multi-patch.tesc b/reference/opt/shaders-msl/tesc/matrix-output.multi-patch.tesc new file mode 100644 index 00000000000..28fff015558 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/matrix-output.multi-patch.tesc @@ -0,0 +1,41 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float in_te_attr; + float4x3 in_te_data0; + float4x3 in_te_data1; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_104; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + float _15 = float(gl_InvocationID); + float3 _18 = float3(_15, 0.0, 0.0); + float3 _19 = float3(0.0, _15, 0.0); + float3 _20 = float3(0.0, 0.0, _15); + gl_out[gl_InvocationID].in_te_data0 = float4x3(_18, _19, _20, float3(0.0)); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + int _42 = (gl_InvocationID + 1) % 3; + gl_out[gl_InvocationID].in_te_data1 = float4x3(_18 + gl_out[_42].in_te_data0[0], _19 + gl_out[_42].in_te_data0[1], _20 + 
gl_out[_42].in_te_data0[2], gl_out[_42].in_te_data0[3]); + gl_out[gl_InvocationID].in_te_attr = gl_in[gl_InvocationID].in_tc_attr.x; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(1.0); +} + diff --git a/reference/opt/shaders-msl/tesc/reload-tess-level.multi-patch.tesc b/reference/opt/shaders-msl/tesc/reload-tess-level.multi-patch.tesc new file mode 100644 index 00000000000..ae33de517a3 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/reload-tess-level.multi-patch.tesc @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_in +{ + uint3 m_82; + ushort2 m_86; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + if (gl_InvocationID == 0) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0); + 
spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5)); + } + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} + diff --git a/reference/opt/shaders-msl/tesc/reload-tess-level.tesc b/reference/opt/shaders-msl/tesc/reload-tess-level.tesc new file mode 100644 index 00000000000..eafc50607d7 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/reload-tess-level.tesc @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_in +{ + float4 gl_Position [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + if (gl_InvocationID == 0) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), 
float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5)); + } + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} + diff --git a/reference/opt/shaders-msl/tesc/struct-output.multi-patch.tesc b/reference/opt/shaders-msl/tesc/struct-output.multi-patch.tesc new file mode 100644 index 00000000000..6c526546156 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/struct-output.multi-patch.tesc @@ -0,0 +1,48 @@ +#include +#include + +using namespace metal; + +struct te_data +{ + float a; + float b; + uint c; +}; + +struct main0_out +{ + float in_te_attr; + te_data in_te_data0; + te_data in_te_data1; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_119; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + float _15 = float(gl_InvocationID); + int _18 = gl_InvocationID + 1; + float _19 = float(_18); + uint _21 = uint(gl_InvocationID); + gl_out[gl_InvocationID].in_te_data0 = te_data{ _15, _19, _21 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + int _38 = _18 % 3; + gl_out[gl_InvocationID].in_te_data1 = te_data{ _15 + gl_out[_38].in_te_data0.a, _19 + gl_out[_38].in_te_data0.b, _21 + gl_out[_38].in_te_data0.c }; + 
gl_out[gl_InvocationID].in_te_attr = gl_in[gl_InvocationID].in_tc_attr.x; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(1.0); +} + diff --git a/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc b/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc new file mode 100644 index 00000000000..356a963d689 --- /dev/null +++ b/reference/opt/shaders-msl/tesc/water_tess.multi-patch.tesc @@ -0,0 +1,91 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4 uScale; + float3 uCamPos; + float2 uPatchSize; + float2 uMaxTessLevel; + float uDistanceMod; + float4 uFrustum[6]; +}; + +struct main0_patchOut +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +struct main0_in +{ + float3 vPatchPosBase; + ushort2 m_996; +}; + +kernel void main0(constant UBO& _41 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + float2 _431 = (gl_in[0].vPatchPosBase.xy - float2(10.0)) * _41.uScale.xy; + float2 _441 = ((gl_in[0].vPatchPosBase.xy + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy; + float3 _446 = float3(_431.x, -10.0, _431.y); + float3 _451 = float3(_441.x, 10.0, _441.y); + float4 
_467 = float4((_446 + _451) * 0.5, 1.0); + float3 _514 = float3(length(_451 - _446) * (-0.5)); + bool _516 = any(float3(dot(_41.uFrustum[0], _467), dot(_41.uFrustum[1], _467), dot(_41.uFrustum[2], _467)) <= _514); + bool _526; + if (!_516) + { + _526 = any(float3(dot(_41.uFrustum[3], _467), dot(_41.uFrustum[4], _467), dot(_41.uFrustum[5], _467)) <= _514); + } + else + { + _526 = _516; + } + if (!(!_526)) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(-1.0); + } + else + { + patchOut.vOutPatchPosBase = gl_in[0].vPatchPosBase.xy; + float2 _681 = fma(float2(-0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float2 _710 = fma(float2(0.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _729 = fast::clamp(log2((length(_41.uCamPos - float3(_710.x, 0.0, _710.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _739 = fma(float2(1.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float2 _768 = fma(float2(-0.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _787 = fast::clamp(log2((length(_41.uCamPos - float3(_768.x, 0.0, _768.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _797 = fma(float2(0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _816 = fast::clamp(log2((length(_41.uCamPos - float3(_797.x, 0.0, _797.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _826 = fma(float2(1.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * 
_41.uScale.xy; + float _845 = fast::clamp(log2((length(_41.uCamPos - float3(_826.x, 0.0, _826.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _855 = fma(float2(-0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float2 _884 = fma(float2(0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _903 = fast::clamp(log2((length(_41.uCamPos - float3(_884.x, 0.0, _884.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _913 = fma(float2(1.5), _41.uPatchSize, gl_in[0].vPatchPosBase.xy) * _41.uScale.xy; + float _614 = dot(float4(_787, _816, fast::clamp(log2((length(_41.uCamPos - float3(_855.x, 0.0, _855.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _903), float4(0.25)); + float _620 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_681.x, 0.0, _681.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _729, _787, _816), float4(0.25)); + float _626 = dot(float4(_729, fast::clamp(log2((length(_41.uCamPos - float3(_739.x, 0.0, _739.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _816, _845), float4(0.25)); + float _632 = dot(float4(_816, _845, _903, fast::clamp(log2((length(_41.uCamPos - float3(_913.x, 0.0, _913.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), float4(0.25)); + float4 _633 = float4(_614, _620, _626, _632); + patchOut.vPatchLods = _633; + float4 _940 = exp2(-fast::min(_633, _633.yzwx)) * _41.uMaxTessLevel.y; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_940.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_940.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_940.z); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_940.w); + float _948 = _41.uMaxTessLevel.y * 
exp2(-fast::min(fast::min(fast::min(_614, _620), fast::min(_626, _632)), _816)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_948); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_948); + } +} + diff --git a/reference/opt/shaders-msl/tesc/water_tess.tesc b/reference/opt/shaders-msl/tesc/water_tess.tesc index 9a75f05b43f..d9a6697cd95 100644 --- a/reference/opt/shaders-msl/tesc/water_tess.tesc +++ b/reference/opt/shaders-msl/tesc/water_tess.tesc @@ -32,23 +32,23 @@ kernel void main0(main0_in in [[stage_in]], constant UBO& _41 [[buffer(0)]], uin threadgroup_barrier(mem_flags::mem_threadgroup); if (gl_InvocationID >= 1) return; - float2 _430 = (gl_in[0].vPatchPosBase - float2(10.0)) * _41.uScale.xy; - float2 _440 = ((gl_in[0].vPatchPosBase + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy; - float3 _445 = float3(_430.x, -10.0, _430.y); - float3 _450 = float3(_440.x, 10.0, _440.y); - float4 _466 = float4((_445 + _450) * 0.5, 1.0); - float3 _513 = float3(length(_450 - _445) * (-0.5)); - bool _515 = any(float3(dot(_41.uFrustum[0], _466), dot(_41.uFrustum[1], _466), dot(_41.uFrustum[2], _466)) <= _513); - bool _525; - if (!_515) + float2 _431 = (gl_in[0].vPatchPosBase - float2(10.0)) * _41.uScale.xy; + float2 _441 = ((gl_in[0].vPatchPosBase + _41.uPatchSize) + float2(10.0)) * _41.uScale.xy; + float3 _446 = float3(_431.x, -10.0, _431.y); + float3 _451 = float3(_441.x, 10.0, _441.y); + float4 _467 = float4((_446 + _451) * 0.5, 1.0); + float3 _514 = float3(length(_451 - _446) * (-0.5)); + bool _516 = any(float3(dot(_41.uFrustum[0], _467), dot(_41.uFrustum[1], _467), dot(_41.uFrustum[2], _467)) <= _514); + bool _526; + if (!_516) { - _525 = any(float3(dot(_41.uFrustum[3], _466), dot(_41.uFrustum[4], _466), dot(_41.uFrustum[5], _466)) <= _513); + _526 = any(float3(dot(_41.uFrustum[3], _467), dot(_41.uFrustum[4], _467), dot(_41.uFrustum[5], _467)) <= _514); } else { - _525 = _515; + _526 = _516; } - if (!(!_525)) + if (!(!_526)) { 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0); @@ -60,34 +60,34 @@ kernel void main0(main0_in in [[stage_in]], constant UBO& _41 [[buffer(0)]], uin else { patchOut.vOutPatchPosBase = gl_in[0].vPatchPosBase; - float2 _678 = (gl_in[0].vPatchPosBase + (float2(-0.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _706 = (gl_in[0].vPatchPosBase + (float2(0.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _725 = fast::clamp(log2((length(_41.uCamPos - float3(_706.x, 0.0, _706.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _734 = (gl_in[0].vPatchPosBase + (float2(1.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _762 = (gl_in[0].vPatchPosBase + (float2(-0.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _781 = fast::clamp(log2((length(_41.uCamPos - float3(_762.x, 0.0, _762.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _790 = (gl_in[0].vPatchPosBase + (float2(0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _809 = fast::clamp(log2((length(_41.uCamPos - float3(_790.x, 0.0, _790.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _818 = (gl_in[0].vPatchPosBase + (float2(1.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _837 = fast::clamp(log2((length(_41.uCamPos - float3(_818.x, 0.0, _818.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _846 = (gl_in[0].vPatchPosBase + (float2(-0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - float2 _874 = (gl_in[0].vPatchPosBase + (float2(0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - float _893 = fast::clamp(log2((length(_41.uCamPos - float3(_874.x, 0.0, _874.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - float2 _902 = (gl_in[0].vPatchPosBase + (float2(1.5) * _41.uPatchSize)) * 
_41.uScale.xy; - float _612 = dot(float4(_781, _809, fast::clamp(log2((length(_41.uCamPos - float3(_846.x, 0.0, _846.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _893), float4(0.25)); - float _618 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_678.x, 0.0, _678.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _725, _781, _809), float4(0.25)); - float _624 = dot(float4(_725, fast::clamp(log2((length(_41.uCamPos - float3(_734.x, 0.0, _734.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _809, _837), float4(0.25)); - float _630 = dot(float4(_809, _837, _893, fast::clamp(log2((length(_41.uCamPos - float3(_902.x, 0.0, _902.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), float4(0.25)); - float4 _631 = float4(_612, _618, _624, _630); - patchOut.vPatchLods = _631; - float4 _928 = exp2(-fast::min(_631, _631.yzwx)) * _41.uMaxTessLevel.y; - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_928.x); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_928.y); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_928.z); - spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_928.w); - float _935 = _41.uMaxTessLevel.y * exp2(-fast::min(fast::min(fast::min(_612, _618), fast::min(_624, _630)), _809)); - spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_935); - spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_935); + float2 _681 = fma(float2(-0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float2 _710 = fma(float2(0.5, -0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _729 = fast::clamp(log2((length(_41.uCamPos - float3(_710.x, 0.0, _710.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _739 = fma(float2(1.5, -0.5), 
_41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float2 _768 = fma(float2(-0.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _787 = fast::clamp(log2((length(_41.uCamPos - float3(_768.x, 0.0, _768.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _797 = fma(float2(0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _816 = fast::clamp(log2((length(_41.uCamPos - float3(_797.x, 0.0, _797.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _826 = fma(float2(1.5, 0.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _845 = fast::clamp(log2((length(_41.uCamPos - float3(_826.x, 0.0, _826.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _855 = fma(float2(-0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float2 _884 = fma(float2(0.5, 1.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _903 = fast::clamp(log2((length(_41.uCamPos - float3(_884.x, 0.0, _884.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + float2 _913 = fma(float2(1.5), _41.uPatchSize, gl_in[0].vPatchPosBase) * _41.uScale.xy; + float _614 = dot(float4(_787, _816, fast::clamp(log2((length(_41.uCamPos - float3(_855.x, 0.0, _855.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _903), float4(0.25)); + float _620 = dot(float4(fast::clamp(log2((length(_41.uCamPos - float3(_681.x, 0.0, _681.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _729, _787, _816), float4(0.25)); + float _626 = dot(float4(_729, fast::clamp(log2((length(_41.uCamPos - float3(_739.x, 0.0, _739.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _816, _845), float4(0.25)); + float _632 = dot(float4(_816, _845, 
_903, fast::clamp(log2((length(_41.uCamPos - float3(_913.x, 0.0, _913.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), float4(0.25)); + float4 _633 = float4(_614, _620, _626, _632); + patchOut.vPatchLods = _633; + float4 _940 = exp2(-fast::min(_633, _633.yzwx)) * _41.uMaxTessLevel.y; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_940.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_940.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_940.z); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_940.w); + float _948 = _41.uMaxTessLevel.y * exp2(-fast::min(fast::min(fast::min(_614, _620), fast::min(_626, _632)), _816)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_948); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_948); } } diff --git a/reference/opt/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese b/reference/opt/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese new file mode 100644 index 00000000000..e1f1f3c9df9 --- /dev/null +++ b/reference/opt/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _35 +{ + float dummy; + float4 variableInStruct; +}; + +struct main0_out +{ + float outResult [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + spvUnsafeArray<_35, 3> testStructArray; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(float3 gl_TessCoord [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - float2(1.0), 0.0, 1.0); + out.outResult = ((float(abs(gl_in[0].testStructArray[2].variableInStruct.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(gl_in[0].testStructArray[2].variableInStruct.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].testStructArray[2].variableInStruct.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].testStructArray[2].variableInStruct.w - 7.0) < 0.001000000047497451305389404296875); + return out; +} + diff --git a/reference/opt/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese b/reference/opt/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese new file mode 100644 index 00000000000..d81d44b0700 --- /dev/null +++ 
b/reference/opt/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct t35 +{ + float2 m0; + float4 m1; +}; + +struct t36 +{ + float2 m0; + t35 m1; +}; + +struct main0_out +{ + float v80 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 v40_m0; + t35 v40_m1; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(float3 gl_TessCoord [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - float2(1.0), 0.0, 1.0); + out.v80 = ((float(abs(gl_in[0].v40_m1.m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(gl_in[0].v40_m1.m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].v40_m1.m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].v40_m1.m1.w - 7.0) < 0.001000000047497451305389404296875); + return out; +} + diff --git a/reference/opt/shaders-msl/tese/in-block-with-nested-struct.tese b/reference/opt/shaders-msl/tese/in-block-with-nested-struct.tese new file mode 100644 index 00000000000..86553955813 --- /dev/null +++ b/reference/opt/shaders-msl/tese/in-block-with-nested-struct.tese @@ -0,0 +1,43 @@ +#include +#include + +using namespace metal; + +struct t35 +{ + float2 m0; + float4 m1; +}; + +struct t36 +{ + float2 m0; + t35 m1; +}; + +struct main0_out +{ + float v80 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 v40_m0 [[attribute(0)]]; + float2 v40_m1_m0 [[attribute(1)]]; + float4 v40_m1_m1 [[attribute(2)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - 
float2(1.0), 0.0, 1.0); + out.v80 = ((float(abs(patchIn.gl_in[0].v40_m1_m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(patchIn.gl_in[0].v40_m1_m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(patchIn.gl_in[0].v40_m1_m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(patchIn.gl_in[0].v40_m1_m1.w - 7.0) < 0.001000000047497451305389404296875); + return out; +} + diff --git a/reference/opt/shaders-msl/tese/input-array.tese b/reference/opt/shaders-msl/tese/input-array.tese index 97a83b4eedf..8f1002128d0 100644 --- a/reference/opt/shaders-msl/tese/input-array.tese +++ b/reference/opt/shaders-msl/tese/input-array.tese @@ -19,9 +19,10 @@ struct main0_patchIn patch_control_point gl_in; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); out.gl_Position = (patchIn.gl_in[0].Floats * gl_TessCoord.x) + (patchIn.gl_in[1].Floats2 * gl_TessCoord.y); return out; } diff --git a/reference/opt/shaders-msl/tese/input-types.raw-tess-in.tese b/reference/opt/shaders-msl/tese/input-types.raw-tess-in.tese new file mode 100644 index 00000000000..e918f5b6530 --- /dev/null +++ b/reference/opt/shaders-msl/tese/input-types.raw-tess-in.tese @@ -0,0 +1,70 @@ +#include +#include + +using namespace metal; + +struct Block +{ + float4 a; + float4 b; +}; + +struct PatchBlock +{ + float4 a; + float4 b; +}; + +struct Foo +{ + float4 a; + float4 b; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vColor; + float4 blocks_a; + float4 blocks_b; + Foo vFoos; +}; + +struct main0_patchIn +{ + float4 vColors; + float4 patch_block_a; + float4 patch_block_b; + Foo vFoo; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(uint 
gl_PrimitiveID [[patch_id]], const device main0_patchIn* spvPatchIn [[buffer(20)]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + PatchBlock patch_block = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + const device main0_patchIn& patchIn = spvPatchIn[gl_PrimitiveID]; + patch_block.a = patchIn.patch_block_a; + patch_block.b = patchIn.patch_block_b; + out.gl_Position = gl_in[0].blocks_a; + out.gl_Position += gl_in[0].blocks_b; + out.gl_Position += gl_in[1].blocks_a; + out.gl_Position += gl_in[1].blocks_b; + out.gl_Position += patch_block.a; + out.gl_Position += patch_block.b; + out.gl_Position += gl_in[0].vColor; + out.gl_Position += gl_in[1].vColor; + out.gl_Position += patchIn.vColors; + out.gl_Position += patchIn.vFoo.a; + out.gl_Position += patchIn.vFoo.b; + out.gl_Position += gl_in[0].vFoos.a; + out.gl_Position += gl_in[0].vFoos.b; + out.gl_Position += gl_in[1].vFoos.a; + out.gl_Position += gl_in[1].vFoos.b; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/input-types.tese b/reference/opt/shaders-msl/tese/input-types.tese index 2a936fce3aa..25b25ff94e1 100644 --- a/reference/opt/shaders-msl/tese/input-types.tese +++ b/reference/opt/shaders-msl/tese/input-types.tese @@ -29,19 +29,19 @@ struct main0_out struct main0_in { float4 vColor [[attribute(0)]]; - float4 Block_a [[attribute(4)]]; - float4 Block_b [[attribute(5)]]; - float4 Foo_a [[attribute(14)]]; - float4 Foo_b [[attribute(15)]]; + float4 blocks_a [[attribute(4)]]; + float4 blocks_b [[attribute(5)]]; + float4 vFoos_a [[attribute(14)]]; + float4 vFoos_b [[attribute(15)]]; }; struct main0_patchIn { float4 vColors [[attribute(1)]]; - float4 PatchBlock_a [[attribute(6)]]; - float4 PatchBlock_b [[attribute(7)]]; - float4 Foo_a [[attribute(8)]]; - float4 Foo_b [[attribute(9)]]; + float4 patch_block_a [[attribute(6)]]; + float4 patch_block_b [[attribute(7)]]; + float4 vFoo_a [[attribute(8)]]; + float4 vFoo_b [[attribute(9)]]; patch_control_point gl_in; 
}; @@ -50,14 +50,14 @@ struct main0_patchIn main0_out out = {}; PatchBlock patch_block = {}; Foo vFoo = {}; - patch_block.a = patchIn.PatchBlock_a; - patch_block.b = patchIn.PatchBlock_b; - vFoo.a = patchIn.Foo_a; - vFoo.b = patchIn.Foo_b; - out.gl_Position = patchIn.gl_in[0].Block_a; - out.gl_Position += patchIn.gl_in[0].Block_b; - out.gl_Position += patchIn.gl_in[1].Block_a; - out.gl_Position += patchIn.gl_in[1].Block_b; + patch_block.a = patchIn.patch_block_a; + patch_block.b = patchIn.patch_block_b; + vFoo.a = patchIn.vFoo_a; + vFoo.b = patchIn.vFoo_b; + out.gl_Position = patchIn.gl_in[0].blocks_a; + out.gl_Position += patchIn.gl_in[0].blocks_b; + out.gl_Position += patchIn.gl_in[1].blocks_a; + out.gl_Position += patchIn.gl_in[1].blocks_b; out.gl_Position += patch_block.a; out.gl_Position += patch_block.b; out.gl_Position += patchIn.gl_in[0].vColor; @@ -65,16 +65,12 @@ struct main0_patchIn out.gl_Position += patchIn.vColors; out.gl_Position += vFoo.a; out.gl_Position += vFoo.b; - Foo vFoos_202; - vFoos_202.a = patchIn.gl_in[0].Foo_a; - vFoos_202.b = patchIn.gl_in[0].Foo_b; - out.gl_Position += vFoos_202.a; - out.gl_Position += vFoos_202.b; - Foo vFoos_216; - vFoos_216.a = patchIn.gl_in[1].Foo_a; - vFoos_216.b = patchIn.gl_in[1].Foo_b; - out.gl_Position += vFoos_216.a; - out.gl_Position += vFoos_216.b; + Foo _204 = Foo{ patchIn.gl_in[0].vFoos_a, patchIn.gl_in[0].vFoos_b }; + out.gl_Position += _204.a; + out.gl_Position += _204.b; + Foo _218 = Foo{ patchIn.gl_in[1].vFoos_a, patchIn.gl_in[1].vFoos_b }; + out.gl_Position += _218.a; + out.gl_Position += _218.b; return out; } diff --git a/reference/opt/shaders-msl/tese/load-control-point-array-of-matrix.tese b/reference/opt/shaders-msl/tese/load-control-point-array-of-matrix.tese new file mode 100644 index 00000000000..e4bbeb7ede9 --- /dev/null +++ b/reference/opt/shaders-msl/tese/load-control-point-array-of-matrix.tese @@ -0,0 +1,85 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang 
diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInputs_0 [[attribute(0)]]; + float4 vInputs_1 [[attribute(1)]]; + float4 vInputs_2 [[attribute(2)]]; + float4 vInputs_3 [[attribute(3)]]; +}; + +struct main0_patchIn +{ + float4 vBoo_0 [[attribute(4)]]; + float4 vBoo_1 [[attribute(5)]]; + float4 vBoo_2 [[attribute(6)]]; + float4 vBoo_3 [[attribute(7)]]; + int vIndex [[attribute(8)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray vBoo = {}; + vBoo[0] = patchIn.vBoo_0; + vBoo[1] = patchIn.vBoo_1; + vBoo[2] = patchIn.vBoo_2; + vBoo[3] = patchIn.vBoo_3; + float4x4 _57 = float4x4(patchIn.gl_in[0u].vInputs_0, patchIn.gl_in[0u].vInputs_1, patchIn.gl_in[0u].vInputs_2, patchIn.gl_in[0u].vInputs_3); + float4x4 _59 = float4x4(patchIn.gl_in[1u].vInputs_0, patchIn.gl_in[1u].vInputs_1, patchIn.gl_in[1u].vInputs_2, patchIn.gl_in[1u].vInputs_3); + float4x4 _47 = _57; + float4x4 _48 = _59; + out.gl_Position = (_47[patchIn.vIndex] + _48[patchIn.vIndex]) + vBoo[patchIn.vIndex]; + return out; +} + diff --git 
a/reference/opt/shaders-msl/tese/load-control-point-array.tese b/reference/opt/shaders-msl/tese/load-control-point-array.tese new file mode 100644 index 00000000000..54d7419f5a3 --- /dev/null +++ b/reference/opt/shaders-msl/tese/load-control-point-array.tese @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInputs [[attribute(0)]]; +}; + +struct main0_patchIn +{ + float4 vBoo_0 [[attribute(1)]]; + float4 vBoo_1 [[attribute(2)]]; + float4 vBoo_2 [[attribute(3)]]; + float4 vBoo_3 [[attribute(4)]]; + int vIndex [[attribute(5)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray vBoo = {}; + vBoo[0] = patchIn.vBoo_0; + vBoo[1] = patchIn.vBoo_1; + vBoo[2] = patchIn.vBoo_2; + vBoo[3] = patchIn.vBoo_3; + out.gl_Position = (patchIn.gl_in[0u].vInputs + patchIn.gl_in[1u].vInputs) + vBoo[patchIn.vIndex]; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/quad.domain.tese b/reference/opt/shaders-msl/tese/quad.domain.tese index 
78b58ab9975..81c4aa49a45 100644 --- a/reference/opt/shaders-msl/tese/quad.domain.tese +++ b/reference/opt/shaders-msl/tese/quad.domain.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,15 +51,24 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = 
patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); gl_TessCoord.y = 1.0 - gl_TessCoord.y; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y), 0.0, 1.0); + out.gl_Position = float4(fma(gl_TessCoord.x * gl_TessLevelInner[0], gl_TessLevelOuter[0], ((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), fma(gl_TessCoord.y * gl_TessLevelInner[1], gl_TessLevelOuter[3], ((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/tese/quad.tese b/reference/opt/shaders-msl/tese/quad.tese index 83ef729321e..bfa96f9cfbd 100644 --- a/reference/opt/shaders-msl/tese/quad.tese +++ b/reference/opt/shaders-msl/tese/quad.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,14 +51,23 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w), 0.0, 1.0); + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = 
patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + out.gl_Position = float4(fma(gl_TessCoord.x * gl_TessLevelInner[0], gl_TessLevelOuter[0], ((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), fma(gl_TessCoord.y * gl_TessLevelInner[1], gl_TessLevelOuter[1], ((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); return out; } diff --git a/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese b/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese new file mode 100644 index 00000000000..44bdd5ffe9d --- /dev/null +++ b/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; 
+ gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + out.gl_Position = float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese b/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..65d2fd94f72 --- /dev/null +++ b/reference/opt/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese @@ -0,0 +1,66 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]; + gl_TessLevelOuter[1] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]; + gl_TessLevelOuter[2] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]; + gl_TessLevelOuter[3] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]; + gl_TessLevelInner[0] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0]; + gl_TessLevelInner[1] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1]; + out.gl_Position = float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/read-tess-level-in-func.msl2.tese b/reference/opt/shaders-msl/tese/read-tess-level-in-func.msl2.tese new file mode 100644 index 00000000000..decaca3d5a7 --- /dev/null +++ b/reference/opt/shaders-msl/tese/read-tess-level-in-func.msl2.tese @@ -0,0 +1,69 @@ +#pragma clang diagnostic 
ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevel [[attribute(0)]]; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = patchIn.gl_TessLevel[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevel[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevel[2]; + gl_TessLevelInner[0] = patchIn.gl_TessLevel[3]; + out.gl_Position = float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese b/reference/opt/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..0b555ed0fee --- /dev/null +++ b/reference/opt/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese @@ -0,0 +1,64 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic 
ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]; + gl_TessLevelOuter[1] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]; + gl_TessLevelOuter[2] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]; + gl_TessLevelInner[0] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor; + out.gl_Position = float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; + return out; +} + diff --git a/reference/opt/shaders-msl/tese/set-from-function.tese b/reference/opt/shaders-msl/tese/set-from-function.tese index 6dcdbe19387..ad4f61b9562 100644 --- a/reference/opt/shaders-msl/tese/set-from-function.tese +++ b/reference/opt/shaders-msl/tese/set-from-function.tese @@ -23,15 +23,15 @@ struct main0_out struct main0_in { float4 vColor 
[[attribute(0)]]; - float4 Block_a [[attribute(2)]]; - float4 Block_b [[attribute(3)]]; + float4 blocks_a [[attribute(2)]]; + float4 blocks_b [[attribute(3)]]; }; struct main0_patchIn { float4 vColors [[attribute(1)]]; - float4 Foo_a [[attribute(4)]]; - float4 Foo_b [[attribute(5)]]; + float4 vFoo_a [[attribute(4)]]; + float4 vFoo_b [[attribute(5)]]; patch_control_point gl_in; }; @@ -39,12 +39,12 @@ struct main0_patchIn { main0_out out = {}; Foo vFoo = {}; - vFoo.a = patchIn.Foo_a; - vFoo.b = patchIn.Foo_b; - out.gl_Position = patchIn.gl_in[0].Block_a; - out.gl_Position += patchIn.gl_in[0].Block_b; - out.gl_Position += patchIn.gl_in[1].Block_a; - out.gl_Position += patchIn.gl_in[1].Block_b; + vFoo.a = patchIn.vFoo_a; + vFoo.b = patchIn.vFoo_b; + out.gl_Position = patchIn.gl_in[0].blocks_a; + out.gl_Position += patchIn.gl_in[0].blocks_b; + out.gl_Position += patchIn.gl_in[1].blocks_a; + out.gl_Position += patchIn.gl_in[1].blocks_b; out.gl_Position += patchIn.gl_in[0].vColor; out.gl_Position += patchIn.gl_in[1].vColor; out.gl_Position += patchIn.vColors; diff --git a/reference/opt/shaders-msl/tese/triangle-tess-level.tese b/reference/opt/shaders-msl/tese/triangle-tess-level.tese index 975e6298518..86ccc4f023e 100644 --- a/reference/opt/shaders-msl/tese/triangle-tess-level.tese +++ b/reference/opt/shaders-msl/tese/triangle-tess-level.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -16,12 +57,12 @@ struct main0_patchIn [[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float3 gl_TessCoord [[position_in_patch]]) { main0_out out = {}; - float gl_TessLevelInner[2] = {}; - float gl_TessLevelOuter[4] = {}; - gl_TessLevelInner[0] = patchIn.gl_TessLevel.w; - gl_TessLevelOuter[0] = patchIn.gl_TessLevel.x; - gl_TessLevelOuter[1] = patchIn.gl_TessLevel.y; - gl_TessLevelOuter[2] = patchIn.gl_TessLevel.z; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevel[3]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevel[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevel[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevel[2]; out.gl_Position = float4((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0], (gl_TessCoord.y * gl_TessLevelInner[0]) * gl_TessLevelOuter[1], (gl_TessCoord.z * gl_TessLevelInner[0]) * gl_TessLevelOuter[2], 1.0); return out; } diff --git a/reference/opt/shaders-msl/tese/water_tess.raw-tess-in.tese b/reference/opt/shaders-msl/tese/water_tess.raw-tess-in.tese new file mode 100644 index 00000000000..d4441c2eeeb --- /dev/null +++ b/reference/opt/shaders-msl/tese/water_tess.raw-tess-in.tese @@ -0,0 +1,46 @@ +#include +#include + 
+using namespace metal; + +struct UBO +{ + float4x4 uMVP; + float4 uScale; + float2 uInvScale; + float3 uCamPos; + float2 uPatchSize; + float2 uInvHeightmapSize; +}; + +struct main0_out +{ + float3 vWorld [[user(locn0)]]; + float4 vGradNormalTex [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(constant UBO& _31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoordIn [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_patchIn* spvPatchIn [[buffer(20)]]) +{ + main0_out out = {}; + const device main0_patchIn& patchIn = spvPatchIn[gl_PrimitiveID]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + float2 _202 = fma(gl_TessCoord.xy, _31.uPatchSize, patchIn.vOutPatchPosBase); + float2 _216 = mix(patchIn.vPatchLods.yx, patchIn.vPatchLods.zw, float2(gl_TessCoord.x)); + float _223 = mix(_216.x, _216.y, gl_TessCoord.y); + float _225 = floor(_223); + float2 _141 = _31.uInvHeightmapSize * exp2(_225); + out.vGradNormalTex = float4(fma(_202, _31.uInvHeightmapSize, _31.uInvHeightmapSize * 0.5), (_202 * _31.uInvHeightmapSize) * _31.uScale.zw); + float3 _256 = mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 0.5), level(_225)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 1.0), level(_225 + 1.0)).xyz, float3(_223 - _225)); + float2 _171 = fma(_202, _31.uScale.xy, _256.yz); + out.vWorld = float3(_171.x, _256.x, _171.y); + out.gl_Position = _31.uMVP * float4(out.vWorld, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/tese/water_tess.tese b/reference/opt/shaders-msl/tese/water_tess.tese index dd93e4134d5..e91063f2d4e 100644 --- a/reference/opt/shaders-msl/tese/water_tess.tese +++ 
b/reference/opt/shaders-msl/tese/water_tess.tese @@ -26,19 +26,19 @@ struct main0_patchIn float4 vPatchLods [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant UBO& _31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant UBO& _31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - float2 _201 = patchIn.vOutPatchPosBase + (float3(gl_TessCoord, 0).xy * _31.uPatchSize); - float2 _214 = mix(patchIn.vPatchLods.yx, patchIn.vPatchLods.zw, float2(float3(gl_TessCoord, 0).x)); - float _221 = mix(_214.x, _214.y, float3(gl_TessCoord, 0).y); - float _223 = floor(_221); - float2 _125 = _201 * _31.uInvHeightmapSize; - float2 _141 = _31.uInvHeightmapSize * exp2(_223); - out.vGradNormalTex = float4(_125 + (_31.uInvHeightmapSize * 0.5), _125 * _31.uScale.zw); - float3 _253 = mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (_125 + (_141 * 0.5)), level(_223)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (_125 + (_141 * 1.0)), level(_223 + 1.0)).xyz, float3(_221 - _223)); - float2 _171 = (_201 * _31.uScale.xy) + _253.yz; - out.vWorld = float3(_171.x, _253.x, _171.y); + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + float2 _202 = fma(gl_TessCoord.xy, _31.uPatchSize, patchIn.vOutPatchPosBase); + float2 _216 = mix(patchIn.vPatchLods.yx, patchIn.vPatchLods.zw, float2(gl_TessCoord.x)); + float _223 = mix(_216.x, _216.y, gl_TessCoord.y); + float _225 = floor(_223); + float2 _141 = _31.uInvHeightmapSize * exp2(_225); + out.vGradNormalTex = float4(fma(_202, _31.uInvHeightmapSize, _31.uInvHeightmapSize * 0.5), (_202 * _31.uInvHeightmapSize) * 
_31.uScale.zw); + float3 _256 = mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 0.5), level(_225)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, fma(_202, _31.uInvHeightmapSize, _141 * 1.0), level(_225 + 1.0)).xyz, float3(_223 - _225)); + float2 _171 = fma(_202, _31.uScale.xy, _256.yz); + out.vWorld = float3(_171.x, _256.x, _171.y); out.gl_Position = _31.uMVP * float4(out.vWorld, 1.0); return out; } diff --git a/reference/opt/shaders-msl/vert/array-component-io.for-tess.vert b/reference/opt/shaders-msl/vert/array-component-io.for-tess.vert new file mode 100644 index 00000000000..24958eb50db --- /dev/null +++ b/reference/opt/shaders-msl/vert/array-component-io.for-tess.vert @@ -0,0 +1,98 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 m_location_0; + float4 m_location_1; + float4 m_location_2; + float4 gl_Position; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; + float4 m_location_1 [[attribute(1)]]; + float4 m_location_2 [[attribute(2)]]; + float4 Pos [[attribute(4)]]; +}; + +kernel void main0(main0_in in [[stage_in]], 
uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + float D = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + float InD = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + InA[0] = in.m_location_1.x; + InA[1] = in.m_location_2.x; + InB[0] = in.m_location_1.zw; + InB[1] = in.m_location_2.zw; + InC[0] = in.m_location_0.y; + InC[1] = in.m_location_1.y; + InC[2] = in.m_location_2.y; + InD = in.m_location_0.w; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = in.Pos; + A = InA; + B = InB; + C = InC; + D = InD; + out.m_location_1.x = A[0]; + out.m_location_2.x = A[1]; + out.m_location_1.zw = B[0]; + out.m_location_2.zw = B[1]; + out.m_location_0.y = C[0]; + out.m_location_1.y = C[1]; + out.m_location_2.y = C[2]; + out.m_location_0.w = D; +} + diff --git a/reference/opt/shaders-msl/vert/array-component-io.vert b/reference/opt/shaders-msl/vert/array-component-io.vert new file mode 100644 index 00000000000..352c9d2ef0f --- /dev/null +++ b/reference/opt/shaders-msl/vert/array-component-io.vert @@ -0,0 +1,100 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float C_0 [[user(locn0_1)]]; + float D [[user(locn0_3)]]; + float A_0 [[user(locn1)]]; + float C_1 [[user(locn1_1)]]; + float2 B_0 [[user(locn1_2)]]; + float A_1 [[user(locn2)]]; + float C_2 [[user(locn2_1)]]; + float2 B_1 [[user(locn2_2)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; + float4 m_location_1 [[attribute(1)]]; + float4 m_location_2 [[attribute(2)]]; + float4 Pos [[attribute(4)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + float InD = {}; + InA[0] = in.m_location_1.x; + InA[1] = in.m_location_2.x; + InB[0] = in.m_location_1.zw; + InB[1] = in.m_location_2.zw; + InC[0] = in.m_location_0.y; + InC[1] = in.m_location_1.y; + InC[2] = in.m_location_2.y; + InD = in.m_location_0.w; + out.gl_Position = in.Pos; + A = InA; + B = InB; + C = InC; + out.D = InD; + out.A_0 = A[0]; + out.A_1 = A[1]; + out.B_0 = B[0]; + out.B_1 = B[1]; + out.C_0 = C[0]; + out.C_1 = C[1]; + out.C_2 = C[2]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/basic.for-tess.vert b/reference/opt/shaders-msl/vert/basic.for-tess.vert new file mode 100644 
index 00000000000..c99a95ac898 --- /dev/null +++ b/reference/opt/shaders-msl/vert/basic.for-tess.vert @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; +}; + +struct main0_out +{ + float3 vNormal; + float4 gl_Position; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = _16.uMVP * in.aVertex; + out.vNormal = in.aNormal; +} + diff --git a/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000000..38442986959 --- /dev/null +++ b/reference/opt/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,62 @@ +#include +#include + +using namespace metal; + +struct Position; +struct PositionReferences; + +struct Position +{ + float2 positions[1]; +}; + +struct Registers +{ + float4x4 view_projection; + device PositionReferences* references; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct main0_out +{ + float4 out_color [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant Registers& registers [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + int slice = int(gl_InstanceIndex); + const device Position* __restrict positions = registers.references->buffers[int(gl_InstanceIndex)]; + float2 _45 = registers.references->buffers[int(gl_InstanceIndex)]->positions[int(gl_VertexIndex)] * 2.5; + float2 pos = _45; + float2 _60 = _45 + 
((float2(float(int(gl_InstanceIndex) % 8), float(int(gl_InstanceIndex) / 8)) - float2(3.5)) * 3.0); + pos = _60; + out.gl_Position = registers.view_projection * float4(_60, 0.0, 1.0); + int _82 = int(gl_VertexIndex) % 16; + int index_x = _82; + int _85 = int(gl_VertexIndex) / 16; + int index_y = _85; + float _92 = sin(float(_82)); + float _94 = fma(0.300000011920928955078125, _92, 0.5); + float r = _94; + float _98 = sin(float(_85)); + float _100 = fma(0.300000011920928955078125, _98, 0.5); + float g = _100; + int _105 = (_82 ^ _85) & 1; + int checkerboard = _105; + float _107 = float(_105); + float _111 = fma(_107, 0.800000011920928955078125, 0.20000000298023223876953125); + float _113 = _94 * _111; + r = _113; + float _119 = _100 * _111; + g = _119; + out.out_color = float4(_113, _119, 0.1500000059604644775390625, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/clip-distance-block.no-user-varying.vert b/reference/opt/shaders-msl/vert/clip-distance-block.no-user-varying.vert new file mode 100644 index 00000000000..c78105e0ce6 --- /dev/null +++ b/reference/opt/shaders-msl/vert/clip-distance-block.no-user-varying.vert @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.Position; + out.gl_ClipDistance[0] = in.Position.x; + out.gl_ClipDistance[1] = in.Position.y; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/clip-distance-block.vert b/reference/opt/shaders-msl/vert/clip-distance-block.vert new file mode 100644 index 00000000000..af58f35ff5f --- /dev/null +++ b/reference/opt/shaders-msl/vert/clip-distance-block.vert @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float 
gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.Position; + out.gl_ClipDistance[0] = in.Position.x; + out.gl_ClipDistance[1] = in.Position.y; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/copy.flatten.vert b/reference/opt/shaders-msl/vert/copy.flatten.vert index d73ee3282fb..32fde3a48b8 100644 --- a/reference/opt/shaders-msl/vert/copy.flatten.vert +++ b/reference/opt/shaders-msl/vert/copy.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int _96 = 0; _96 < 4; ) { float3 _68 = in.aVertex.xyz - float3(_21.lights[_96].Position); - out.vColor += ((_21.lights[_96].Color * fast::clamp(1.0 - (length(_68) / _21.lights[_96].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_68))); + out.vColor += ((_21.lights[_96].Color * fast::clamp(1.0 - (length(_68) / _21.lights[_96].Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(_68))); _96++; continue; } diff --git a/reference/opt/shaders-msl/vert/dynamic.flatten.vert b/reference/opt/shaders-msl/vert/dynamic.flatten.vert index 92911a4eebd..26264ddf95e 100644 --- a/reference/opt/shaders-msl/vert/dynamic.flatten.vert +++ b/reference/opt/shaders-msl/vert/dynamic.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int _82 = 0; _82 < 4; ) { float3 _54 = in.aVertex.xyz - float3(_21.lights[_82].Position); - out.vColor += ((_21.lights[_82].Color * fast::clamp(1.0 - (length(_54) / _21.lights[_82].Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(_54))); + out.vColor += ((_21.lights[_82].Color * fast::clamp(1.0 - (length(_54) / _21.lights[_82].Radius), 0.0, 
1.0)) * dot(in.aNormal, fast::normalize(_54))); _82++; continue; } diff --git a/reference/opt/shaders-msl/vert/float-math.invariant-float-math.vert b/reference/opt/shaders-msl/vert/float-math.invariant-float-math.vert new file mode 100644 index 00000000000..0fddcdf4d33 --- /dev/null +++ b/reference/opt/shaders-msl/vert/float-math.invariant-float-math.vert @@ -0,0 +1,137 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +[[clang::optnone]] T spvFMul(T l, T r) +{ + return fma(l, r, T(0)); +} + +template +[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m) +{ + vec res = vec(0); + for (uint i = Rows; i > 0; --i) + { + vec tmp(0); + for (uint j = 0; j < Cols; ++j) + { + tmp[j] = m[j][i - 1]; + } + res = fma(tmp, vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] vec spvFMulMatrixVector(matrix m, vec v) +{ + vec res = vec(0); + for (uint i = Cols; i > 0; --i) + { + res = fma(m[i - 1], vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, matrix r) +{ + matrix res; + for (uint i = 0; i < RCols; i++) + { + vec tmp(0); + for (uint j = 0; j < LCols; j++) + { 
+ tmp = fma(vec(r[i][j]), l[j], tmp); + } + res[i] = tmp; + } + return res; +} + +struct Matrices +{ + float4x4 vpMatrix; + float4x4 wMatrix; + float4x3 wMatrix4x3; + float3x4 wMatrix3x4; +}; + +struct main0_out +{ + float3 OutNormal [[user(locn0)]]; + float4 OutWorldPos_0 [[user(locn1)]]; + float4 OutWorldPos_1 [[user(locn2)]]; + float4 OutWorldPos_2 [[user(locn3)]]; + float4 OutWorldPos_3 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 InPos [[attribute(0)]]; + float3 InNormal [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant Matrices& _22 [[buffer(0)]]) +{ + main0_out out = {}; + spvUnsafeArray OutWorldPos = {}; + float4 _37 = float4(in.InPos, 1.0); + out.gl_Position = spvFMulMatrixVector(spvFMulMatrixMatrix(_22.vpMatrix, _22.wMatrix), _37); + OutWorldPos[0] = spvFMulMatrixVector(_22.wMatrix, _37); + OutWorldPos[1] = spvFMulVectorMatrix(_37, _22.wMatrix); + OutWorldPos[2] = spvFMulMatrixVector(_22.wMatrix3x4, in.InPos); + OutWorldPos[3] = spvFMulVectorMatrix(in.InPos, _22.wMatrix4x3); + out.OutNormal = spvFMulMatrixVector(_22.wMatrix, float4(in.InNormal, 0.0)).xyz; + out.OutWorldPos_0 = OutWorldPos[0]; + out.OutWorldPos_1 = OutWorldPos[1]; + out.OutWorldPos_2 = OutWorldPos[2]; + out.OutWorldPos_3 = OutWorldPos[3]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/float-math.vert b/reference/opt/shaders-msl/vert/float-math.vert new file mode 100644 index 00000000000..da468c24c9b --- /dev/null +++ b/reference/opt/shaders-msl/vert/float-math.vert @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Matrices +{ + float4x4 vpMatrix; + float4x4 wMatrix; + float4x3 wMatrix4x3; + float3x4 wMatrix3x4; +}; + +struct main0_out +{ + float3 OutNormal [[user(locn0)]]; + float4 OutWorldPos_0 [[user(locn1)]]; + float4 OutWorldPos_1 [[user(locn2)]]; + float4 OutWorldPos_2 [[user(locn3)]]; + float4 OutWorldPos_3 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 InPos [[attribute(0)]]; + float3 InNormal [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant Matrices& _22 [[buffer(0)]]) +{ + main0_out out = {}; + spvUnsafeArray OutWorldPos = {}; + float4 _37 = float4(in.InPos, 1.0); + out.gl_Position = (_22.vpMatrix * _22.wMatrix) * _37; + OutWorldPos[0] = _22.wMatrix * _37; + OutWorldPos[1] = _37 * _22.wMatrix; + OutWorldPos[2] = _22.wMatrix3x4 * in.InPos; + OutWorldPos[3] = in.InPos * _22.wMatrix4x3; + out.OutNormal = (_22.wMatrix * float4(in.InNormal, 0.0)).xyz; + out.OutWorldPos_0 = OutWorldPos[0]; + out.OutWorldPos_1 = OutWorldPos[1]; + out.OutWorldPos_2 = OutWorldPos[2]; + out.OutWorldPos_3 = OutWorldPos[3]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/functions.vert b/reference/opt/shaders-msl/vert/functions.vert index f710225261d..4300aa1350a 100644 --- a/reference/opt/shaders-msl/vert/functions.vert +++ 
b/reference/opt/shaders-msl/vert/functions.vert @@ -5,73 +5,52 @@ using namespace metal; -struct UBO -{ - float4x4 uMVP; - float3 rotDeg; - float3 rotRad; - int2 bits; -}; - -struct main0_out -{ - float3 vNormal [[user(locn0)]]; - float3 vRotDeg [[user(locn1)]]; - float3 vRotRad [[user(locn2)]]; - int2 vLSB [[user(locn3)]]; - int2 vMSB [[user(locn4)]]; - float4 gl_Position [[position]]; -}; - -struct main0_in -{ - float4 aVertex [[attribute(0)]]; - float3 aNormal [[attribute(1)]]; -}; - // Implementation of the GLSL radians() function template -T radians(T d) +inline T radians(T d) { return d * T(0.01745329251); } // Implementation of the GLSL degrees() function template -T degrees(T r) +inline T degrees(T r) { return r * T(57.2957795131); } // Implementation of the GLSL findLSB() function template -T findLSB(T x) +inline T spvFindLSB(T x) { return select(ctz(x), T(-1), x == T(0)); } // Implementation of the signed GLSL findMSB() function template -T findSMSB(T x) +inline T spvFindSMSB(T x) { T v = select(x, T(-1) - x, x < T(0)); return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); } // Returns the determinant of a 2x2 matrix. -inline float spvDet2x2(float a1, float a2, float b1, float b2) +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the determinant of a 3x3 matrix. -inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. 
+static inline __attribute__((always_inline)) float4x4 spvInverse4x4(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -105,6 +84,30 @@ float4x4 spvInverse4x4(float4x4 m) return (det != 0.0f) ? (adj * (1.0f / det)) : m; } +struct UBO +{ + float4x4 uMVP; + float3 rotDeg; + float3 rotRad; + int2 bits; +}; + +struct main0_out +{ + float3 vNormal [[user(locn0)]]; + float3 vRotDeg [[user(locn1)]]; + float3 vRotRad [[user(locn2)]]; + int2 vLSB [[user(locn3)]]; + int2 vMSB [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]) { main0_out out = {}; @@ -112,8 +115,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.vNormal = in.aNormal; out.vRotDeg = degrees(_18.rotRad); out.vRotRad = radians(_18.rotDeg); - out.vLSB = findLSB(_18.bits); - out.vMSB = findSMSB(_18.bits); + out.vLSB = spvFindLSB(_18.bits); + out.vMSB = spvFindSMSB(_18.bits); return out; } diff --git a/reference/opt/shaders-msl/vert/implicit-position-1.vert b/reference/opt/shaders-msl/vert/implicit-position-1.vert new file mode 100644 index 00000000000..5cea4ee2c20 --- /dev/null +++ b/reference/opt/shaders-msl/vert/implicit-position-1.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 V [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.V = float4(1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/implicit-position-2.vert b/reference/opt/shaders-msl/vert/implicit-position-2.vert new file mode 100644 index 00000000000..9e024c2095b --- /dev/null +++ b/reference/opt/shaders-msl/vert/implicit-position-2.vert @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +vertex void main0() +{ +} + diff --git 
a/reference/opt/shaders-msl/vert/in_out_array_mat.vert b/reference/opt/shaders-msl/vert/in_out_array_mat.vert index 0d6976e2606..7a74f49a49e 100644 --- a/reference/opt/shaders-msl/vert/in_out_array_mat.vert +++ b/reference/opt/shaders-msl/vert/in_out_array_mat.vert @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct UBO { float4x4 projection; @@ -40,7 +81,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& ubo [[buffer(0)]] { main0_out out = {}; float4x4 outTransModel = {}; - float4 colors[3] = {}; + spvUnsafeArray colors = {}; float4x4 inViewMat = {}; colors[0] = in.colors_0; colors[1] = in.colors_1; diff --git a/reference/opt/shaders-msl/vert/interface-block-block-composites.frag b/reference/opt/shaders-msl/vert/interface-block-block-composites.frag index 90d732cc52b..ac0d424d883 100644 --- a/reference/opt/shaders-msl/vert/interface-block-block-composites.frag +++ b/reference/opt/shaders-msl/vert/interface-block-block-composites.frag @@ -1,13 +1,54 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template 
+struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Vert { float3x3 wMatrix; float4 wTmp; - float arr[4]; + spvUnsafeArray arr; }; struct main0_out @@ -20,14 +61,14 @@ struct main0_in float3 vMatrix_0 [[user(locn0)]]; float3 vMatrix_1 [[user(locn1)]]; float3 vMatrix_2 [[user(locn2)]]; - float3 Vert_wMatrix_0 [[user(locn4)]]; - float3 Vert_wMatrix_1 [[user(locn5)]]; - float3 Vert_wMatrix_2 [[user(locn6)]]; - float4 Vert_wTmp [[user(locn7)]]; - float Vert_arr_0 [[user(locn8)]]; - float Vert_arr_1 [[user(locn9)]]; - float Vert_arr_2 [[user(locn10)]]; - float Vert_arr_3 [[user(locn11)]]; + float3 m_17_wMatrix_0 [[user(locn4)]]; + float3 m_17_wMatrix_1 [[user(locn5)]]; + float3 m_17_wMatrix_2 [[user(locn6)]]; + float4 m_17_wTmp [[user(locn7)]]; + float m_17_arr_0 [[user(locn8)]]; + float m_17_arr_1 [[user(locn9)]]; + float m_17_arr_2 [[user(locn10)]]; + float m_17_arr_3 [[user(locn11)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) @@ -35,14 +76,14 @@ fragment main0_out main0(main0_in in [[stage_in]]) main0_out out = {}; Vert _17 = {}; float3x3 vMatrix = {}; - _17.wMatrix[0] = in.Vert_wMatrix_0; - _17.wMatrix[1] = in.Vert_wMatrix_1; - _17.wMatrix[2] = in.Vert_wMatrix_2; - _17.wTmp = in.Vert_wTmp; - _17.arr[0] = in.Vert_arr_0; - _17.arr[1] = in.Vert_arr_1; - _17.arr[2] = in.Vert_arr_2; - _17.arr[3] = 
in.Vert_arr_3; + _17.wMatrix[0] = in.m_17_wMatrix_0; + _17.wMatrix[1] = in.m_17_wMatrix_1; + _17.wMatrix[2] = in.m_17_wMatrix_2; + _17.wTmp = in.m_17_wTmp; + _17.arr[0] = in.m_17_arr_0; + _17.arr[1] = in.m_17_arr_1; + _17.arr[2] = in.m_17_arr_2; + _17.arr[3] = in.m_17_arr_3; vMatrix[0] = in.vMatrix_0; vMatrix[1] = in.vMatrix_1; vMatrix[2] = in.vMatrix_2; diff --git a/reference/opt/shaders-msl/vert/interface-block-block-composites.vert b/reference/opt/shaders-msl/vert/interface-block-block-composites.vert index 3d97ae6dcff..a05c9331586 100644 --- a/reference/opt/shaders-msl/vert/interface-block-block-composites.vert +++ b/reference/opt/shaders-msl/vert/interface-block-block-composites.vert @@ -1,11 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Vert { - float arr[3]; + spvUnsafeArray arr; float3x3 wMatrix; float4 wTmp; }; @@ -15,13 +56,13 @@ struct main0_out float3 vMatrix_0 [[user(locn0)]]; float3 vMatrix_1 [[user(locn1)]]; float3 vMatrix_2 [[user(locn2)]]; - float Vert_arr_0 [[user(locn4)]]; - float Vert_arr_1 [[user(locn5)]]; - float Vert_arr_2 [[user(locn6)]]; - float3 Vert_wMatrix_0 [[user(locn7)]]; - float3 Vert_wMatrix_1 
[[user(locn8)]]; - float3 Vert_wMatrix_2 [[user(locn9)]]; - float4 Vert_wTmp [[user(locn10)]]; + float m_20_arr_0 [[user(locn4)]]; + float m_20_arr_1 [[user(locn5)]]; + float m_20_arr_2 [[user(locn6)]]; + float3 m_20_wMatrix_0 [[user(locn7)]]; + float3 m_20_wMatrix_1 [[user(locn8)]]; + float3 m_20_wMatrix_2 [[user(locn9)]]; + float4 m_20_wTmp [[user(locn10)]]; float4 gl_Position [[position]]; }; @@ -52,13 +93,13 @@ vertex main0_out main0(main0_in in [[stage_in]]) out.vMatrix_0 = vMatrix[0]; out.vMatrix_1 = vMatrix[1]; out.vMatrix_2 = vMatrix[2]; - out.Vert_arr_0 = _20.arr[0]; - out.Vert_arr_1 = _20.arr[1]; - out.Vert_arr_2 = _20.arr[2]; - out.Vert_wMatrix_0 = _20.wMatrix[0]; - out.Vert_wMatrix_1 = _20.wMatrix[1]; - out.Vert_wMatrix_2 = _20.wMatrix[2]; - out.Vert_wTmp = _20.wTmp; + out.m_20_arr_0 = _20.arr[0]; + out.m_20_arr_1 = _20.arr[1]; + out.m_20_arr_2 = _20.arr[2]; + out.m_20_wMatrix_0 = _20.wMatrix[0]; + out.m_20_wMatrix_1 = _20.wMatrix[1]; + out.m_20_wMatrix_2 = _20.wMatrix[2]; + out.m_20_wTmp = _20.wTmp; return out; } diff --git a/reference/opt/shaders-msl/vert/interface-block-single-element-array.vert b/reference/opt/shaders-msl/vert/interface-block-single-element-array.vert new file mode 100644 index 00000000000..6858db730e3 --- /dev/null +++ b/reference/opt/shaders-msl/vert/interface-block-single-element-array.vert @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct TDPickVertex +{ + float4 c; + spvUnsafeArray uv; +}; + +struct main0_out +{ + float4 oTDVert_c [[user(locn0)]]; + float3 oTDVert_uv_0 [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 P [[attribute(0)]]; + float3 uv_0 [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + TDPickVertex oTDVert = {}; + spvUnsafeArray uv = {}; + uv[0] = in.uv_0; + out.gl_Position = float4(in.P, 1.0); + oTDVert.uv[0] = uv[0]; + oTDVert.c = float4(1.0); + out.oTDVert_c = oTDVert.c; + out.oTDVert_uv_0 = oTDVert.uv[0]; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/interpolation-qualifiers-block.vert b/reference/opt/shaders-msl/vert/interpolation-qualifiers-block.vert index 4206623b4f6..1ae24c7e5b5 100644 --- a/reference/opt/shaders-msl/vert/interpolation-qualifiers-block.vert +++ b/reference/opt/shaders-msl/vert/interpolation-qualifiers-block.vert @@ -16,13 +16,13 @@ struct Output struct main0_out { - float2 Output_v0 [[user(locn0)]]; - float2 Output_v1 [[user(locn1)]]; - float3 Output_v2 [[user(locn2)]]; - float4 Output_v3 [[user(locn3)]]; - float Output_v4 [[user(locn4)]]; - float Output_v5 [[user(locn5)]]; - float Output_v6 [[user(locn6)]]; + float2 outp_v0 [[user(locn0)]]; + float2 outp_v1 [[user(locn1)]]; + float3 outp_v2 
[[user(locn2)]]; + float4 outp_v3 [[user(locn3)]]; + float outp_v4 [[user(locn4)]]; + float outp_v5 [[user(locn5)]]; + float outp_v6 [[user(locn6)]]; float4 gl_Position [[position]]; }; @@ -43,13 +43,13 @@ vertex main0_out main0(main0_in in [[stage_in]]) outp.v5 = in.Position.y; outp.v6 = in.Position.x * in.Position.w; out.gl_Position = in.Position; - out.Output_v0 = outp.v0; - out.Output_v1 = outp.v1; - out.Output_v2 = outp.v2; - out.Output_v3 = outp.v3; - out.Output_v4 = outp.v4; - out.Output_v5 = outp.v5; - out.Output_v6 = outp.v6; + out.outp_v0 = outp.v0; + out.outp_v1 = outp.v1; + out.outp_v2 = outp.v2; + out.outp_v3 = outp.v3; + out.outp_v4 = outp.v4; + out.outp_v5 = outp.v5; + out.outp_v6 = outp.v6; return out; } diff --git a/reference/opt/shaders-msl/vert/invariant.msl21.vert b/reference/opt/shaders-msl/vert/invariant.msl21.vert index 73b0ec7449d..b7f703b2e66 100644 --- a/reference/opt/shaders-msl/vert/invariant.msl21.vert +++ b/reference/opt/shaders-msl/vert/invariant.msl21.vert @@ -18,8 +18,7 @@ struct main0_in vertex main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 _20 = in.vInput1 * in.vInput2; - float4 _21 = in.vInput0 + _20; + float4 _21 = fma(in.vInput1, in.vInput2, in.vInput0); out.gl_Position = _21; return out; } diff --git a/reference/opt/shaders-msl/vert/leaf-function.for-tess.vert b/reference/opt/shaders-msl/vert/leaf-function.for-tess.vert new file mode 100644 index 00000000000..e3d2d1fac02 --- /dev/null +++ b/reference/opt/shaders-msl/vert/leaf-function.for-tess.vert @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; +}; + +struct main0_out +{ + float3 vNormal; + float4 gl_Position; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* 
spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = _18.uMVP * in.aVertex; + out.vNormal = in.aNormal; +} + diff --git a/reference/opt/shaders-msl/vert/no-contraction.vert b/reference/opt/shaders-msl/vert/no-contraction.vert new file mode 100644 index 00000000000..f4df5506ae4 --- /dev/null +++ b/reference/opt/shaders-msl/vert/no-contraction.vert @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +[[clang::optnone]] T spvFMul(T l, T r) +{ + return fma(l, r, T(0)); +} + +template +[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m) +{ + vec res = vec(0); + for (uint i = Rows; i > 0; --i) + { + vec tmp(0); + for (uint j = 0; j < Cols; ++j) + { + tmp[j] = m[j][i - 1]; + } + res = fma(tmp, vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] vec spvFMulMatrixVector(matrix m, vec v) +{ + vec res = vec(0); + for (uint i = Cols; i > 0; --i) + { + res = fma(m[i - 1], vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, matrix r) +{ + matrix res; + for (uint i = 0; i < RCols; i++) + { + vec tmp(0); + for (uint j = 0; j < LCols; j++) + { + tmp = fma(vec(r[i][j]), l[j], tmp); + } + res[i] = tmp; + } + return res; +} + +template +[[clang::optnone]] T spvFAdd(T l, T r) +{ + return fma(T(1), l, r); +} + +template +[[clang::optnone]] T spvFSub(T l, T r) +{ + return fma(T(-1), r, l); +} + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vA [[attribute(0)]]; + float4 vB [[attribute(1)]]; + float4 vC [[attribute(2)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _15 = spvFMul(in.vA, in.vB); + out.gl_Position = spvFAdd(spvFAdd(spvFAdd(_15, spvFAdd(in.vA, in.vB)), 
spvFSub(in.vA, in.vB)), spvFAdd(_15, in.vC)); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/no-disable-vertex-out.frag-output.vert b/reference/opt/shaders-msl/vert/no-disable-vertex-out.frag-output.vert new file mode 100644 index 00000000000..14cc94937c0 --- /dev/null +++ b/reference/opt/shaders-msl/vert/no-disable-vertex-out.frag-output.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct buf +{ + float4x4 MVP; + float4 position[36]; + float4 attr[36]; +}; + +struct main0_out +{ + float4 texcoord [[user(locn0)]]; + float3 frag_pos [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant buf& ubuf [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.texcoord = ubuf.attr[int(gl_VertexIndex)]; + out.gl_Position = ubuf.MVP * ubuf.position[int(gl_VertexIndex)]; + out.frag_pos = out.gl_Position.xyz; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/no_stage_out.for-tess.vert b/reference/opt/shaders-msl/vert/no_stage_out.for-tess.vert new file mode 100644 index 00000000000..984e83260aa --- /dev/null +++ b/reference/opt/shaders-msl/vert/no_stage_out.for-tess.vert @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct _RESERVED_IDENTIFIER_FIXUP_10_12 +{ + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; +}; + +struct main0_in +{ + uint4 _RESERVED_IDENTIFIER_FIXUP_19 [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_10_12& _RESERVED_IDENTIFIER_FIXUP_12 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], uint3 spvDispatchBase [[grid_origin]]) +{ + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + uint gl_VertexIndex = gl_GlobalInvocationID.x + spvDispatchBase.x; + _RESERVED_IDENTIFIER_FIXUP_12._RESERVED_IDENTIFIER_FIXUP_m0[int(gl_VertexIndex)] = in._RESERVED_IDENTIFIER_FIXUP_19; +} + diff --git 
a/reference/opt/shaders-msl/vert/no_stage_out.vert b/reference/opt/shaders-msl/vert/no_stage_out.vert index 28098ee88e6..e804da67535 100644 --- a/reference/opt/shaders-msl/vert/no_stage_out.vert +++ b/reference/opt/shaders-msl/vert/no_stage_out.vert @@ -3,18 +3,18 @@ using namespace metal; -struct _10 +struct _RESERVED_IDENTIFIER_FIXUP_10_12 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; struct main0_in { - uint4 m_19 [[attribute(0)]]; + uint4 _RESERVED_IDENTIFIER_FIXUP_19 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _10& _12 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +vertex void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_10_12& _RESERVED_IDENTIFIER_FIXUP_12 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) { - _12._m0[gl_VertexIndex] = in.m_19; + _RESERVED_IDENTIFIER_FIXUP_12._RESERVED_IDENTIFIER_FIXUP_m0[int(gl_VertexIndex)] = in._RESERVED_IDENTIFIER_FIXUP_19; } diff --git a/reference/opt/shaders-msl/vert/no_stage_out.write_buff.vert b/reference/opt/shaders-msl/vert/no_stage_out.write_buff.vert index d5d31f44308..296293aaea1 100644 --- a/reference/opt/shaders-msl/vert/no_stage_out.write_buff.vert +++ b/reference/opt/shaders-msl/vert/no_stage_out.write_buff.vert @@ -3,14 +3,14 @@ using namespace metal; -struct _35 +struct _RESERVED_IDENTIFIER_FIXUP_33_35 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; -struct _40 +struct _RESERVED_IDENTIFIER_FIXUP_38_40 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; struct main0_out @@ -20,16 +20,16 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _35& _37 [[buffer(0)]], constant _40& _42 [[buffer(1)]]) +vertex void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_33_35& _RESERVED_IDENTIFIER_FIXUP_35 [[buffer(0)]], constant _RESERVED_IDENTIFIER_FIXUP_38_40& 
_RESERVED_IDENTIFIER_FIXUP_40 [[buffer(1)]]) { main0_out out = {}; - out.gl_Position = in.m_17; + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; for (int _52 = 0; _52 < 1024; ) { - _37._m0[_52] = _42._m0[_52]; + _RESERVED_IDENTIFIER_FIXUP_35._RESERVED_IDENTIFIER_FIXUP_m0[_52] = _RESERVED_IDENTIFIER_FIXUP_40._RESERVED_IDENTIFIER_FIXUP_m0[_52]; _52++; continue; } diff --git a/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert b/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert index ca4d6a5b92f..92fbf555d32 100644 --- a/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert +++ b/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert @@ -6,9 +6,9 @@ using namespace metal; -struct _23 +struct _RESERVED_IDENTIFIER_FIXUP_19_21 { - uint _m0; + uint _RESERVED_IDENTIFIER_FIXUP_m0; }; struct main0_out @@ -18,13 +18,13 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]]) +vertex void main0(main0_in in [[stage_in]], volatile device _RESERVED_IDENTIFIER_FIXUP_19_21& _RESERVED_IDENTIFIER_FIXUP_21 [[buffer(0)]]) { main0_out out = {}; - out.gl_Position = in.m_17; - uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_25._m0, 1u, memory_order_relaxed); + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; + uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_RESERVED_IDENTIFIER_FIXUP_21._RESERVED_IDENTIFIER_FIXUP_m0, 1u, memory_order_relaxed); } diff --git a/reference/opt/shaders-msl/vert/no_stage_out.write_tex.vert b/reference/opt/shaders-msl/vert/no_stage_out.write_tex.vert index ddad934ee00..9d87efe1420 100644 --- a/reference/opt/shaders-msl/vert/no_stage_out.write_tex.vert +++ b/reference/opt/shaders-msl/vert/no_stage_out.write_tex.vert @@ -10,16 +10,16 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 
_RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], texture1d _34 [[texture(0)]], texture1d _37 [[texture(1)]]) +vertex void main0(main0_in in [[stage_in]], texture1d _RESERVED_IDENTIFIER_FIXUP_32 [[texture(0)]], texture1d _RESERVED_IDENTIFIER_FIXUP_35 [[texture(1)]]) { main0_out out = {}; - out.gl_Position = in.m_17; + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; for (int _45 = 0; _45 < 128; ) { - _34.write(_37.read(uint(_45)), uint(_45)); + _RESERVED_IDENTIFIER_FIXUP_32.write(_RESERVED_IDENTIFIER_FIXUP_35.read(uint(_45)), uint(_45)); _45++; continue; } diff --git a/reference/opt/shaders-msl/vert/out-block-with-nested-struct-array.vert b/reference/opt/shaders-msl/vert/out-block-with-nested-struct-array.vert new file mode 100644 index 00000000000..cabcfcb521d --- /dev/null +++ b/reference/opt/shaders-msl/vert/out-block-with-nested-struct-array.vert @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct t21 +{ + float4 m0; + float4 m1; +}; + +struct t24 +{ + spvUnsafeArray m0; +}; + +struct main0_out +{ + float4 v26_m0_0_m0 [[user(locn0)]]; + float4 v26_m0_0_m1 [[user(locn1)]]; + float4 v26_m0_1_m0 [[user(locn2)]]; + float4 v26_m0_1_m1 [[user(locn3)]]; + float4 v26_m0_2_m0 [[user(locn4)]]; + float4 v26_m0_2_m1 [[user(locn5)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 v17 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + t24 v26 = {}; + out.gl_Position = in.v17; + v26.m0[1].m1 = float4(-4.0, -9.0, 3.0, 7.0); + out.v26_m0_0_m0 = v26.m0[0].m0; + out.v26_m0_0_m1 = v26.m0[0].m1; + out.v26_m0_1_m0 = v26.m0[1].m0; + out.v26_m0_1_m1 = v26.m0[1].m1; + out.v26_m0_2_m0 = v26.m0[2].m0; + out.v26_m0_2_m1 = v26.m0[2].m1; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/out-block-with-struct-array.vert b/reference/opt/shaders-msl/vert/out-block-with-struct-array.vert new file mode 100644 index 00000000000..61c7c18b54c --- /dev/null +++ b/reference/opt/shaders-msl/vert/out-block-with-struct-array.vert @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T 
elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct t21 +{ + float m0; + float4 m1; +}; + +struct main0_out +{ + float v25_0_m0 [[user(locn0)]]; + float4 v25_0_m1 [[user(locn1)]]; + float v25_1_m0 [[user(locn2)]]; + float4 v25_1_m1 [[user(locn3)]]; + float v25_2_m0 [[user(locn4)]]; + float4 v25_2_m1 [[user(locn5)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 v17 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray v25 = {}; + out.gl_Position = in.v17; + v25[2].m1 = float4(-4.0, -9.0, 3.0, 7.0); + out.v25_0_m0 = v25[0].m0; + out.v25_0_m1 = v25[0].m1; + out.v25_1_m0 = v25[1].m0; + out.v25_1_m1 = v25[1].m1; + out.v25_2_m0 = v25[2].m0; + out.v25_2_m1 = v25[2].m1; + return out; +} + diff --git a/reference/opt/shaders-msl/vert/out_block.vert b/reference/opt/shaders-msl/vert/out_block.vert index 45b897013b1..909a059bd2c 100644 --- a/reference/opt/shaders-msl/vert/out_block.vert +++ b/reference/opt/shaders-msl/vert/out_block.vert @@ -16,8 +16,8 @@ struct VertexOut struct main0_out { - float4 VertexOut_color [[user(locn2)]]; - float4 VertexOut_color2 [[user(locn3)]]; + float4 outputs_color [[user(locn2)]]; + float4 outputs_color2 [[user(locn3)]]; float4 gl_Position [[position]]; }; @@ -34,8 +34,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant 
Transform& block [[buf out.gl_Position = block.transform * float4(in.position, 1.0); outputs.color = in.color; outputs.color2 = in.color + float4(1.0); - out.VertexOut_color = outputs.color; - out.VertexOut_color2 = outputs.color2; + out.outputs_color = outputs.color; + out.outputs_color2 = outputs.color2; return out; } diff --git a/reference/opt/shaders-msl/vert/packed-bool-to-uint.vert b/reference/opt/shaders-msl/vert/packed-bool-to-uint.vert new file mode 100644 index 00000000000..6cc55204848 --- /dev/null +++ b/reference/opt/shaders-msl/vert/packed-bool-to-uint.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _24 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _24.umatrix * float4(_24.uquad[int(gl_VertexIndex)].x, _24.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_24.flags.flags[0] != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/opt/shaders-msl/vert/packed-bool2-to-packed_uint2.vert b/reference/opt/shaders-msl/vert/packed-bool2-to-packed_uint2.vert new file mode 100644 index 00000000000..4c46aaeb4ea --- /dev/null +++ b/reference/opt/shaders-msl/vert/packed-bool2-to-packed_uint2.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint2 flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _25 [[buffer(0)]], uint 
gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _25.umatrix * float4(_25.uquad[int(gl_VertexIndex)].x, _25.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_25.flags.flags[0].x != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/opt/shaders-msl/vert/packed_matrix.vert b/reference/opt/shaders-msl/vert/packed_matrix.vert index 44db8203c7e..2e6f9680dbe 100644 --- a/reference/opt/shaders-msl/vert/packed_matrix.vert +++ b/reference/opt/shaders-msl/vert/packed_matrix.vert @@ -3,48 +3,45 @@ using namespace metal; -typedef packed_float4 packed_rm_float4x3[3]; - -struct _15 +struct _RESERVED_IDENTIFIER_FIXUP_1365_18812 { - packed_rm_float4x3 _m0; - packed_rm_float4x3 _m1; + float3x4 _RESERVED_IDENTIFIER_FIXUP_m0; + float3x4 _RESERVED_IDENTIFIER_FIXUP_m1; }; -struct _42 +struct _RESERVED_IDENTIFIER_FIXUP_1126_22044 { - float4x4 _m0; - float4x4 _m1; - float _m2; + float4x4 _RESERVED_IDENTIFIER_FIXUP_m0; + float4x4 _RESERVED_IDENTIFIER_FIXUP_m1; + float _RESERVED_IDENTIFIER_FIXUP_m9; char _m3_pad[12]; - packed_float3 _m3; - float _m4; - packed_float3 _m5; - float _m6; - float _m7; - float _m8; - float2 _m9; + packed_float3 _RESERVED_IDENTIFIER_FIXUP_m10; + float _RESERVED_IDENTIFIER_FIXUP_m11; + packed_float3 _RESERVED_IDENTIFIER_FIXUP_m12; + float _RESERVED_IDENTIFIER_FIXUP_m17; + float _RESERVED_IDENTIFIER_FIXUP_m18; + float _RESERVED_IDENTIFIER_FIXUP_m19; + float2 _RESERVED_IDENTIFIER_FIXUP_m20; }; struct main0_out { - float3 m_72 [[user(locn0)]]; + float3 _RESERVED_IDENTIFIER_FIXUP_3976 [[user(locn0)]]; float4 gl_Position [[position]]; }; struct main0_in { - float4 m_25 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_5275 [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]]) +vertex main0_out main0(main0_in in [[stage_in]], constant _RESERVED_IDENTIFIER_FIXUP_1365_18812& _RESERVED_IDENTIFIER_FIXUP_18812 
[[buffer(0)]], constant _RESERVED_IDENTIFIER_FIXUP_1126_22044& _RESERVED_IDENTIFIER_FIXUP_22044 [[buffer(1)]]) { main0_out out = {}; - float4 _70 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); - out.m_72 = normalize(float4(in.m_25.xyz, 0.0) * float3x4(float4(_17._m1[0]), float4(_17._m1[1]), float4(_17._m1[2]))); - float4 _94 = _70; - _94.y = -_70.y; - out.gl_Position = _94; + float4 _70 = _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m0 * float4(float3(_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m10) + (in._RESERVED_IDENTIFIER_FIXUP_5275.xyz * (_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m17 + _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m18)), 1.0); + out._RESERVED_IDENTIFIER_FIXUP_3976 = fast::normalize(float4(in._RESERVED_IDENTIFIER_FIXUP_5275.xyz, 0.0) * _RESERVED_IDENTIFIER_FIXUP_18812._RESERVED_IDENTIFIER_FIXUP_m1); + _70.y = -_70.y; + out.gl_Position = _70; return out; } diff --git a/reference/opt/shaders-msl/vert/read-from-row-major-array.vert b/reference/opt/shaders-msl/vert/read-from-row-major-array.vert index 9b85a25956a..ec3e7b72481 100644 --- a/reference/opt/shaders-msl/vert/read-from-row-major-array.vert +++ b/reference/opt/shaders-msl/vert/read-from-row-major-array.vert @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -7,7 +5,7 @@ using namespace metal; struct Block { - float2x3 var[3][4]; + float3x4 var[3][4]; }; struct main0_out @@ -21,17 +19,11 @@ struct main0_in float4 a_position [[attribute(0)]]; }; -// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization. 
-float2x3 spvConvertFromRowMajor2x3(float2x3 m) -{ - return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2])); -} - vertex main0_out main0(main0_in in [[stage_in]], constant Block& _104 [[buffer(0)]]) { main0_out out = {}; out.gl_Position = in.a_position; - out.v_vtxResult = ((float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[1].x) < 0.0500000007450580596923828125) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(spvConvertFromRowMajor2x3(_104.var[0][0])[1].z - 5.0) < 0.0500000007450580596923828125)); + out.v_vtxResult = ((float(abs(_104.var[0][0][0][0] - 2.0) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][1][0] - 6.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][2][0] - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(_104.var[0][0][0][1]) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][1][1] - 5.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][2][1] - 5.0) < 0.0500000007450580596923828125)); return out; } diff --git a/reference/shaders-msl/vert/layer.msl11.invalid.vert b/reference/opt/shaders-msl/vert/return-array.force-native-array.vert similarity index 61% rename from reference/shaders-msl/vert/layer.msl11.invalid.vert rename to reference/opt/shaders-msl/vert/return-array.force-native-array.vert index b6f39dca3e9..ce13349a0ff 100644 --- a/reference/shaders-msl/vert/layer.msl11.invalid.vert +++ b/reference/opt/shaders-msl/vert/return-array.force-native-array.vert @@ -6,19 +6,17 @@ using namespace metal; struct main0_out { float4 gl_Position [[position]]; - uint gl_Layer [[render_target_array_index]]; 
}; struct main0_in { - float4 coord [[attribute(0)]]; + float4 vInput1 [[attribute(1)]]; }; vertex main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - out.gl_Position = in.coord; - out.gl_Layer = uint(int(in.coord.z)); + out.gl_Position = float4(10.0) + in.vInput1; return out; } diff --git a/reference/opt/shaders-msl/vert/sign-int-types.vert b/reference/opt/shaders-msl/vert/sign-int-types.vert index 2f518b12911..f5f647d4589 100644 --- a/reference/opt/shaders-msl/vert/sign-int-types.vert +++ b/reference/opt/shaders-msl/vert/sign-int-types.vert @@ -5,6 +5,13 @@ using namespace metal; +// Implementation of the GLSL sign() function for integer types +template::value>::type> +inline T sign(T x) +{ + return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); +} + struct UBO { float4x4 uMVP; @@ -36,13 +43,6 @@ struct main0_in float4 aVertex [[attribute(0)]]; }; -// Implementation of the GLSL sign() function for integer types -template::value>::type> -T sign(T x) -{ - return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); -} - vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]) { main0_out out = {}; diff --git a/reference/opt/shaders-msl/vert/signedness-mismatch.shader-inputs.vert b/reference/opt/shaders-msl/vert/signedness-mismatch.shader-inputs.vert new file mode 100644 index 00000000000..56e00199cb1 --- /dev/null +++ b/reference/opt/shaders-msl/vert/signedness-mismatch.shader-inputs.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + ushort2 a [[attribute(0)]]; + uint3 b [[attribute(1)]]; + ushort c_0 [[attribute(2)]]; + ushort c_1 [[attribute(3)]]; + uint4 d_0 [[attribute(4)]]; + uint4 d_1 [[attribute(5)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray c = {}; + spvUnsafeArray d = {}; + c[0] = in.c_0; + c[1] = in.c_1; + d[0] = in.d_0; + d[1] = in.d_1; + out.gl_Position = float4(float(int(short(in.a.x))), float(int(in.b.x)), float(uint(c[1])), float(d[0].w)); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/texture_buffer.vert b/reference/opt/shaders-msl/vert/texture_buffer.vert index ee3956fad84..9d8b5c49f02 100644 --- a/reference/opt/shaders-msl/vert/texture_buffer.vert +++ b/reference/opt/shaders-msl/vert/texture_buffer.vert @@ -5,17 +5,18 @@ using namespace metal; -struct main0_out -{ - float4 gl_Position [[position]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct main0_out +{ + float4 gl_Position [[position]]; +}; + vertex main0_out main0(texture2d uSamp [[texture(0)]], texture2d uSampo [[texture(1)]]) { main0_out out = {}; diff 
--git a/reference/opt/shaders-msl/vert/uniform-struct-out-of-order-offests.vert b/reference/opt/shaders-msl/vert/uniform-struct-out-of-order-offests.vert new file mode 100644 index 00000000000..c69775e3262 --- /dev/null +++ b/reference/opt/shaders-msl/vert/uniform-struct-out-of-order-offests.vert @@ -0,0 +1,32 @@ +#include +#include + +using namespace metal; + +struct data_u_t +{ + int4 m1[3]; + uint m3; + uint3 m2; + int4 m0[8]; +}; + +struct main0_out +{ + float foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vtx_posn [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant data_u_t& data_u [[buffer(0)]]) +{ + main0_out out = {}; + out.gl_Position = in.vtx_posn; + out.foo = float((uint3(data_u.m1[1].xyz) + data_u.m2).y * uint(data_u.m0[4].x)); + return out; +} + diff --git a/reference/opt/shaders-msl/vert/uniform-struct-packing-nested.vert b/reference/opt/shaders-msl/vert/uniform-struct-packing-nested.vert new file mode 100644 index 00000000000..c305623256a --- /dev/null +++ b/reference/opt/shaders-msl/vert/uniform-struct-packing-nested.vert @@ -0,0 +1,52 @@ +#include +#include + +using namespace metal; + +typedef packed_float4 packed_rm_float4x4[4]; + +struct s0 +{ + float3x4 m0; + packed_int4 m1; + packed_rm_float4x4 m2; + packed_uint2 m3; +}; + +struct s1 +{ + float4x4 m0; + int m1; + char _m2_pad[12]; + packed_uint3 m2; + s0 m3; +}; + +struct data_u_t +{ + float4 m1[5]; + float2x4 m3; + int4 m4; + s1 m2; + float3x4 m0; +}; + +struct main0_out +{ + float foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vtx_posn [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant data_u_t& data_u [[buffer(0)]]) +{ + main0_out out = {}; + out.gl_Position = in.vtx_posn; + out.foo = (((data_u.m1[3].y + float(data_u.m4.z)) * data_u.m0[2][1]) * data_u.m2.m0[3][2]) * data_u.m2.m3.m2[3][3]; + return out; +} + diff --git 
a/reference/opt/shaders-msl/vert/unused-position.vert b/reference/opt/shaders-msl/vert/unused-position.vert new file mode 100644 index 00000000000..7dc4672139c --- /dev/null +++ b/reference/opt/shaders-msl/vert/unused-position.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_PointSize = 1.0; + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp deleted file mode 100644 index 278a8bb2ee8..00000000000 --- a/reference/opt/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp +++ /dev/null @@ -1,171 +0,0 @@ -#include -#include - -using namespace metal; - -typedef packed_float2 packed_float2x2[2]; -typedef packed_float2 packed_rm_float2x3[3]; -typedef packed_float3 packed_float2x3[2]; -typedef packed_float3 packed_rm_float3x2[2]; - -struct S0 -{ - packed_float2 a[1]; - float b; -}; - -struct S1 -{ - packed_float3 a; - float b; -}; - -struct S2 -{ - packed_float3 a[1]; - float b; -}; - -struct S3 -{ - packed_float2 a; - float b; -}; - -struct S4 -{ - float2 c; -}; - -struct Content -{ - S0 m0s[1]; - S1 m1s[1]; - S2 m2s[1]; - S0 m0; - S1 m1; - S2 m2; - S3 m3; - float m4; - S4 m3s[8]; -}; - -struct SSBO1 -{ - Content content; - Content content1[2]; - Content content2; - float2x2 m0; - float2x2 m1; - packed_float2x3 m2[4]; - float3x2 m3; - float2x2 m4; - float2x2 m5[9]; - packed_rm_float2x3 m6[4][2]; - float3x2 m7; - float array[1]; -}; - -struct S0_1 -{ - float4 a[1]; - float b; -}; - -struct S1_1 -{ - packed_float3 a; - float b; -}; - -struct S2_1 -{ - float3 a[1]; - float b; -}; - -struct S3_1 -{ - float2 a; - float b; -}; - -struct S4_1 -{ - float2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 
m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - char _m8_pad[12]; - /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8]; -}; - -struct SSBO0 -{ - Content_1 content; - Content_1 content1[2]; - Content_1 content2; - float2x2 m0; - char _m4_pad[16]; - float2x2 m1; - char _m5_pad[16]; - float2x3 m2[4]; - float3x2 m3; - char _m7_pad[24]; - float2x2 m4; - char _m8_pad[16]; - float2x2 m5[9]; - float2x3 m6[4][2]; - float3x2 m7; - float4 array[1]; -}; - -struct SSBO2 -{ - float m0; - packed_float2x2 m1; - packed_rm_float3x2 m2; -}; - -kernel void main0(device SSBO1& ssbo_scalar [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]], device SSBO2& ssbo_scalar2 [[buffer(2)]]) -{ - ssbo_scalar.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy; - ssbo_scalar.content.m0s[0].b = ssbo_140.content.m0s[0].b; - ssbo_scalar.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a); - ssbo_scalar.content.m1s[0].b = ssbo_140.content.m1s[0].b; - ssbo_scalar.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0]; - ssbo_scalar.content.m2s[0].b = ssbo_140.content.m2s[0].b; - ssbo_scalar.content.m0.a[0] = ssbo_140.content.m0.a[0].xy; - ssbo_scalar.content.m0.b = ssbo_140.content.m0.b; - ssbo_scalar.content.m1.a = float3(ssbo_140.content.m1.a); - ssbo_scalar.content.m1.b = ssbo_140.content.m1.b; - ssbo_scalar.content.m2.a[0] = ssbo_140.content.m2.a[0]; - ssbo_scalar.content.m2.b = ssbo_140.content.m2.b; - ssbo_scalar.content.m3.a = ssbo_140.content.m3.a; - ssbo_scalar.content.m3.b = ssbo_140.content.m3.b; - ssbo_scalar.content.m4 = ssbo_140.content.m4; - ssbo_scalar.content.m3s[0].c = ssbo_140.content.m3s[0].c; - ssbo_scalar.content.m3s[1].c = ssbo_140.content.m3s[1].c; - ssbo_scalar.content.m3s[2].c = ssbo_140.content.m3s[2].c; - ssbo_scalar.content.m3s[3].c = ssbo_140.content.m3s[3].c; - ssbo_scalar.content.m3s[4].c = ssbo_140.content.m3s[4].c; - ssbo_scalar.content.m3s[5].c = ssbo_140.content.m3s[5].c; - ssbo_scalar.content.m3s[6].c = 
ssbo_140.content.m3s[6].c; - ssbo_scalar.content.m3s[7].c = ssbo_140.content.m3s[7].c; - ssbo_scalar.content.m1.a = float2x3(float3(ssbo_scalar.m2[1][0]), float3(ssbo_scalar.m2[1][1])) * float2(ssbo_scalar.content.m0.a[0]); - ssbo_scalar.m0 = float2x2(float2(ssbo_scalar2.m1[0]), float2(ssbo_scalar2.m1[1])); - ssbo_scalar2.m1[0] = transpose(ssbo_scalar.m4)[0]; - ssbo_scalar2.m1[1] = transpose(ssbo_scalar.m4)[1]; - ssbo_scalar2.m2[0] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[0]; - ssbo_scalar2.m2[1] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[1]; -} - diff --git a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp deleted file mode 100644 index 948806db4ef..00000000000 --- a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ /dev/null @@ -1,92 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct SSBO -{ - float FragColor; -}; - -inline uint4 spvSubgroupBallot(bool value) -{ - simd_vote vote = simd_ballot(value); - // simd_ballot() returns a 64-bit integer-like object, but - // SPIR-V callers expect a uint4. We must convert. - // FIXME: This won't include higher bits if Apple ever supports - // 128 lanes in an SIMD-group. 
- return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); -} - -inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) -{ - return !!extract_bits(ballot[bit / 32], bit % 32, 1); -} - -inline uint spvSubgroupBallotFindLSB(uint4 ballot) -{ - return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); -} - -inline uint spvSubgroupBallotFindMSB(uint4 ballot) -{ - return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); -} - -inline uint spvSubgroupBallotBitCount(uint4 ballot) -{ - return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); -} - -inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -template -inline bool spvSubgroupAllEqual(T value) -{ - return simd_all(value == simd_broadcast_first(value)); -} - -template<> -inline bool spvSubgroupAllEqual(bool value) -{ - return simd_all(value) || !simd_any(value); -} - -kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize 
[[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) -{ - uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); - uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); - uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); - uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = float(gl_SubgroupInvocationID); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device); - simdgroup_barrier(mem_flags::mem_threadgroup); - simdgroup_barrier(mem_flags::mem_texture); - _9.FragColor = float4(gl_SubgroupEqMask).x; - _9.FragColor = float4(gl_SubgroupGeMask).x; - _9.FragColor = float4(gl_SubgroupGtMask).x; - _9.FragColor = 
float4(gl_SubgroupLeMask).x; - _9.FragColor = float4(gl_SubgroupLtMask).x; - uint4 _83 = spvSubgroupBallot(true); - float4 _165 = simd_prefix_inclusive_product(simd_product(float4(20.0))); - int4 _167 = simd_prefix_inclusive_product(simd_product(int4(20))); -} - diff --git a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp deleted file mode 100644 index 6d32de695ac..00000000000 --- a/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO -{ - float FragColor; -}; - -kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) -{ - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = float(gl_SubgroupInvocationID); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device); - simdgroup_barrier(mem_flags::mem_threadgroup); - simdgroup_barrier(mem_flags::mem_texture); -} - diff --git a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag new file mode 100644 index 00000000000..f0935f6dcf4 --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + 
+template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex_0 [[user(locn1)]]; + float2 vTex_1 [[user(locn2)]]; + float2 vTex_2 [[user(locn3)]]; + float2 vTex_3 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]]) +{ + main0_out out = {}; + spvUnsafeArray vTex = {}; + vTex[0] = in.vTex_0; + vTex[1] = in.vTex_1; + vTex[2] = in.vTex_2; + vTex[3] = in.vTex_3; + const uint gl_ViewIndex = spvViewMask[0]; + out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]); + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag index 23c554940b8..67895e3e92c 100644 --- a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag +++ b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 FragColor [[color(0)]]; @@ -20,7 +61,7 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) { main0_out out = {}; - float2 vTex[4] = {}; + spvUnsafeArray vTex = {}; vTex[0] = in.vTex_0; vTex[1] = in.vTex_1; vTex[2] = in.vTex_2; diff --git a/reference/opt/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..274cea2de15 --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + bool _15 = gl_HelperInvocation; + gl_HelperInvocation = true, discard_fragment(); + if (!_15) + { + out.FragColor = float4(1.0, 0.0, 0.0, 1.0); + } + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag 
b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..e2b2a85712a --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag @@ -0,0 +1,13 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + gl_HelperInvocation = true, discard_fragment(); + bool _19 = gl_HelperInvocation; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag new file mode 100644 index 00000000000..82eb282f17c --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag @@ -0,0 +1,13 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + gl_HelperInvocation = true, discard_fragment(); + bool _9 = gl_HelperInvocation; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag deleted file mode 100644 index fc9c4fcdd19..00000000000 --- a/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag +++ /dev/null @@ -1,89 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -inline uint4 spvSubgroupBallot(bool value) -{ - simd_vote vote = simd_ballot(value); - // simd_ballot() returns a 64-bit integer-like object, but - // SPIR-V callers expect a uint4. We must convert. - // FIXME: This won't include higher bits if Apple ever supports - // 128 lanes in an SIMD-group. 
- return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); -} - -inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) -{ - return !!extract_bits(ballot[bit / 32], bit % 32, 1); -} - -inline uint spvSubgroupBallotFindLSB(uint4 ballot) -{ - return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); -} - -inline uint spvSubgroupBallotFindMSB(uint4 ballot) -{ - return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); -} - -inline uint spvSubgroupBallotBitCount(uint4 ballot) -{ - return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); -} - -inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -template -inline bool spvSubgroupAllEqual(T value) -{ - return simd_all(value == simd_broadcast_first(value)); -} - -template<> -inline bool spvSubgroupAllEqual(bool value) -{ - return simd_all(value) || !simd_any(value); -} - -fragment main0_out main0() -{ - main0_out out = {}; - uint gl_SubgroupSize = simd_sum(1); - uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1); - uint4 gl_SubgroupEqMask = 
gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); - uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); - uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); - uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - out.FragColor = float(gl_SubgroupSize); - out.FragColor = float(gl_SubgroupInvocationID); - out.FragColor = float4(gl_SubgroupEqMask).x; - out.FragColor = float4(gl_SubgroupGeMask).x; - out.FragColor = float4(gl_SubgroupGtMask).x; - out.FragColor = float4(gl_SubgroupLeMask).x; - out.FragColor = float4(gl_SubgroupLtMask).x; - uint4 _63 = spvSubgroupBallot(true); - float4 _147 = simd_prefix_inclusive_product(simd_product(float4(20.0))); - int4 _149 = simd_prefix_inclusive_product(simd_product(int4(20))); - return out; -} - diff --git a/reference/opt/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert new file mode 100644 index 00000000000..e36576b86f5 --- /dev/null +++ 
b/reference/opt/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + const int gl_DeviceIndex = 0; + const uint gl_ViewIndex = 0; + out.gl_Position = float4(float(gl_DeviceIndex), float(int(gl_ViewIndex)), 0.0, 1.0); + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert new file mode 100644 index 00000000000..cc4bcc42027 --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + const int gl_DeviceIndex = 0; + out.gl_Position = float4(float(gl_DeviceIndex)); + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert new file mode 100644 index 00000000000..8959afe821e --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct MVPs +{ + float4x4 MVP[2]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]]) +{ + main0_out out = {}; + const uint gl_ViewIndex = spvViewMask[0]; + out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert 
b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert index c42e67211e7..20eff0a124f 100644 --- a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert +++ b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert @@ -19,11 +19,11 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; - uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1]; - gl_InstanceIndex /= spvViewMask[1]; + uint gl_ViewIndex = spvViewMask[0] + (gl_InstanceIndex - gl_BaseInstance) % spvViewMask[1]; + gl_InstanceIndex = (gl_InstanceIndex - gl_BaseInstance) / spvViewMask[1] + gl_BaseInstance; out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; out.gl_Layer = gl_ViewIndex - spvViewMask[0]; return out; diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert index f87d2a11adc..5152b6222ee 100644 --- a/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert +++ b/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert @@ -19,7 +19,7 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; const uint gl_ViewIndex = 0; diff --git a/reference/opt/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert 
b/reference/opt/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert index 53e26e4a8eb..86a0cea5bb0 100644 --- a/reference/opt/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert +++ b/reference/opt/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert @@ -11,7 +11,7 @@ struct main0_out vertex main0_out main0(uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) { main0_out out = {}; - out.gl_Position = float4(1.0, 2.0, 3.0, 4.0) * float(gl_VertexIndex + gl_InstanceIndex); + out.gl_Position = float4(1.0, 2.0, 3.0, 4.0) * float(int(gl_VertexIndex) + int(gl_InstanceIndex)); return out; } diff --git a/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag new file mode 100644 index 00000000000..a7b390a8cf7 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -0,0 +1,321 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 
View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint 
View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; 
+ float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4x4 ShadowViewProjectionMatrices[6]; + float InvShadowmapResolution; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float2 ProjectionDepthBiasParameters; + float4 PointLightDepthBiasAndProjParameters; +}; + +constant float4 _471 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d SceneTexturesStruct_SceneDepthTexture [[texture(0)]], texture2d SceneTexturesStruct_GBufferATexture [[texture(1)]], texture2d 
SceneTexturesStruct_GBufferBTexture [[texture(2)]], texture2d SceneTexturesStruct_GBufferDTexture [[texture(3)]], depthcube ShadowDepthCubeTexture [[texture(4)]], texture2d SSProfilesTexture [[texture(5)]], sampler SceneTexturesStruct_SceneDepthTextureSampler [[sampler(0)]], sampler SceneTexturesStruct_GBufferATextureSampler [[sampler(1)]], sampler SceneTexturesStruct_GBufferBTextureSampler [[sampler(2)]], sampler SceneTexturesStruct_GBufferDTextureSampler [[sampler(3)]], sampler ShadowDepthTextureSampler [[sampler(4)]], sampler ShadowDepthCubeTextureSampler [[sampler(5)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float2 _114 = gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw; + float4 _118 = SceneTexturesStruct_SceneDepthTexture.sample(SceneTexturesStruct_SceneDepthTextureSampler, _114, level(0.0)); + float _119 = _118.x; + float _133 = fma(_119, View.View_InvDeviceZToWorldZTransform.x, View.View_InvDeviceZToWorldZTransform.y) + (1.0 / fma(_119, View.View_InvDeviceZToWorldZTransform.z, -View.View_InvDeviceZToWorldZTransform.w)); + float4 _147 = View.View_ScreenToWorld * float4((fma(gl_FragCoord.xy, View.View_BufferSizeAndInvSize.zw, -View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_133), _133, 1.0); + float3 _148 = _147.xyz; + float3 _152 = _Globals.LightPositionAndInvRadius.xyz - _148; + float _158 = length(_152); + bool _160 = (_158 * _Globals.LightPositionAndInvRadius.w) < 1.0; + float _207; + if (_160) + { + float3 _165 = abs(_152); + float _166 = _165.x; + float _167 = _165.y; + float _168 = _165.z; + float _170 = fast::max(_166, fast::max(_167, _168)); + int _189; + if (_170 == _166) + { + _189 = (_166 == _152.x) ? 0 : 1; + } + else + { + int _185; + if (_170 == _167) + { + _185 = (_167 == _152.y) ? 2 : 3; + } + else + { + _185 = (_168 == _152.z) ? 
4 : 5; + } + _189 = _185; + } + float4 _196 = _Globals.ShadowViewProjectionMatrices[_189] * float4(_147.xyz, 1.0); + float _198 = _196.w; + _207 = ShadowDepthCubeTexture.sample_compare(ShadowDepthCubeTextureSampler, (_152 / float3(_158)), (_196.z / _198) + ((-_Globals.PointLightDepthBiasAndProjParameters.x) / _198), level(0.0)); + } + else + { + _207 = 1.0; + } + float _213 = fast::clamp(fma(_207 - 0.5, _Globals.ShadowSharpen, 0.5), 0.0, 1.0); + float _218 = sqrt(mix(1.0, _213 * _213, _Globals.ShadowFadeFraction)); + float4 _219; + _219.z = _218; + float4 _220 = float4(float3(1.0).x, float3(1.0).y, _219.z, float3(1.0).z); + float3 _236 = fast::normalize(fma(SceneTexturesStruct_GBufferATexture.sample(SceneTexturesStruct_GBufferATextureSampler, _114, level(0.0)).xyz, float3(2.0), float3(-1.0))); + uint _240 = uint(round(SceneTexturesStruct_GBufferBTexture.sample(SceneTexturesStruct_GBufferBTextureSampler, _114, level(0.0)).w * 255.0)); + bool _248 = (_240 & 15u) == 5u; + float _448; + if (_248) + { + float4 _260 = SSProfilesTexture.read(uint2(int3(1, int(uint(fma(select(float4(0.0), SceneTexturesStruct_GBufferDTexture.sample(SceneTexturesStruct_GBufferDTextureSampler, _114, level(0.0)), bool4(!(((_240 & 4294967280u) & 16u) != 0u))).x, 255.0, 0.5))), 0).xy), 0); + float _263 = _260.y * 0.5; + float3 _266 = fma(-_236, float3(_263), _148); + float _274 = pow(fast::clamp(dot(-(_152 * float3(rsqrt(dot(_152, _152)))), _236), 0.0, 1.0), 1.0); + float _445; + if (_160) + { + float3 _278 = _152 / float3(_158); + float3 _280 = fast::normalize(cross(_278, float3(0.0, 0.0, 1.0))); + float3 _284 = float3(_Globals.InvShadowmapResolution); + float3 _285 = _280 * _284; + float3 _286 = cross(_280, _278) * _284; + float3 _287 = abs(_278); + float _288 = _287.x; + float _289 = _287.y; + float _290 = _287.z; + float _292 = fast::max(_288, fast::max(_289, _290)); + int _311; + if (_292 == _288) + { + _311 = (_288 == _278.x) ? 
0 : 1; + } + else + { + int _307; + if (_292 == _289) + { + _307 = (_289 == _278.y) ? 2 : 3; + } + else + { + _307 = (_290 == _278.z) ? 4 : 5; + } + _311 = _307; + } + float4 _318 = _Globals.ShadowViewProjectionMatrices[_311] * float4(_266, 1.0); + float _323 = _260.x * (10.0 / _Globals.LightPositionAndInvRadius.w); + float _457 = -_Globals.PointLightDepthBiasAndProjParameters.w; + float _328 = 1.0 / fma(_318.z / _318.w, _Globals.PointLightDepthBiasAndProjParameters.z, _457); + float _341 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(2.5), _278), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _342 = _341 * _323; + float _363 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(0.77254199981689453125), fma(_285, float3(2.3776409626007080078125), _278)), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _364 = _363 * _323; + float _386 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(-2.0225429534912109375), fma(_285, float3(1.46946299076080322265625), _278)), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _387 = _386 * _323; + float _409 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(-2.02254199981689453125), fma(_285, float3(-1.46946299076080322265625), _278)), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _410 = _409 * _323; + float _432 = fma(_328, _Globals.LightPositionAndInvRadius.w, -((1.0 / 
fma(float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, fma(_286, float3(0.772543013095855712890625), fma(_285, float3(-2.3776409626007080078125), _278)), level(0.0))).x, _Globals.PointLightDepthBiasAndProjParameters.z, _457)) * _Globals.LightPositionAndInvRadius.w)); + float _433 = _432 * _323; + _445 = (((((fast::clamp(abs((_342 > 0.0) ? fma(_341, _323, _263) : fast::max(0.0, fma(_342, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25) + (fast::clamp(abs((_364 > 0.0) ? fma(_363, _323, _263) : fast::max(0.0, fma(_364, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_387 > 0.0) ? fma(_386, _323, _263) : fast::max(0.0, fma(_387, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_410 > 0.0) ? fma(_409, _323, _263) : fast::max(0.0, fma(_410, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_433 > 0.0) ? fma(_432, _323, _263) : fast::max(0.0, fma(_433, _274, _263))), 0.1500000059604644775390625, 5.0) + 0.25)) * 0.20000000298023223876953125; + } + else + { + _445 = 1.0; + } + _448 = fma(-_445, 0.20000000298023223876953125, 1.0); + } + else + { + _448 = 1.0; + } + _220.w = _248 ? sqrt(_448) : _218; + out.out_var_SV_Target0 = _220; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag new file mode 100644 index 00000000000..192c0b411bf --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -0,0 +1,1073 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 MappingPolynomial; + float3 InverseGamma; + float4 ColorMatrixR_ColorCurveCd1; + float4 ColorMatrixG_ColorCurveCd3Cm3; + float4 ColorMatrixB_ColorCurveCm2; + float4 ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3; + float4 ColorCurve_Ch1_Ch2; + float4 ColorShadow_Luma; + float4 ColorShadow_Tint1; + float4 ColorShadow_Tint2; + float FilmSlope; + float FilmToe; + float FilmShoulder; + float FilmBlackClip; + float FilmWhiteClip; + packed_float3 ColorScale; + float4 OverlayColor; + float WhiteTemp; + float WhiteTint; + float4 ColorSaturation; + float4 ColorContrast; + float4 ColorGamma; + float4 ColorGain; + float4 ColorOffset; + float4 ColorSaturationShadows; + float4 ColorContrastShadows; + float4 ColorGammaShadows; + float4 ColorGainShadows; + float4 ColorOffsetShadows; + float4 ColorSaturationMidtones; + float4 ColorContrastMidtones; + float4 ColorGammaMidtones; + float4 ColorGainMidtones; + float4 ColorOffsetMidtones; + float4 ColorSaturationHighlights; + float4 ColorContrastHighlights; + float4 ColorGammaHighlights; + float4 ColorGainHighlights; + float4 ColorOffsetHighlights; + float ColorCorrectionShadowsMax; + float ColorCorrectionHighlightsMin; + uint OutputDevice; + uint OutputGamut; + float BlueCorrection; + float ExpandGamut; +}; + +constant spvUnsafeArray _475 = spvUnsafeArray({ 
-4.0, -4.0, -3.1573765277862548828125, -0.485249996185302734375, 1.84773242473602294921875, 1.84773242473602294921875 }); +constant spvUnsafeArray _476 = spvUnsafeArray({ -0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875, 4.0, 4.0, 4.0 }); +constant spvUnsafeArray _479 = spvUnsafeArray({ -4.97062206268310546875, -3.0293781757354736328125, -2.1261999607086181640625, -1.5104999542236328125, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _480 = spvUnsafeArray({ 0.80891323089599609375, 1.19108676910400390625, 1.5683000087738037109375, 1.94830000400543212890625, 2.308300018310546875, 2.63840007781982421875, 2.85949993133544921875, 2.9872608184814453125, 3.0127391815185546875, 3.0127391815185546875 }); +constant spvUnsafeArray _482 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _483 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0), center_no_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globals [[buffer(0)]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + float3x3 _546 = float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 
0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * float3x3(float3(1.01303005218505859375, 0.0061053098179399967193603515625, -0.014971000142395496368408203125), float3(0.0076982299797236919403076171875, 0.99816501140594482421875, -0.005032029934227466583251953125), float3(-0.0028413101099431514739990234375, 0.0046851597726345062255859375, 0.92450702190399169921875)); + float3x3 _547 = _546 * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _548 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(0.98722398281097412109375, -0.0061132698319852352142333984375, 0.01595330052077770233154296875), float3(-0.007598360069096088409423828125, 1.00186002254486083984375, 0.0053300200961530208587646484375), float3(0.003072570078074932098388671875, -0.0050959498621523380279541015625, 1.0816800594329833984375)); + float3x3 _549 = _548 * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)); + float3x3 _550 = float3x3(float3(0.952552378177642822265625, 0.0, 9.25), float3(0.3439664542675018310546875, 0.728166103363037109375, -0.07213254272937774658203125), float3(0.0, 0.0, 1.00882518291473388671875)) * 
float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _551 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625)); + float3x3 _576; + for (;;) + { + if (_Globals.OutputGamut == 1u) + { + _576 = _548 * float3x3(float3(2.493396282196044921875, -0.931345880031585693359375, -0.4026944935321807861328125), float3(-0.829486787319183349609375, 1.76265966892242431640625, 0.02362460084259510040283203125), float3(0.0358506999909877777099609375, -0.076182700693607330322265625, 0.957014024257659912109375)); + break; + } + else + { + if (_Globals.OutputGamut == 2u) + { + _576 = _548 * float3x3(float3(1.71660840511322021484375, -0.3556621074676513671875, -0.253360092639923095703125), float3(-0.666682898998260498046875, 1.61647760868072509765625, 0.01576850004494190216064453125), float3(0.017642199993133544921875, -0.04277630150318145751953125, 0.94222867488861083984375)); + break; + } + else + { + if (_Globals.OutputGamut == 3u) + { + _576 = float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625)); + break; + } + else + { + if 
(_Globals.OutputGamut == 4u) + { + _576 = float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)); + break; + } + else + { + _576 = _549; + break; + } + } + } + } + } + float3 _577 = float4((in.in_var_TEXCOORD0 - float2(0.015625)) * float2(1.03225803375244140625), float(gl_Layer) * 0.0322580635547637939453125, 0.0).xyz; + float3 _599; + if (_Globals.OutputDevice >= 3u) + { + float3 _591 = pow(_577, float3(0.0126833133399486541748046875)); + _599 = pow(fast::max(float3(0.0), _591 - float3(0.8359375)) / fma(float3(-18.6875), _591, float3(18.8515625)), float3(6.277394771575927734375)) * float3(10000.0); + } + else + { + _599 = fma(exp2((_577 - float3(0.434017598628997802734375)) * float3(14.0)), float3(0.180000007152557373046875), float3(-0.00266771926544606685638427734375)); + } + float _602 = _Globals.WhiteTemp * 1.00055634975433349609375; + float _616 = (_602 <= 7000.0) ? (0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _602)) / _602)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _602)) / _602)); + float _633 = fma(1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.00015411825734190642833709716796875, _Globals.WhiteTemp, 0.860117733478546142578125)) / fma(7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.0008424202096648514270782470703125, _Globals.WhiteTemp, 1.0)); + float _644 = fma(4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(4.25, _Globals.WhiteTemp, 0.317398726940155029296875)) / fma(1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(-2.8974181986995972692966461181641e-05, _Globals.WhiteTemp, 1.0)); + float _649 = fma(2.0, _633, _644 * (-8.0)) + 4.0; + float2 _653 = float2((3.0 * _633) / _649, (2.0 * _644) / _649); + float2 _660 = fast::normalize(float2(_633, _644)); + 
float _665 = fma((-_660.y) * _Globals.WhiteTint, 0.0500000007450580596923828125, _633); + float _669 = fma(_660.x * _Globals.WhiteTint, 0.0500000007450580596923828125, _644); + float _674 = fma(2.0, _665, _669 * (-8.0)) + 4.0; + float2 _680 = select(float2(_616, fma(_616, fma(-3.0, _616, 2.86999988555908203125), -0.2750000059604644775390625)), _653, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _665) / _674, (2.0 * _669) / _674) - _653); + float _683 = fast::max(_680.y, 1.0000000133514319600180897396058e-10); + float3 _697 = float3(_680.x / _683, 1.0, ((1.0 - _680.x) - _680.y) / _683) * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _717 = (_599 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(0.941379249095916748046875 / _697.x, 0.0, 0.0), float3(0.0, 1.04043638706207275390625 / _697.y, 0.0), float3(0.0, 0.0, 1.08976650238037109375 / _697.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) 
* float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _547; + float3 _745; + if (_Globals.ColorShadow_Tint2.w != 0.0) + { + float _724 = dot(_717, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float3 _727 = (_717 / float3(_724)) - float3(1.0); + _745 = mix(_717, _717 * (_549 * (float3x3(float3(0.544169127941131591796875, 0.23959259688854217529296875, 0.16669429838657379150390625), float3(0.23946559429168701171875, 0.702153027057647705078125, 0.058381401002407073974609375), float3(-0.0023439000360667705535888671875, 0.0361833982169628143310546875, 1.05521833896636962890625)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)))), float3((1.0 - exp2((-4.0) * dot(_727, _727))) * (1.0 - exp2((((-4.0) * _Globals.ExpandGamut) * _724) * _724)))); + } + else + { + _745 = _717; + } + float _746 = dot(_745, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float4 _751 = _Globals.ColorSaturationShadows * _Globals.ColorSaturation; + float4 _756 = _Globals.ColorContrastShadows * _Globals.ColorContrast; + float4 _761 = _Globals.ColorGammaShadows * _Globals.ColorGamma; + float4 _766 = _Globals.ColorGainShadows * _Globals.ColorGain; + float4 _771 = _Globals.ColorOffsetShadows + _Globals.ColorOffset; + float3 _772 = float3(_746); + float _804 = smoothstep(0.0, _Globals.ColorCorrectionShadowsMax, _746); + float4 _808 = _Globals.ColorSaturationHighlights * _Globals.ColorSaturation; + float4 _811 = 
_Globals.ColorContrastHighlights * _Globals.ColorContrast; + float4 _814 = _Globals.ColorGammaHighlights * _Globals.ColorGamma; + float4 _817 = _Globals.ColorGainHighlights * _Globals.ColorGain; + float4 _820 = _Globals.ColorOffsetHighlights + _Globals.ColorOffset; + float _852 = smoothstep(_Globals.ColorCorrectionHighlightsMin, 1.0, _746); + float4 _855 = _Globals.ColorSaturationMidtones * _Globals.ColorSaturation; + float4 _858 = _Globals.ColorContrastMidtones * _Globals.ColorContrast; + float4 _861 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; + float4 _864 = _Globals.ColorGainMidtones * _Globals.ColorGain; + float4 _867 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; + float3 _905 = fma(fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _808.xyz * float3(_808.w))) * float3(5.5555553436279296875), _811.xyz * float3(_811.w)) * float3(0.180000007152557373046875), float3(1.0) / (_814.xyz * float3(_814.w))), _817.xyz * float3(_817.w), _820.xyz + float3(_820.w)), float3(_852), fma(fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _751.xyz * float3(_751.w))) * float3(5.5555553436279296875), _756.xyz * float3(_756.w)) * float3(0.180000007152557373046875), float3(1.0) / (_761.xyz * float3(_761.w))), _766.xyz * float3(_766.w), _771.xyz + float3(_771.w)), float3(1.0 - _804), fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _855.xyz * float3(_855.w))) * float3(5.5555553436279296875), _858.xyz * float3(_858.w)) * float3(0.180000007152557373046875), float3(1.0) / (_861.xyz * float3(_861.w))), _864.xyz * float3(_864.w), _867.xyz + float3(_867.w)) * float3(_804 - _852))); + float3 _906 = _905 * _549; + float3 _914 = float3(_Globals.BlueCorrection); + float3 _916 = mix(_905, _905 * ((_551 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 0.162961304187774658203125), float3(0.0005471261101774871349334716796875, 
-0.00088337459601461887359619140625, 1.00033628940582275390625))) * _550), _914) * _551; + float _917 = _916.x; + float _918 = _916.y; + float _920 = _916.z; + float _923 = fast::max(fast::max(_917, _918), _920); + float _928 = (fast::max(_923, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_917, _918), _920), 1.0000000133514319600180897396058e-10)) / fast::max(_923, 0.00999999977648258209228515625); + float _941 = fma(1.75, sqrt(fma(_917, _917 - _920, fma(_920, _920 - _918, _918 * (_918 - _917)))), (_920 + _918) + _917); + float _942 = _941 * 0.3333333432674407958984375; + float _943 = _928 - 0.4000000059604644775390625; + float _948 = fast::max(1.0 - abs(_943 * 2.5), 0.0); + float _956 = fma(float(int(sign(_943 * 5.0))), fma(-_948, _948, 1.0), 1.0) * 0.02500000037252902984619140625; + float _969; + if (_942 <= 0.053333334624767303466796875) + { + _969 = _956; + } + else + { + float _968; + if (_942 >= 0.1599999964237213134765625) + { + _968 = 0.0; + } + else + { + _968 = _956 * ((0.23999999463558197021484375 / _941) - 0.5); + } + _969 = _968; + } + float3 _972 = _916 * float3(1.0 + _969); + float _973 = _972.x; + float _974 = _972.y; + float _976 = _972.z; + float _990; + if ((_973 == _974) && (_974 == _976)) + { + _990 = 0.0; + } + else + { + _990 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_974 - _976), fma(2.0, _973, -_974) - _976); + } + float _995; + if (_990 < 0.0) + { + _995 = _990 + 360.0; + } + else + { + _995 = _990; + } + float _996 = fast::clamp(_995, 0.0, 360.0); + float _1001; + if (_996 > 180.0) + { + _1001 = _996 - 360.0; + } + else + { + _1001 = _996; + } + float _1005 = smoothstep(0.0, 1.0, 1.0 - abs(_1001 * 0.01481481455266475677490234375)); + _972.x = fma(((_1005 * _1005) * _928) * (0.02999999932944774627685546875 - _973), 0.180000007152557373046875, _973); + float3 _1014 = fast::max(float3(0.0), _972 * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, 
-0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); + float _1023 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; + float _1026 = 1.0 + _Globals.FilmWhiteClip; + float _1029 = _1026 - _Globals.FilmShoulder; + float _1056; + if (_Globals.FilmToe > 0.800000011920928955078125) + { + _1056 = ((0.819999992847442626953125 - _Globals.FilmToe) / _Globals.FilmSlope) + (-0.744727432727813720703125); + } + else + { + float _1035 = (0.180000007152557373046875 + _Globals.FilmBlackClip) / _1023; + _1056 = fma(log(_1035 / (2.0 - _1035)) * (-0.5), _1023 / _Globals.FilmSlope, -0.744727432727813720703125); + } + float _1061 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1056; + float _1063 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1061; + float3 _1064 = log(mix(float3(dot(_1014, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1014, float3(0.959999978542327880859375))); + float3 _1067 = _1064 * float3(0.4342944622039794921875); + float3 _1071 = float3(_Globals.FilmSlope) * fma(_1064, float3(0.4342944622039794921875), float3(_1061)); + float3 _1079 = float3(_1056); + float3 _1080 = fma(_1064, float3(0.4342944622039794921875), -_1079); + float3 _1092 = float3(_1063); + float3 _1106 = fast::clamp(_1080 / float3(_1063 - _1056), float3(0.0), float3(1.0)); + float3 _1110 = select(_1106, float3(1.0) - _1106, bool3(_1063 < _1056)); + float3 _1115 = mix(select(_1071, float3(-_Globals.FilmBlackClip) + (float3(2.0 * _1023) / (float3(1.0) + exp(float3(((-2.0) * _Globals.FilmSlope) / _1023) * _1080))), _1067 < _1079), select(_1071, float3(_1026) - (float3(2.0 * _1029) / (float3(1.0) + exp(float3((2.0 * _Globals.FilmSlope) / _1029) * fma(_1064, float3(0.4342944622039794921875), -_1092)))), _1067 > _1092), (fma(float3(-2.0), _1110, float3(3.0)) * _1110) * 
_1110); + float3 _1119 = fast::max(float3(0.0), mix(float3(dot(_1115, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1115, float3(0.930000007152557373046875))); + float3 _1189; + if (_Globals.ColorShadow_Tint2.w == 0.0) + { + float3 _1157 = fast::max(float3(0.0), float3(dot(_906, _Globals.ColorMatrixR_ColorCurveCd1.xyz), dot(_906, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz), dot(_906, _Globals.ColorMatrixB_ColorCurveCm2.xyz)) * fma(_Globals.ColorShadow_Tint2.xyz, float3(1.0 / (dot(_906, _Globals.ColorShadow_Luma.xyz) + 1.0)), _Globals.ColorShadow_Tint1.xyz)); + float3 _1162 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1157); + float3 _1164 = fast::max(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); + _1189 = fma(fma(_1164, _Globals.ColorCurve_Ch1_Ch2.xxx, _Globals.ColorCurve_Ch1_Ch2.yyy), float3(1.0) / (_1164 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www), fma(fast::clamp(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz), _Globals.ColorMatrixB_ColorCurveCm2.www, fma(_1162 * _Globals.ColorMatrixR_ColorCurveCd1.www, float3(1.0) / (_1162 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy), _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + } + else + { + _1189 = fast::max(float3(0.0), mix(_1119, _1119 * ((_551 * float3x3(float3(1.06317996978759765625, 0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _550), _914) * _549); + } + float3 _1218 = pow(fast::max(float3(0.0), mix((fma(float3(_Globals.MappingPolynomial.x), _1189 * _1189, float3(_Globals.MappingPolynomial.y) * _1189) + float3(_Globals.MappingPolynomial.z)) * float3(_Globals.ColorScale), _Globals.OverlayColor.xyz, 
float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float3 _3001; + if (_Globals.OutputDevice == 0u) + { + float _2961 = _1218.x; + float _2973; + for (;;) + { + if (_2961 < 0.00313066993840038776397705078125) + { + _2973 = _2961 * 12.9200000762939453125; + break; + } + _2973 = fma(pow(_2961, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _2974 = _1218.y; + float _2986; + for (;;) + { + if (_2974 < 0.00313066993840038776397705078125) + { + _2986 = _2974 * 12.9200000762939453125; + break; + } + _2986 = fma(pow(_2974, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _2987 = _1218.z; + float _2999; + for (;;) + { + if (_2987 < 0.00313066993840038776397705078125) + { + _2999 = _2987 * 12.9200000762939453125; + break; + } + _2999 = fma(pow(_2987, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + _3001 = float3(_2973, _2986, _2999); + } + else + { + float3 _2960; + if (_Globals.OutputDevice == 1u) + { + float3 _2953 = fast::max(float3(6.1035199905745685100555419921875e-05), (_1218 * _547) * _576); + _2960 = fast::min(_2953 * float3(4.5), fma(pow(fast::max(_2953, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)), float3(1.09899997711181640625), float3(-0.098999999463558197021484375))); + } + else + { + float3 _2950; + if ((_Globals.OutputDevice == 3u) || (_Globals.OutputDevice == 5u)) + { + float3 _2100 = (_906 * float3(1.5)) * (_546 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _2101 = _2100.x; + float _2102 = _2100.y; + float _2104 = _2100.z; + float _2107 = fast::max(fast::max(_2101, _2102), _2104); + float _2112 = (fast::max(_2107, 
1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2101, _2102), _2104), 1.0000000133514319600180897396058e-10)) / fast::max(_2107, 0.00999999977648258209228515625); + float _2125 = fma(1.75, sqrt(fma(_2101, _2101 - _2104, fma(_2104, _2104 - _2102, _2102 * (_2102 - _2101)))), (_2104 + _2102) + _2101); + float _2126 = _2125 * 0.3333333432674407958984375; + float _2127 = _2112 - 0.4000000059604644775390625; + float _2132 = fast::max(1.0 - abs(_2127 * 2.5), 0.0); + float _2140 = fma(float(int(sign(_2127 * 5.0))), fma(-_2132, _2132, 1.0), 1.0) * 0.02500000037252902984619140625; + float _2153; + if (_2126 <= 0.053333334624767303466796875) + { + _2153 = _2140; + } + else + { + float _2152; + if (_2126 >= 0.1599999964237213134765625) + { + _2152 = 0.0; + } + else + { + _2152 = _2140 * ((0.23999999463558197021484375 / _2125) - 0.5); + } + _2153 = _2152; + } + float3 _2156 = _2100 * float3(1.0 + _2153); + float _2157 = _2156.x; + float _2158 = _2156.y; + float _2160 = _2156.z; + float _2174; + if ((_2157 == _2158) && (_2158 == _2160)) + { + _2174 = 0.0; + } + else + { + _2174 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_2158 - _2160), fma(2.0, _2157, -_2158) - _2160); + } + float _2179; + if (_2174 < 0.0) + { + _2179 = _2174 + 360.0; + } + else + { + _2179 = _2174; + } + float _2180 = fast::clamp(_2179, 0.0, 360.0); + float _2185; + if (_2180 > 180.0) + { + _2185 = _2180 - 360.0; + } + else + { + _2185 = _2180; + } + float _2235; + if ((_2185 > (-67.5)) && (_2185 < 67.5)) + { + float _2191 = _2185 - (-67.5); + int _2193 = int(_2191 * 0.0296296291053295135498046875); + float _2195 = fma(_2191, 0.0296296291053295135498046875, -float(_2193)); + float _2196 = _2195 * _2195; + float _2197 = _2196 * _2195; + float _2234; + if (_2193 == 3) + { + _2234 = fma(_2195, -0.5, fma(_2197, -0.16666667163372039794921875, _2196 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2227; + if (_2193 == 2) + { + _2227 = fma(_2197, 0.5, _2196 
* (-1.0)) + 0.666666686534881591796875; + } + else + { + float _2222; + if (_2193 == 1) + { + _2222 = fma(_2195, 0.5, fma(_2197, -0.5, _2196 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2215; + if (_2193 == 0) + { + _2215 = _2197 * 0.16666667163372039794921875; + } + else + { + _2215 = 0.0; + } + _2222 = _2215; + } + _2227 = _2222; + } + _2234 = _2227; + } + _2235 = _2234; + } + else + { + _2235 = 0.0; + } + _2156.x = fma(((_2235 * 1.5) * _2112) * (0.02999999932944774627685546875 - _2157), 0.180000007152557373046875, _2157); + float3 _2245 = fast::clamp(fast::clamp(_2156, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _2248 = mix(float3(dot(_2245, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2245, float3(0.959999978542327880859375)); + float _2249 = _2248.x; + float _2257 = log((_2249 <= 0.0) ? 
6.103515625e-05 : _2249); + float _2258 = _2257 * 0.4342944622039794921875; + float _2327; + if (_2258 <= (-5.2601776123046875)) + { + _2327 = -4.0; + } + else + { + float _2324; + if ((_2258 > (-5.2601776123046875)) && (_2258 < (-0.744727432727813720703125))) + { + float _2304 = fma(_2257, 0.4342944622039794921875, 5.2601776123046875); + int _2308 = int(_2304 * 0.6643855571746826171875); + float _2310 = fma(_2304, 0.6643855571746826171875, -float(_2308)); + _2324 = dot(float3(_2310 * _2310, _2310, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2308], _475[_2308 + 1], _475[_2308 + 2])); + } + else + { + float _2303; + if ((_2258 >= (-0.744727432727813720703125)) && (_2258 < 4.673812389373779296875)) + { + float _2283 = fma(_2257, 0.4342944622039794921875, 0.744727432727813720703125); + int _2287 = int(_2283 * 0.55365467071533203125); + float _2289 = fma(_2283, 0.55365467071533203125, -float(_2287)); + _2303 = dot(float3(_2289 * _2289, _2289, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2287], _476[_2287 + 1], _476[_2287 + 2])); + } + else + { + _2303 = 4.0; + } + _2324 = _2303; + } + _2327 = _2324; + } + float _2330 = _2248.y; + float _2333 = log((_2330 <= 0.0) ? 
6.103515625e-05 : _2330); + float _2334 = _2333 * 0.4342944622039794921875; + float _2401; + if (_2334 <= (-5.2601776123046875)) + { + _2401 = -4.0; + } + else + { + float _2398; + if ((_2334 > (-5.2601776123046875)) && (_2334 < (-0.744727432727813720703125))) + { + float _2378 = fma(_2333, 0.4342944622039794921875, 5.2601776123046875); + int _2382 = int(_2378 * 0.6643855571746826171875); + float _2384 = fma(_2378, 0.6643855571746826171875, -float(_2382)); + _2398 = dot(float3(_2384 * _2384, _2384, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2382], _475[_2382 + 1], _475[_2382 + 2])); + } + else + { + float _2377; + if ((_2334 >= (-0.744727432727813720703125)) && (_2334 < 4.673812389373779296875)) + { + float _2357 = fma(_2333, 0.4342944622039794921875, 0.744727432727813720703125); + int _2361 = int(_2357 * 0.55365467071533203125); + float _2363 = fma(_2357, 0.55365467071533203125, -float(_2361)); + _2377 = dot(float3(_2363 * _2363, _2363, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2361], _476[_2361 + 1], _476[_2361 + 2])); + } + else + { + _2377 = 4.0; + } + _2398 = _2377; + } + _2401 = _2398; + } + float _2404 = _2248.z; + float _2407 = log((_2404 <= 0.0) ? 
6.103515625e-05 : _2404); + float _2408 = _2407 * 0.4342944622039794921875; + float _2475; + if (_2408 <= (-5.2601776123046875)) + { + _2475 = -4.0; + } + else + { + float _2472; + if ((_2408 > (-5.2601776123046875)) && (_2408 < (-0.744727432727813720703125))) + { + float _2452 = fma(_2407, 0.4342944622039794921875, 5.2601776123046875); + int _2456 = int(_2452 * 0.6643855571746826171875); + float _2458 = fma(_2452, 0.6643855571746826171875, -float(_2456)); + _2472 = dot(float3(_2458 * _2458, _2458, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2456], _475[_2456 + 1], _475[_2456 + 2])); + } + else + { + float _2451; + if ((_2408 >= (-0.744727432727813720703125)) && (_2408 < 4.673812389373779296875)) + { + float _2431 = fma(_2407, 0.4342944622039794921875, 0.744727432727813720703125); + int _2435 = int(_2431 * 0.55365467071533203125); + float _2437 = fma(_2431, 0.55365467071533203125, -float(_2435)); + _2451 = dot(float3(_2437 * _2437, _2437, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2435], _476[_2435 + 1], _476[_2435 + 2])); + } + else + { + _2451 = 4.0; + } + _2472 = _2451; + } + _2475 = _2472; + } + float3 _2479 = (float3(pow(10.0, _2327), pow(10.0, _2401), pow(10.0, _2475)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _2685 = _2479.x; + float _2688 = log((_2685 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2685); + float _2689 = _2688 * 0.4342944622039794921875; + float _2768; + if (_2689 <= (-3.84832763671875)) + { + _2768 = fma(_2688, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2760; + if ((_2689 > (-3.84832763671875)) && (_2689 < 0.68124115467071533203125)) + { + float _2743 = (7.0 * fma(_2688, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2744 = int(_2743); + float _2746 = _2743 - float(_2744); + _2760 = dot(float3(_2746 * _2746, _2746, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2744], _479[_2744 + 1], _479[_2744 + 2])); + } + else + { + float _2739; + if ((_2689 >= 0.68124115467071533203125) && (_2689 < 3.65370273590087890625)) + { + float _2722 = (7.0 * fma(_2688, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2723 = int(_2722); + float _2725 = _2722 - float(_2723); + _2739 = dot(float3(_2725 * _2725, _2725, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2723], _480[_2723 + 1], _480[_2723 + 2])); + } + else + { + _2739 = fma(_2688, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2760 = _2739; + } + _2768 = _2760; + } + float _2771 = _2479.y; + float _2774 = log((_2771 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2771); + float _2775 = _2774 * 0.4342944622039794921875; + float _2852; + if (_2775 <= (-3.84832763671875)) + { + _2852 = fma(_2774, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2844; + if ((_2775 > (-3.84832763671875)) && (_2775 < 0.68124115467071533203125)) + { + float _2827 = (7.0 * fma(_2774, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2828 = int(_2827); + float _2830 = _2827 - float(_2828); + _2844 = dot(float3(_2830 * _2830, _2830, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2828], _479[_2828 + 1], _479[_2828 + 2])); + } + else + { + float _2823; + if ((_2775 >= 0.68124115467071533203125) && (_2775 < 3.65370273590087890625)) + { + float _2806 = (7.0 * fma(_2774, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2807 = int(_2806); + float _2809 = _2806 - float(_2807); + _2823 = dot(float3(_2809 * _2809, _2809, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2807], _480[_2807 + 1], _480[_2807 + 2])); + } + else + { + _2823 = fma(_2774, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2844 = _2823; + } + _2852 = _2844; + } + float _2855 = _2479.z; + float _2858 = log((_2855 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2855); + float _2859 = _2858 * 0.4342944622039794921875; + float _2936; + if (_2859 <= (-3.84832763671875)) + { + _2936 = fma(_2858, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2928; + if ((_2859 > (-3.84832763671875)) && (_2859 < 0.68124115467071533203125)) + { + float _2911 = (7.0 * fma(_2858, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2912 = int(_2911); + float _2914 = _2911 - float(_2912); + _2928 = dot(float3(_2914 * _2914, _2914, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2912], _479[_2912 + 1], _479[_2912 + 2])); + } + else + { + float _2907; + if ((_2859 >= 0.68124115467071533203125) && (_2859 < 3.65370273590087890625)) + { + float _2890 = (7.0 * fma(_2858, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2891 = int(_2890); + float _2893 = _2890 - float(_2891); + _2907 = dot(float3(_2893 * _2893, _2893, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2891], _480[_2891 + 1], _480[_2891 + 2])); + } + else + { + _2907 = fma(_2858, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2928 = _2907; + } + _2936 = _2928; + } + float3 _2942 = pow(((float3(pow(10.0, _2768), pow(10.0, _2852), pow(10.0, _2936)) - float3(3.5073844628641381859779357910156e-05)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2950 = pow(fma(float3(18.8515625), _2942, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2942, float3(1.0))), float3(78.84375)); + } + else + { + float3 _2097; + if ((_Globals.OutputDevice == 4u) || (_Globals.OutputDevice == 6u)) + { + float3 _1263 = (_906 * float3(1.5)) 
* (_546 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _1264 = _1263.x; + float _1265 = _1263.y; + float _1267 = _1263.z; + float _1270 = fast::max(fast::max(_1264, _1265), _1267); + float _1275 = (fast::max(_1270, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1264, _1265), _1267), 1.0000000133514319600180897396058e-10)) / fast::max(_1270, 0.00999999977648258209228515625); + float _1288 = fma(1.75, sqrt(fma(_1264, _1264 - _1267, fma(_1267, _1267 - _1265, _1265 * (_1265 - _1264)))), (_1267 + _1265) + _1264); + float _1289 = _1288 * 0.3333333432674407958984375; + float _1290 = _1275 - 0.4000000059604644775390625; + float _1295 = fast::max(1.0 - abs(_1290 * 2.5), 0.0); + float _1303 = fma(float(int(sign(_1290 * 5.0))), fma(-_1295, _1295, 1.0), 1.0) * 0.02500000037252902984619140625; + float _1316; + if (_1289 <= 0.053333334624767303466796875) + { + _1316 = _1303; + } + else + { + float _1315; + if (_1289 >= 0.1599999964237213134765625) + { + _1315 = 0.0; + } + else + { + _1315 = _1303 * ((0.23999999463558197021484375 / _1288) - 0.5); + } + _1316 = _1315; + } + float3 _1319 = _1263 * float3(1.0 + _1316); + float _1320 = _1319.x; + float _1321 = _1319.y; + float _1323 = _1319.z; + float _1337; + if ((_1320 == _1321) && (_1321 == _1323)) + { + _1337 = 0.0; + } + else + { + _1337 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_1321 - _1323), fma(2.0, _1320, -_1321) - _1323); + } + float _1342; + if (_1337 < 0.0) + { + _1342 = _1337 + 360.0; + } + else + { + _1342 = _1337; + } + float _1343 = fast::clamp(_1342, 0.0, 360.0); + float _1348; + if (_1343 > 180.0) + { + _1348 = _1343 - 360.0; + } + else + { + _1348 = _1343; + } + float _1398; + if ((_1348 > (-67.5)) && (_1348 < 67.5)) + { + float _1354 = _1348 - (-67.5); + int _1356 = int(_1354 * 
0.0296296291053295135498046875); + float _1358 = fma(_1354, 0.0296296291053295135498046875, -float(_1356)); + float _1359 = _1358 * _1358; + float _1360 = _1359 * _1358; + float _1397; + if (_1356 == 3) + { + _1397 = fma(_1358, -0.5, fma(_1360, -0.16666667163372039794921875, _1359 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1390; + if (_1356 == 2) + { + _1390 = fma(_1360, 0.5, _1359 * (-1.0)) + 0.666666686534881591796875; + } + else + { + float _1385; + if (_1356 == 1) + { + _1385 = fma(_1358, 0.5, fma(_1360, -0.5, _1359 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1378; + if (_1356 == 0) + { + _1378 = _1360 * 0.16666667163372039794921875; + } + else + { + _1378 = 0.0; + } + _1385 = _1378; + } + _1390 = _1385; + } + _1397 = _1390; + } + _1398 = _1397; + } + else + { + _1398 = 0.0; + } + _1319.x = fma(((_1398 * 1.5) * _1275) * (0.02999999932944774627685546875 - _1320), 0.180000007152557373046875, _1320); + float3 _1408 = fast::clamp(fast::clamp(_1319, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _1411 = mix(float3(dot(_1408, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1408, float3(0.959999978542327880859375)); + float _1412 = _1411.x; + float _1420 = log((_1412 <= 0.0) ? 
6.103515625e-05 : _1412); + float _1421 = _1420 * 0.4342944622039794921875; + float _1490; + if (_1421 <= (-5.2601776123046875)) + { + _1490 = -4.0; + } + else + { + float _1487; + if ((_1421 > (-5.2601776123046875)) && (_1421 < (-0.744727432727813720703125))) + { + float _1467 = fma(_1420, 0.4342944622039794921875, 5.2601776123046875); + int _1471 = int(_1467 * 0.6643855571746826171875); + float _1473 = fma(_1467, 0.6643855571746826171875, -float(_1471)); + _1487 = dot(float3(_1473 * _1473, _1473, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1471], _475[_1471 + 1], _475[_1471 + 2])); + } + else + { + float _1466; + if ((_1421 >= (-0.744727432727813720703125)) && (_1421 < 4.673812389373779296875)) + { + float _1446 = fma(_1420, 0.4342944622039794921875, 0.744727432727813720703125); + int _1450 = int(_1446 * 0.55365467071533203125); + float _1452 = fma(_1446, 0.55365467071533203125, -float(_1450)); + _1466 = dot(float3(_1452 * _1452, _1452, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1450], _476[_1450 + 1], _476[_1450 + 2])); + } + else + { + _1466 = 4.0; + } + _1487 = _1466; + } + _1490 = _1487; + } + float _1493 = _1411.y; + float _1496 = log((_1493 <= 0.0) ? 
6.103515625e-05 : _1493); + float _1497 = _1496 * 0.4342944622039794921875; + float _1564; + if (_1497 <= (-5.2601776123046875)) + { + _1564 = -4.0; + } + else + { + float _1561; + if ((_1497 > (-5.2601776123046875)) && (_1497 < (-0.744727432727813720703125))) + { + float _1541 = fma(_1496, 0.4342944622039794921875, 5.2601776123046875); + int _1545 = int(_1541 * 0.6643855571746826171875); + float _1547 = fma(_1541, 0.6643855571746826171875, -float(_1545)); + _1561 = dot(float3(_1547 * _1547, _1547, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1545], _475[_1545 + 1], _475[_1545 + 2])); + } + else + { + float _1540; + if ((_1497 >= (-0.744727432727813720703125)) && (_1497 < 4.673812389373779296875)) + { + float _1520 = fma(_1496, 0.4342944622039794921875, 0.744727432727813720703125); + int _1524 = int(_1520 * 0.55365467071533203125); + float _1526 = fma(_1520, 0.55365467071533203125, -float(_1524)); + _1540 = dot(float3(_1526 * _1526, _1526, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1524], _476[_1524 + 1], _476[_1524 + 2])); + } + else + { + _1540 = 4.0; + } + _1561 = _1540; + } + _1564 = _1561; + } + float _1567 = _1411.z; + float _1570 = log((_1567 <= 0.0) ? 
6.103515625e-05 : _1567); + float _1571 = _1570 * 0.4342944622039794921875; + float _1638; + if (_1571 <= (-5.2601776123046875)) + { + _1638 = -4.0; + } + else + { + float _1635; + if ((_1571 > (-5.2601776123046875)) && (_1571 < (-0.744727432727813720703125))) + { + float _1615 = fma(_1570, 0.4342944622039794921875, 5.2601776123046875); + int _1619 = int(_1615 * 0.6643855571746826171875); + float _1621 = fma(_1615, 0.6643855571746826171875, -float(_1619)); + _1635 = dot(float3(_1621 * _1621, _1621, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1619], _475[_1619 + 1], _475[_1619 + 2])); + } + else + { + float _1614; + if ((_1571 >= (-0.744727432727813720703125)) && (_1571 < 4.673812389373779296875)) + { + float _1594 = fma(_1570, 0.4342944622039794921875, 0.744727432727813720703125); + int _1598 = int(_1594 * 0.55365467071533203125); + float _1600 = fma(_1594, 0.55365467071533203125, -float(_1598)); + _1614 = dot(float3(_1600 * _1600, _1600, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1598], _476[_1598 + 1], _476[_1598 + 2])); + } + else + { + _1614 = 4.0; + } + _1635 = _1614; + } + _1638 = _1635; + } + float3 _1642 = (float3(pow(10.0, _1490), pow(10.0, _1564), pow(10.0, _1638)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _1848 = _1642.x; + float _1851 = log((_1848 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1848); + float _1852 = _1851 * 0.4342944622039794921875; + float _1926; + if (_1852 <= (-3.84832763671875)) + { + _1926 = -2.3010299205780029296875; + } + else + { + float _1923; + if ((_1852 > (-3.84832763671875)) && (_1852 < 0.68124115467071533203125)) + { + float _1906 = (7.0 * fma(_1851, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _1907 = int(_1906); + float _1909 = _1906 - float(_1907); + _1923 = dot(float3(_1909 * _1909, _1909, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_1907], _482[_1907 + 1], _482[_1907 + 2])); + } + else + { + float _1902; + if ((_1852 >= 0.68124115467071533203125) && (_1852 < 3.761315822601318359375)) + { + float _1885 = (7.0 * fma(_1851, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _1886 = int(_1885); + float _1888 = _1885 - float(_1886); + _1902 = dot(float3(_1888 * _1888, _1888, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_1886], _483[_1886 + 1], _483[_1886 + 2])); + } + else + { + _1902 = fma(_1851, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _1923 = _1902; + } + _1926 = _1923; + } + float _1929 = _1642.y; + float _1932 = log((_1929 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1929); + float _1933 = _1932 * 0.4342944622039794921875; + float _2005; + if (_1933 <= (-3.84832763671875)) + { + _2005 = -2.3010299205780029296875; + } + else + { + float _2002; + if ((_1933 > (-3.84832763671875)) && (_1933 < 0.68124115467071533203125)) + { + float _1985 = (7.0 * fma(_1932, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _1986 = int(_1985); + float _1988 = _1985 - float(_1986); + _2002 = dot(float3(_1988 * _1988, _1988, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_1986], _482[_1986 + 1], _482[_1986 + 2])); + } + else + { + float _1981; + if ((_1933 >= 0.68124115467071533203125) && (_1933 < 3.761315822601318359375)) + { + float _1964 = (7.0 * fma(_1932, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _1965 = int(_1964); + float _1967 = _1964 - float(_1965); + _1981 = dot(float3(_1967 * _1967, _1967, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_1965], _483[_1965 + 1], _483[_1965 + 2])); + } + else + { + _1981 = fma(_1932, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2002 = _1981; + } + _2005 = _2002; + } + float _2008 = _1642.z; + float _2011 = log((_2008 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2008); + float _2012 = _2011 * 0.4342944622039794921875; + float _2084; + if (_2012 <= (-3.84832763671875)) + { + _2084 = -2.3010299205780029296875; + } + else + { + float _2081; + if ((_2012 > (-3.84832763671875)) && (_2012 < 0.68124115467071533203125)) + { + float _2064 = (7.0 * fma(_2011, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2065 = int(_2064); + float _2067 = _2064 - float(_2065); + _2081 = dot(float3(_2067 * _2067, _2067, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_2065], _482[_2065 + 1], _482[_2065 + 2])); + } + else + { + float _2060; + if ((_2012 >= 0.68124115467071533203125) && (_2012 < 3.761315822601318359375)) + { + float _2043 = (7.0 * fma(_2011, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _2044 = int(_2043); + float _2046 = _2043 - float(_2044); + _2060 = dot(float3(_2046 * _2046, _2046, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_2044], _483[_2044 + 1], _483[_2044 + 2])); + } + else + { + _2060 = fma(_2011, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2081 = _2060; + } + _2084 = _2081; + } + float3 _2089 = pow((float3(pow(10.0, _1926), pow(10.0, _2005), pow(10.0, _2084)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2097 = pow(fma(float3(18.8515625), _2089, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2089, float3(1.0))), float3(78.84375)); + } + else + { + float3 _1260; + if (_Globals.OutputDevice == 7u) + { + float3 _1252 = pow(((_906 * _547) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _1260 = pow(fma(float3(18.8515625), _1252, 
float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _1252, float3(1.0))), float3(78.84375)); + } + else + { + _1260 = pow((_1218 * _547) * _576, float3(_Globals.InverseGamma.z)); + } + _2097 = _1260; + } + _2950 = _2097; + } + _2960 = _2950; + } + _3001 = _2960; + } + float3 _3002 = _3001 * float3(0.95238101482391357421875); + float4 _3003 = float4(_3002.x, _3002.y, _3002.z, float4(0.0).w); + _3003.w = 0.0; + out.out_var_SV_Target0 = _3003; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag new file mode 100644 index 00000000000..8b53cca3ad9 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -0,0 +1,1121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 MappingPolynomial; + float3 InverseGamma; + float4 ColorMatrixR_ColorCurveCd1; + float4 ColorMatrixG_ColorCurveCd3Cm3; + float4 ColorMatrixB_ColorCurveCm2; + float4 ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3; + float4 ColorCurve_Ch1_Ch2; + float4 ColorShadow_Luma; + float4 ColorShadow_Tint1; + float4 ColorShadow_Tint2; + float FilmSlope; + float FilmToe; + float FilmShoulder; + float FilmBlackClip; + float FilmWhiteClip; + float4 LUTWeights[5]; + float3 ColorScale; + float4 OverlayColor; + float WhiteTemp; + float WhiteTint; + float4 ColorSaturation; + float4 ColorContrast; + float4 ColorGamma; + float4 ColorGain; + float4 ColorOffset; + float4 ColorSaturationShadows; + float4 ColorContrastShadows; + float4 ColorGammaShadows; + float4 ColorGainShadows; + float4 ColorOffsetShadows; + float4 ColorSaturationMidtones; + float4 ColorContrastMidtones; + float4 ColorGammaMidtones; + float4 ColorGainMidtones; + float4 ColorOffsetMidtones; + float4 ColorSaturationHighlights; + float4 ColorContrastHighlights; + float4 ColorGammaHighlights; + float4 ColorGainHighlights; + float4 ColorOffsetHighlights; + float ColorCorrectionShadowsMax; + float ColorCorrectionHighlightsMin; + uint OutputDevice; + uint OutputGamut; + float BlueCorrection; + float ExpandGamut; +}; + +constant spvUnsafeArray _499 = 
spvUnsafeArray({ -4.0, -4.0, -3.1573765277862548828125, -0.485249996185302734375, 1.84773242473602294921875, 1.84773242473602294921875 }); +constant spvUnsafeArray _500 = spvUnsafeArray({ -0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875, 4.0, 4.0, 4.0 }); +constant spvUnsafeArray _503 = spvUnsafeArray({ -4.97062206268310546875, -3.0293781757354736328125, -2.1261999607086181640625, -1.5104999542236328125, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _504 = spvUnsafeArray({ 0.80891323089599609375, 1.19108676910400390625, 1.5683000087738037109375, 1.94830000400543212890625, 2.308300018310546875, 2.63840007781982421875, 2.85949993133544921875, 2.9872608184814453125, 3.0127391815185546875, 3.0127391815185546875 }); +constant spvUnsafeArray _506 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _507 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0), center_no_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globals [[buffer(0)]], texture2d Texture1 [[texture(0)]], sampler Texture1Sampler [[sampler(0)]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + float3x3 _572 = float3x3(float3(0.41245639324188232421875, 
0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * float3x3(float3(1.01303005218505859375, 0.0061053098179399967193603515625, -0.014971000142395496368408203125), float3(0.0076982299797236919403076171875, 0.99816501140594482421875, -0.005032029934227466583251953125), float3(-0.0028413101099431514739990234375, 0.0046851597726345062255859375, 0.92450702190399169921875)); + float3x3 _573 = _572 * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _574 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(0.98722398281097412109375, -0.0061132698319852352142333984375, 0.01595330052077770233154296875), float3(-0.007598360069096088409423828125, 1.00186002254486083984375, 0.0053300200961530208587646484375), float3(0.003072570078074932098388671875, -0.0050959498621523380279541015625, 1.0816800594329833984375)); + float3x3 _575 = _574 * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)); + float3x3 _576 = float3x3(float3(0.952552378177642822265625, 0.0, 9.25), float3(0.3439664542675018310546875, 0.728166103363037109375, 
-0.07213254272937774658203125), float3(0.0, 0.0, 1.00882518291473388671875)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _577 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625)); + float3x3 _602; + for (;;) + { + if (_Globals.OutputGamut == 1u) + { + _602 = _574 * float3x3(float3(2.493396282196044921875, -0.931345880031585693359375, -0.4026944935321807861328125), float3(-0.829486787319183349609375, 1.76265966892242431640625, 0.02362460084259510040283203125), float3(0.0358506999909877777099609375, -0.076182700693607330322265625, 0.957014024257659912109375)); + break; + } + else + { + if (_Globals.OutputGamut == 2u) + { + _602 = _574 * float3x3(float3(1.71660840511322021484375, -0.3556621074676513671875, -0.253360092639923095703125), float3(-0.666682898998260498046875, 1.61647760868072509765625, 0.01576850004494190216064453125), float3(0.017642199993133544921875, -0.04277630150318145751953125, 0.94222867488861083984375)); + break; + } + else + { + if (_Globals.OutputGamut == 3u) + { + _602 = float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 
0.0040252101607620716094970703125, 1.00150072574615478515625)); + break; + } + else + { + if (_Globals.OutputGamut == 4u) + { + _602 = float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)); + break; + } + else + { + _602 = _575; + break; + } + } + } + } + } + float3 _603 = float4((in.in_var_TEXCOORD0 - float2(0.015625)) * float2(1.03225803375244140625), float(gl_Layer) * 0.0322580635547637939453125, 0.0).xyz; + float3 _625; + if (_Globals.OutputDevice >= 3u) + { + float3 _617 = pow(_603, float3(0.0126833133399486541748046875)); + _625 = pow(fast::max(float3(0.0), _617 - float3(0.8359375)) / fma(float3(-18.6875), _617, float3(18.8515625)), float3(6.277394771575927734375)) * float3(10000.0); + } + else + { + _625 = fma(exp2((_603 - float3(0.434017598628997802734375)) * float3(14.0)), float3(0.180000007152557373046875), float3(-0.00266771926544606685638427734375)); + } + float _628 = _Globals.WhiteTemp * 1.00055634975433349609375; + float _642 = (_628 <= 7000.0) ? (0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _628)) / _628)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _628)) / _628)); + float _659 = fma(1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.00015411825734190642833709716796875, _Globals.WhiteTemp, 0.860117733478546142578125)) / fma(7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(0.0008424202096648514270782470703125, _Globals.WhiteTemp, 1.0)); + float _670 = fma(4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(4.25, _Globals.WhiteTemp, 0.317398726940155029296875)) / fma(1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp, _Globals.WhiteTemp, fma(-2.8974181986995972692966461181641e-05, _Globals.WhiteTemp, 1.0)); + float _675 = fma(2.0, _659, _670 * (-8.0)) + 4.0; + float2 _679 = float2((3.0 * 
_659) / _675, (2.0 * _670) / _675); + float2 _686 = fast::normalize(float2(_659, _670)); + float _691 = fma((-_686.y) * _Globals.WhiteTint, 0.0500000007450580596923828125, _659); + float _695 = fma(_686.x * _Globals.WhiteTint, 0.0500000007450580596923828125, _670); + float _700 = fma(2.0, _691, _695 * (-8.0)) + 4.0; + float2 _706 = select(float2(_642, fma(_642, fma(-3.0, _642, 2.86999988555908203125), -0.2750000059604644775390625)), _679, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _691) / _700, (2.0 * _695) / _700) - _679); + float _709 = fast::max(_706.y, 1.0000000133514319600180897396058e-10); + float3 _723 = float3(_706.x / _709, 1.0, ((1.0 - _706.x) - _706.y) / _709) * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _743 = (_625 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(0.941379249095916748046875 / _723.x, 0.0, 0.0), float3(0.0, 1.04043638706207275390625 / _723.y, 0.0), float3(0.0, 0.0, 1.08976650238037109375 / _723.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), 
float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _573; + float3 _771; + if (_Globals.ColorShadow_Tint2.w != 0.0) + { + float _750 = dot(_743, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float3 _753 = (_743 / float3(_750)) - float3(1.0); + _771 = mix(_743, _743 * (_575 * (float3x3(float3(0.544169127941131591796875, 0.23959259688854217529296875, 0.16669429838657379150390625), float3(0.23946559429168701171875, 0.702153027057647705078125, 0.058381401002407073974609375), float3(-0.0023439000360667705535888671875, 0.0361833982169628143310546875, 1.05521833896636962890625)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)))), float3((1.0 - exp2((-4.0) * dot(_753, _753))) * (1.0 - exp2((((-4.0) * _Globals.ExpandGamut) * _750) * _750)))); + } + else + { + _771 = _743; + } + float _772 = dot(_771, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float4 _777 = _Globals.ColorSaturationShadows * _Globals.ColorSaturation; + float4 _782 = _Globals.ColorContrastShadows * _Globals.ColorContrast; + float4 _787 = _Globals.ColorGammaShadows * _Globals.ColorGamma; + float4 _792 = _Globals.ColorGainShadows * _Globals.ColorGain; + float4 _797 = _Globals.ColorOffsetShadows + _Globals.ColorOffset; + float3 _798 = float3(_772); + float _830 = smoothstep(0.0, _Globals.ColorCorrectionShadowsMax, _772); + float4 _834 = 
_Globals.ColorSaturationHighlights * _Globals.ColorSaturation; + float4 _837 = _Globals.ColorContrastHighlights * _Globals.ColorContrast; + float4 _840 = _Globals.ColorGammaHighlights * _Globals.ColorGamma; + float4 _843 = _Globals.ColorGainHighlights * _Globals.ColorGain; + float4 _846 = _Globals.ColorOffsetHighlights + _Globals.ColorOffset; + float _878 = smoothstep(_Globals.ColorCorrectionHighlightsMin, 1.0, _772); + float4 _881 = _Globals.ColorSaturationMidtones * _Globals.ColorSaturation; + float4 _884 = _Globals.ColorContrastMidtones * _Globals.ColorContrast; + float4 _887 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; + float4 _890 = _Globals.ColorGainMidtones * _Globals.ColorGain; + float4 _893 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; + float3 _931 = fma(fma(pow(pow(fast::max(float3(0.0), mix(_798, _771, _834.xyz * float3(_834.w))) * float3(5.5555553436279296875), _837.xyz * float3(_837.w)) * float3(0.180000007152557373046875), float3(1.0) / (_840.xyz * float3(_840.w))), _843.xyz * float3(_843.w), _846.xyz + float3(_846.w)), float3(_878), fma(fma(pow(pow(fast::max(float3(0.0), mix(_798, _771, _777.xyz * float3(_777.w))) * float3(5.5555553436279296875), _782.xyz * float3(_782.w)) * float3(0.180000007152557373046875), float3(1.0) / (_787.xyz * float3(_787.w))), _792.xyz * float3(_792.w), _797.xyz + float3(_797.w)), float3(1.0 - _830), fma(pow(pow(fast::max(float3(0.0), mix(_798, _771, _881.xyz * float3(_881.w))) * float3(5.5555553436279296875), _884.xyz * float3(_884.w)) * float3(0.180000007152557373046875), float3(1.0) / (_887.xyz * float3(_887.w))), _890.xyz * float3(_890.w), _893.xyz + float3(_893.w)) * float3(_830 - _878))); + float3 _932 = _931 * _575; + float3 _940 = float3(_Globals.BlueCorrection); + float3 _942 = mix(_931, _931 * ((_577 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 
0.162961304187774658203125), float3(0.0005471261101774871349334716796875, -0.00088337459601461887359619140625, 1.00033628940582275390625))) * _576), _940) * _577; + float _943 = _942.x; + float _944 = _942.y; + float _946 = _942.z; + float _949 = fast::max(fast::max(_943, _944), _946); + float _954 = (fast::max(_949, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_943, _944), _946), 1.0000000133514319600180897396058e-10)) / fast::max(_949, 0.00999999977648258209228515625); + float _967 = fma(1.75, sqrt(fma(_943, _943 - _946, fma(_946, _946 - _944, _944 * (_944 - _943)))), (_946 + _944) + _943); + float _968 = _967 * 0.3333333432674407958984375; + float _969 = _954 - 0.4000000059604644775390625; + float _974 = fast::max(1.0 - abs(_969 * 2.5), 0.0); + float _982 = fma(float(int(sign(_969 * 5.0))), fma(-_974, _974, 1.0), 1.0) * 0.02500000037252902984619140625; + float _995; + if (_968 <= 0.053333334624767303466796875) + { + _995 = _982; + } + else + { + float _994; + if (_968 >= 0.1599999964237213134765625) + { + _994 = 0.0; + } + else + { + _994 = _982 * ((0.23999999463558197021484375 / _967) - 0.5); + } + _995 = _994; + } + float3 _998 = _942 * float3(1.0 + _995); + float _999 = _998.x; + float _1000 = _998.y; + float _1002 = _998.z; + float _1016; + if ((_999 == _1000) && (_1000 == _1002)) + { + _1016 = 0.0; + } + else + { + _1016 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_1000 - _1002), fma(2.0, _999, -_1000) - _1002); + } + float _1021; + if (_1016 < 0.0) + { + _1021 = _1016 + 360.0; + } + else + { + _1021 = _1016; + } + float _1022 = fast::clamp(_1021, 0.0, 360.0); + float _1027; + if (_1022 > 180.0) + { + _1027 = _1022 - 360.0; + } + else + { + _1027 = _1022; + } + float _1031 = smoothstep(0.0, 1.0, 1.0 - abs(_1027 * 0.01481481455266475677490234375)); + _998.x = fma(((_1031 * _1031) * _954) * (0.02999999932944774627685546875 - _999), 0.180000007152557373046875, _999); + float3 _1040 = fast::max(float3(0.0), _998 * 
float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); + float _1049 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; + float _1052 = 1.0 + _Globals.FilmWhiteClip; + float _1055 = _1052 - _Globals.FilmShoulder; + float _1082; + if (_Globals.FilmToe > 0.800000011920928955078125) + { + _1082 = ((0.819999992847442626953125 - _Globals.FilmToe) / _Globals.FilmSlope) + (-0.744727432727813720703125); + } + else + { + float _1061 = (0.180000007152557373046875 + _Globals.FilmBlackClip) / _1049; + _1082 = fma(log(_1061 / (2.0 - _1061)) * (-0.5), _1049 / _Globals.FilmSlope, -0.744727432727813720703125); + } + float _1087 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1082; + float _1089 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1087; + float3 _1090 = log(mix(float3(dot(_1040, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1040, float3(0.959999978542327880859375))); + float3 _1093 = _1090 * float3(0.4342944622039794921875); + float3 _1097 = float3(_Globals.FilmSlope) * fma(_1090, float3(0.4342944622039794921875), float3(_1087)); + float3 _1105 = float3(_1082); + float3 _1106 = fma(_1090, float3(0.4342944622039794921875), -_1105); + float3 _1118 = float3(_1089); + float3 _1132 = fast::clamp(_1106 / float3(_1089 - _1082), float3(0.0), float3(1.0)); + float3 _1136 = select(_1132, float3(1.0) - _1132, bool3(_1089 < _1082)); + float3 _1141 = mix(select(_1097, float3(-_Globals.FilmBlackClip) + (float3(2.0 * _1049) / (float3(1.0) + exp(float3(((-2.0) * _Globals.FilmSlope) / _1049) * _1106))), _1093 < _1105), select(_1097, float3(_1052) - (float3(2.0 * _1055) / (float3(1.0) + exp(float3((2.0 * _Globals.FilmSlope) / _1055) * fma(_1090, float3(0.4342944622039794921875), -_1118)))), 
_1093 > _1118), (fma(float3(-2.0), _1136, float3(3.0)) * _1136) * _1136); + float3 _1145 = fast::max(float3(0.0), mix(float3(dot(_1141, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1141, float3(0.930000007152557373046875))); + float3 _1215; + if (_Globals.ColorShadow_Tint2.w == 0.0) + { + float3 _1183 = fast::max(float3(0.0), float3(dot(_932, _Globals.ColorMatrixR_ColorCurveCd1.xyz), dot(_932, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz), dot(_932, _Globals.ColorMatrixB_ColorCurveCm2.xyz)) * fma(_Globals.ColorShadow_Tint2.xyz, float3(1.0 / (dot(_932, _Globals.ColorShadow_Luma.xyz) + 1.0)), _Globals.ColorShadow_Tint1.xyz)); + float3 _1188 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1183); + float3 _1190 = fast::max(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); + _1215 = fma(fma(_1190, _Globals.ColorCurve_Ch1_Ch2.xxx, _Globals.ColorCurve_Ch1_Ch2.yyy), float3(1.0) / (_1190 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www), fma(fast::clamp(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz), _Globals.ColorMatrixB_ColorCurveCm2.www, fma(_1188 * _Globals.ColorMatrixR_ColorCurveCd1.www, float3(1.0) / (_1188 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy), _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + } + else + { + _1215 = fast::max(float3(0.0), mix(_1145, _1145 * ((_577 * float3x3(float3(1.06317996978759765625, 0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _576), _940) * _575); + } + float3 _1216 = fast::clamp(_1215, float3(0.0), float3(1.0)); + float _1217 = _1216.x; + float _1229; + for (;;) + { + if (_1217 < 0.00313066993840038776397705078125) + { + _1229 = 
_1217 * 12.9200000762939453125; + break; + } + _1229 = fma(pow(_1217, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _1230 = _1216.y; + float _1242; + for (;;) + { + if (_1230 < 0.00313066993840038776397705078125) + { + _1242 = _1230 * 12.9200000762939453125; + break; + } + _1242 = fma(pow(_1230, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _1243 = _1216.z; + float _1255; + for (;;) + { + if (_1243 < 0.00313066993840038776397705078125) + { + _1255 = _1243 * 12.9200000762939453125; + break; + } + _1255 = fma(pow(_1243, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float3 _1256 = float3(_1229, _1242, _1255); + float3 _1258 = fma(_1256, float3(0.9375), float3(0.03125)); + float _1270 = fma(_1258.z, 16.0, -0.5); + float _1271 = floor(_1270); + float _1274 = _1258.x + _1271; + float _1276 = _1258.y; + float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1274 * 0.0625, _1276)); + float4 _1283 = Texture1.sample(Texture1Sampler, float2(fma(_1274, 0.0625, 0.0625), _1276)); + float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), fma(float3(_Globals.LUTWeights[0].x), _1256, float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz)); + float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow(fma(_1289, float3(0.94786727428436279296875), float3(0.0521326996386051177978515625)), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375)); + float3 _1324 = pow(fast::max(float3(0.0), mix((fma(float3(_Globals.MappingPolynomial.x), _1295 * _1295, float3(_Globals.MappingPolynomial.y) * _1295) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float3 _3103; + if (_Globals.OutputDevice == 0u) + { + float _3063 = 
_1324.x; + float _3075; + for (;;) + { + if (_3063 < 0.00313066993840038776397705078125) + { + _3075 = _3063 * 12.9200000762939453125; + break; + } + _3075 = fma(pow(_3063, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _3076 = _1324.y; + float _3088; + for (;;) + { + if (_3076 < 0.00313066993840038776397705078125) + { + _3088 = _3076 * 12.9200000762939453125; + break; + } + _3088 = fma(pow(_3076, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + float _3089 = _1324.z; + float _3101; + for (;;) + { + if (_3089 < 0.00313066993840038776397705078125) + { + _3101 = _3089 * 12.9200000762939453125; + break; + } + _3101 = fma(pow(_3089, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875); + break; + } + _3103 = float3(_3075, _3088, _3101); + } + else + { + float3 _3062; + if (_Globals.OutputDevice == 1u) + { + float3 _3055 = fast::max(float3(6.1035199905745685100555419921875e-05), (_1324 * _573) * _602); + _3062 = fast::min(_3055 * float3(4.5), fma(pow(fast::max(_3055, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)), float3(1.09899997711181640625), float3(-0.098999999463558197021484375))); + } + else + { + float3 _3052; + if ((_Globals.OutputDevice == 3u) || (_Globals.OutputDevice == 5u)) + { + float3 _2204 = (_932 * float3(1.5)) * (_572 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _2205 = _2204.x; + float _2206 = _2204.y; + float _2208 = _2204.z; + float _2211 = fast::max(fast::max(_2205, _2206), _2208); + float _2216 = (fast::max(_2211, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2205, _2206), _2208), 1.0000000133514319600180897396058e-10)) / fast::max(_2211, 
0.00999999977648258209228515625); + float _2229 = fma(1.75, sqrt(fma(_2205, _2205 - _2208, fma(_2208, _2208 - _2206, _2206 * (_2206 - _2205)))), (_2208 + _2206) + _2205); + float _2230 = _2229 * 0.3333333432674407958984375; + float _2231 = _2216 - 0.4000000059604644775390625; + float _2236 = fast::max(1.0 - abs(_2231 * 2.5), 0.0); + float _2244 = fma(float(int(sign(_2231 * 5.0))), fma(-_2236, _2236, 1.0), 1.0) * 0.02500000037252902984619140625; + float _2257; + if (_2230 <= 0.053333334624767303466796875) + { + _2257 = _2244; + } + else + { + float _2256; + if (_2230 >= 0.1599999964237213134765625) + { + _2256 = 0.0; + } + else + { + _2256 = _2244 * ((0.23999999463558197021484375 / _2229) - 0.5); + } + _2257 = _2256; + } + float3 _2260 = _2204 * float3(1.0 + _2257); + float _2261 = _2260.x; + float _2262 = _2260.y; + float _2264 = _2260.z; + float _2278; + if ((_2261 == _2262) && (_2262 == _2264)) + { + _2278 = 0.0; + } + else + { + _2278 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_2262 - _2264), fma(2.0, _2261, -_2262) - _2264); + } + float _2283; + if (_2278 < 0.0) + { + _2283 = _2278 + 360.0; + } + else + { + _2283 = _2278; + } + float _2284 = fast::clamp(_2283, 0.0, 360.0); + float _2289; + if (_2284 > 180.0) + { + _2289 = _2284 - 360.0; + } + else + { + _2289 = _2284; + } + float _2339; + if ((_2289 > (-67.5)) && (_2289 < 67.5)) + { + float _2295 = _2289 - (-67.5); + int _2297 = int(_2295 * 0.0296296291053295135498046875); + float _2299 = fma(_2295, 0.0296296291053295135498046875, -float(_2297)); + float _2300 = _2299 * _2299; + float _2301 = _2300 * _2299; + float _2338; + if (_2297 == 3) + { + _2338 = fma(_2299, -0.5, fma(_2301, -0.16666667163372039794921875, _2300 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2331; + if (_2297 == 2) + { + _2331 = fma(_2301, 0.5, _2300 * (-1.0)) + 0.666666686534881591796875; + } + else + { + float _2326; + if (_2297 == 1) + { + _2326 = fma(_2299, 0.5, fma(_2301, -0.5, _2300 * 0.5)) + 
0.16666667163372039794921875; + } + else + { + float _2319; + if (_2297 == 0) + { + _2319 = _2301 * 0.16666667163372039794921875; + } + else + { + _2319 = 0.0; + } + _2326 = _2319; + } + _2331 = _2326; + } + _2338 = _2331; + } + _2339 = _2338; + } + else + { + _2339 = 0.0; + } + _2260.x = fma(((_2339 * 1.5) * _2216) * (0.02999999932944774627685546875 - _2261), 0.180000007152557373046875, _2261); + float3 _2349 = fast::clamp(fast::clamp(_2260, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _2352 = mix(float3(dot(_2349, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2349, float3(0.959999978542327880859375)); + float _2353 = _2352.x; + float _2361 = log((_2353 <= 0.0) ? 
6.103515625e-05 : _2353); + float _2362 = _2361 * 0.4342944622039794921875; + float _2431; + if (_2362 <= (-5.2601776123046875)) + { + _2431 = -4.0; + } + else + { + float _2428; + if ((_2362 > (-5.2601776123046875)) && (_2362 < (-0.744727432727813720703125))) + { + float _2408 = fma(_2361, 0.4342944622039794921875, 5.2601776123046875); + int _2412 = int(_2408 * 0.6643855571746826171875); + float _2414 = fma(_2408, 0.6643855571746826171875, -float(_2412)); + _2428 = dot(float3(_2414 * _2414, _2414, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2412], _499[_2412 + 1], _499[_2412 + 2])); + } + else + { + float _2407; + if ((_2362 >= (-0.744727432727813720703125)) && (_2362 < 4.673812389373779296875)) + { + float _2387 = fma(_2361, 0.4342944622039794921875, 0.744727432727813720703125); + int _2391 = int(_2387 * 0.55365467071533203125); + float _2393 = fma(_2387, 0.55365467071533203125, -float(_2391)); + _2407 = dot(float3(_2393 * _2393, _2393, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2391], _500[_2391 + 1], _500[_2391 + 2])); + } + else + { + _2407 = 4.0; + } + _2428 = _2407; + } + _2431 = _2428; + } + float _2434 = _2352.y; + float _2437 = log((_2434 <= 0.0) ? 
6.103515625e-05 : _2434); + float _2438 = _2437 * 0.4342944622039794921875; + float _2505; + if (_2438 <= (-5.2601776123046875)) + { + _2505 = -4.0; + } + else + { + float _2502; + if ((_2438 > (-5.2601776123046875)) && (_2438 < (-0.744727432727813720703125))) + { + float _2482 = fma(_2437, 0.4342944622039794921875, 5.2601776123046875); + int _2486 = int(_2482 * 0.6643855571746826171875); + float _2488 = fma(_2482, 0.6643855571746826171875, -float(_2486)); + _2502 = dot(float3(_2488 * _2488, _2488, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2486], _499[_2486 + 1], _499[_2486 + 2])); + } + else + { + float _2481; + if ((_2438 >= (-0.744727432727813720703125)) && (_2438 < 4.673812389373779296875)) + { + float _2461 = fma(_2437, 0.4342944622039794921875, 0.744727432727813720703125); + int _2465 = int(_2461 * 0.55365467071533203125); + float _2467 = fma(_2461, 0.55365467071533203125, -float(_2465)); + _2481 = dot(float3(_2467 * _2467, _2467, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2465], _500[_2465 + 1], _500[_2465 + 2])); + } + else + { + _2481 = 4.0; + } + _2502 = _2481; + } + _2505 = _2502; + } + float _2508 = _2352.z; + float _2511 = log((_2508 <= 0.0) ? 
6.103515625e-05 : _2508); + float _2512 = _2511 * 0.4342944622039794921875; + float _2579; + if (_2512 <= (-5.2601776123046875)) + { + _2579 = -4.0; + } + else + { + float _2576; + if ((_2512 > (-5.2601776123046875)) && (_2512 < (-0.744727432727813720703125))) + { + float _2556 = fma(_2511, 0.4342944622039794921875, 5.2601776123046875); + int _2560 = int(_2556 * 0.6643855571746826171875); + float _2562 = fma(_2556, 0.6643855571746826171875, -float(_2560)); + _2576 = dot(float3(_2562 * _2562, _2562, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2560], _499[_2560 + 1], _499[_2560 + 2])); + } + else + { + float _2555; + if ((_2512 >= (-0.744727432727813720703125)) && (_2512 < 4.673812389373779296875)) + { + float _2535 = fma(_2511, 0.4342944622039794921875, 0.744727432727813720703125); + int _2539 = int(_2535 * 0.55365467071533203125); + float _2541 = fma(_2535, 0.55365467071533203125, -float(_2539)); + _2555 = dot(float3(_2541 * _2541, _2541, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2539], _500[_2539 + 1], _500[_2539 + 2])); + } + else + { + _2555 = 4.0; + } + _2576 = _2555; + } + _2579 = _2576; + } + float3 _2583 = (float3(pow(10.0, _2431), pow(10.0, _2505), pow(10.0, _2579)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _2787 = _2583.x; + float _2790 = log((_2787 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2787); + float _2791 = _2790 * 0.4342944622039794921875; + float _2870; + if (_2791 <= (-3.84832763671875)) + { + _2870 = fma(_2790, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2862; + if ((_2791 > (-3.84832763671875)) && (_2791 < 0.68124115467071533203125)) + { + float _2845 = (7.0 * fma(_2790, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2846 = int(_2845); + float _2848 = _2845 - float(_2846); + _2862 = dot(float3(_2848 * _2848, _2848, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_2846], _503[_2846 + 1], _503[_2846 + 2])); + } + else + { + float _2841; + if ((_2791 >= 0.68124115467071533203125) && (_2791 < 3.65370273590087890625)) + { + float _2824 = (7.0 * fma(_2790, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2825 = int(_2824); + float _2827 = _2824 - float(_2825); + _2841 = dot(float3(_2827 * _2827, _2827, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2825], _504[_2825 + 1], _504[_2825 + 2])); + } + else + { + _2841 = fma(_2790, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2862 = _2841; + } + _2870 = _2862; + } + float _2873 = _2583.y; + float _2876 = log((_2873 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2873); + float _2877 = _2876 * 0.4342944622039794921875; + float _2954; + if (_2877 <= (-3.84832763671875)) + { + _2954 = fma(_2876, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _2946; + if ((_2877 > (-3.84832763671875)) && (_2877 < 0.68124115467071533203125)) + { + float _2929 = (7.0 * fma(_2876, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2930 = int(_2929); + float _2932 = _2929 - float(_2930); + _2946 = dot(float3(_2932 * _2932, _2932, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_2930], _503[_2930 + 1], _503[_2930 + 2])); + } + else + { + float _2925; + if ((_2877 >= 0.68124115467071533203125) && (_2877 < 3.65370273590087890625)) + { + float _2908 = (7.0 * fma(_2876, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2909 = int(_2908); + float _2911 = _2908 - float(_2909); + _2925 = dot(float3(_2911 * _2911, _2911, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2909], _504[_2909 + 1], _504[_2909 + 2])); + } + else + { + _2925 = fma(_2876, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _2946 = _2925; + } + _2954 = _2946; + } + float _2957 = _2583.z; + float _2960 = log((_2957 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2957); + float _2961 = _2960 * 0.4342944622039794921875; + float _3038; + if (_2961 <= (-3.84832763671875)) + { + _3038 = fma(_2960, 1.3028833866119384765625, fma(8.86110210418701171875, 1.3028833866119384765625, -4.0)); + } + else + { + float _3030; + if ((_2961 > (-3.84832763671875)) && (_2961 < 0.68124115467071533203125)) + { + float _3013 = (7.0 * fma(_2960, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _3014 = int(_3013); + float _3016 = _3013 - float(_3014); + _3030 = dot(float3(_3016 * _3016, _3016, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_3014], _503[_3014 + 1], _503[_3014 + 2])); + } + else + { + float _3009; + if ((_2961 >= 0.68124115467071533203125) && (_2961 < 3.65370273590087890625)) + { + float _2992 = (7.0 * fma(_2960, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.4129619598388671875, 0.4342944622039794921875, -0.68124115467071533203125); + int _2993 = int(_2992); + float _2995 = _2992 - float(_2993); + _3009 = dot(float3(_2995 * _2995, _2995, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2993], _504[_2993 + 1], _504[_2993 + 2])); + } + else + { + _3009 = fma(_2960, 0.026057668030261993408203125, fma(-8.4129619598388671875, 0.026057668030261993408203125, 3.0)); + } + _3030 = _3009; + } + _3038 = _3030; + } + float3 _3044 = pow(((float3(pow(10.0, _2870), pow(10.0, _2954), pow(10.0, _3038)) - float3(3.5073844628641381859779357910156e-05)) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _3052 = pow(fma(float3(18.8515625), _3044, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _3044, float3(1.0))), float3(78.84375)); + } + else + { + float3 _2201; + if ((_Globals.OutputDevice == 4u) || (_Globals.OutputDevice == 6u)) + { + float3 _1369 = (_932 * float3(1.5)) 
* (_572 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _1370 = _1369.x; + float _1371 = _1369.y; + float _1373 = _1369.z; + float _1376 = fast::max(fast::max(_1370, _1371), _1373); + float _1381 = (fast::max(_1376, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1370, _1371), _1373), 1.0000000133514319600180897396058e-10)) / fast::max(_1376, 0.00999999977648258209228515625); + float _1394 = fma(1.75, sqrt(fma(_1370, _1370 - _1373, fma(_1373, _1373 - _1371, _1371 * (_1371 - _1370)))), (_1373 + _1371) + _1370); + float _1395 = _1394 * 0.3333333432674407958984375; + float _1396 = _1381 - 0.4000000059604644775390625; + float _1401 = fast::max(1.0 - abs(_1396 * 2.5), 0.0); + float _1409 = fma(float(int(sign(_1396 * 5.0))), fma(-_1401, _1401, 1.0), 1.0) * 0.02500000037252902984619140625; + float _1422; + if (_1395 <= 0.053333334624767303466796875) + { + _1422 = _1409; + } + else + { + float _1421; + if (_1395 >= 0.1599999964237213134765625) + { + _1421 = 0.0; + } + else + { + _1421 = _1409 * ((0.23999999463558197021484375 / _1394) - 0.5); + } + _1422 = _1421; + } + float3 _1425 = _1369 * float3(1.0 + _1422); + float _1426 = _1425.x; + float _1427 = _1425.y; + float _1429 = _1425.z; + float _1443; + if ((_1426 == _1427) && (_1427 == _1429)) + { + _1443 = 0.0; + } + else + { + _1443 = 57.2957763671875 * precise::atan2(1.73205077648162841796875 * (_1427 - _1429), fma(2.0, _1426, -_1427) - _1429); + } + float _1448; + if (_1443 < 0.0) + { + _1448 = _1443 + 360.0; + } + else + { + _1448 = _1443; + } + float _1449 = fast::clamp(_1448, 0.0, 360.0); + float _1454; + if (_1449 > 180.0) + { + _1454 = _1449 - 360.0; + } + else + { + _1454 = _1449; + } + float _1504; + if ((_1454 > (-67.5)) && (_1454 < 67.5)) + { + float _1460 = _1454 - (-67.5); + int _1462 = int(_1460 * 
0.0296296291053295135498046875); + float _1464 = fma(_1460, 0.0296296291053295135498046875, -float(_1462)); + float _1465 = _1464 * _1464; + float _1466 = _1465 * _1464; + float _1503; + if (_1462 == 3) + { + _1503 = fma(_1464, -0.5, fma(_1466, -0.16666667163372039794921875, _1465 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1496; + if (_1462 == 2) + { + _1496 = fma(_1466, 0.5, _1465 * (-1.0)) + 0.666666686534881591796875; + } + else + { + float _1491; + if (_1462 == 1) + { + _1491 = fma(_1464, 0.5, fma(_1466, -0.5, _1465 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1484; + if (_1462 == 0) + { + _1484 = _1466 * 0.16666667163372039794921875; + } + else + { + _1484 = 0.0; + } + _1491 = _1484; + } + _1496 = _1491; + } + _1503 = _1496; + } + _1504 = _1503; + } + else + { + _1504 = 0.0; + } + _1425.x = fma(((_1504 * 1.5) * _1381) * (0.02999999932944774627685546875 - _1426), 0.180000007152557373046875, _1426); + float3 _1514 = fast::clamp(fast::clamp(_1425, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _1517 = mix(float3(dot(_1514, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1514, float3(0.959999978542327880859375)); + float _1518 = _1517.x; + float _1526 = log((_1518 <= 0.0) ? 
6.103515625e-05 : _1518); + float _1527 = _1526 * 0.4342944622039794921875; + float _1596; + if (_1527 <= (-5.2601776123046875)) + { + _1596 = -4.0; + } + else + { + float _1593; + if ((_1527 > (-5.2601776123046875)) && (_1527 < (-0.744727432727813720703125))) + { + float _1573 = fma(_1526, 0.4342944622039794921875, 5.2601776123046875); + int _1577 = int(_1573 * 0.6643855571746826171875); + float _1579 = fma(_1573, 0.6643855571746826171875, -float(_1577)); + _1593 = dot(float3(_1579 * _1579, _1579, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1577], _499[_1577 + 1], _499[_1577 + 2])); + } + else + { + float _1572; + if ((_1527 >= (-0.744727432727813720703125)) && (_1527 < 4.673812389373779296875)) + { + float _1552 = fma(_1526, 0.4342944622039794921875, 0.744727432727813720703125); + int _1556 = int(_1552 * 0.55365467071533203125); + float _1558 = fma(_1552, 0.55365467071533203125, -float(_1556)); + _1572 = dot(float3(_1558 * _1558, _1558, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1556], _500[_1556 + 1], _500[_1556 + 2])); + } + else + { + _1572 = 4.0; + } + _1593 = _1572; + } + _1596 = _1593; + } + float _1599 = _1517.y; + float _1602 = log((_1599 <= 0.0) ? 
6.103515625e-05 : _1599); + float _1603 = _1602 * 0.4342944622039794921875; + float _1670; + if (_1603 <= (-5.2601776123046875)) + { + _1670 = -4.0; + } + else + { + float _1667; + if ((_1603 > (-5.2601776123046875)) && (_1603 < (-0.744727432727813720703125))) + { + float _1647 = fma(_1602, 0.4342944622039794921875, 5.2601776123046875); + int _1651 = int(_1647 * 0.6643855571746826171875); + float _1653 = fma(_1647, 0.6643855571746826171875, -float(_1651)); + _1667 = dot(float3(_1653 * _1653, _1653, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1651], _499[_1651 + 1], _499[_1651 + 2])); + } + else + { + float _1646; + if ((_1603 >= (-0.744727432727813720703125)) && (_1603 < 4.673812389373779296875)) + { + float _1626 = fma(_1602, 0.4342944622039794921875, 0.744727432727813720703125); + int _1630 = int(_1626 * 0.55365467071533203125); + float _1632 = fma(_1626, 0.55365467071533203125, -float(_1630)); + _1646 = dot(float3(_1632 * _1632, _1632, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1630], _500[_1630 + 1], _500[_1630 + 2])); + } + else + { + _1646 = 4.0; + } + _1667 = _1646; + } + _1670 = _1667; + } + float _1673 = _1517.z; + float _1676 = log((_1673 <= 0.0) ? 
6.103515625e-05 : _1673); + float _1677 = _1676 * 0.4342944622039794921875; + float _1744; + if (_1677 <= (-5.2601776123046875)) + { + _1744 = -4.0; + } + else + { + float _1741; + if ((_1677 > (-5.2601776123046875)) && (_1677 < (-0.744727432727813720703125))) + { + float _1721 = fma(_1676, 0.4342944622039794921875, 5.2601776123046875); + int _1725 = int(_1721 * 0.6643855571746826171875); + float _1727 = fma(_1721, 0.6643855571746826171875, -float(_1725)); + _1741 = dot(float3(_1727 * _1727, _1727, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1725], _499[_1725 + 1], _499[_1725 + 2])); + } + else + { + float _1720; + if ((_1677 >= (-0.744727432727813720703125)) && (_1677 < 4.673812389373779296875)) + { + float _1700 = fma(_1676, 0.4342944622039794921875, 0.744727432727813720703125); + int _1704 = int(_1700 * 0.55365467071533203125); + float _1706 = fma(_1700, 0.55365467071533203125, -float(_1704)); + _1720 = dot(float3(_1706 * _1706, _1706, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1704], _500[_1704 + 1], _500[_1704 + 2])); + } + else + { + _1720 = 4.0; + } + _1741 = _1720; + } + _1744 = _1741; + } + float3 _1748 = (float3(pow(10.0, _1596), pow(10.0, _1670), pow(10.0, _1744)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _1952 = _1748.x; + float _1955 = log((_1952 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1952); + float _1956 = _1955 * 0.4342944622039794921875; + float _2030; + if (_1956 <= (-3.84832763671875)) + { + _2030 = -2.3010299205780029296875; + } + else + { + float _2027; + if ((_1956 > (-3.84832763671875)) && (_1956 < 0.68124115467071533203125)) + { + float _2010 = (7.0 * fma(_1955, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2011 = int(_2010); + float _2013 = _2010 - float(_2011); + _2027 = dot(float3(_2013 * _2013, _2013, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2011], _506[_2011 + 1], _506[_2011 + 2])); + } + else + { + float _2006; + if ((_1956 >= 0.68124115467071533203125) && (_1956 < 3.761315822601318359375)) + { + float _1989 = (7.0 * fma(_1955, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _1990 = int(_1989); + float _1992 = _1989 - float(_1990); + _2006 = dot(float3(_1992 * _1992, _1992, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_1990], _507[_1990 + 1], _507[_1990 + 2])); + } + else + { + _2006 = fma(_1955, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2027 = _2006; + } + _2030 = _2027; + } + float _2033 = _1748.y; + float _2036 = log((_2033 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2033); + float _2037 = _2036 * 0.4342944622039794921875; + float _2109; + if (_2037 <= (-3.84832763671875)) + { + _2109 = -2.3010299205780029296875; + } + else + { + float _2106; + if ((_2037 > (-3.84832763671875)) && (_2037 < 0.68124115467071533203125)) + { + float _2089 = (7.0 * fma(_2036, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2090 = int(_2089); + float _2092 = _2089 - float(_2090); + _2106 = dot(float3(_2092 * _2092, _2092, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2090], _506[_2090 + 1], _506[_2090 + 2])); + } + else + { + float _2085; + if ((_2037 >= 0.68124115467071533203125) && (_2037 < 3.761315822601318359375)) + { + float _2068 = (7.0 * fma(_2036, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _2069 = int(_2068); + float _2071 = _2068 - float(_2069); + _2085 = dot(float3(_2071 * _2071, _2071, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_2069], _507[_2069 + 1], _507[_2069 + 2])); + } + else + { + _2085 = fma(_2036, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2106 = _2085; + } + _2109 = _2106; + } + float _2112 = _1748.z; + float _2115 = log((_2112 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2112); + float _2116 = _2115 * 0.4342944622039794921875; + float _2188; + if (_2116 <= (-3.84832763671875)) + { + _2188 = -2.3010299205780029296875; + } + else + { + float _2185; + if ((_2116 > (-3.84832763671875)) && (_2116 < 0.68124115467071533203125)) + { + float _2168 = (7.0 * fma(_2115, 0.4342944622039794921875, 3.84832763671875)) / fma(1.56861579418182373046875, 0.4342944622039794921875, 3.84832763671875); + int _2169 = int(_2168); + float _2171 = _2168 - float(_2169); + _2185 = dot(float3(_2171 * _2171, _2171, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2169], _506[_2169 + 1], _506[_2169 + 2])); + } + else + { + float _2164; + if ((_2116 >= 0.68124115467071533203125) && (_2116 < 3.761315822601318359375)) + { + float _2147 = (7.0 * fma(_2115, 0.4342944622039794921875, -0.68124115467071533203125)) / fma(8.66075038909912109375, 0.4342944622039794921875, -0.68124115467071533203125); + int _2148 = int(_2147); + float _2150 = _2147 - float(_2148); + _2164 = dot(float3(_2150 * _2150, _2150, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_2148], _507[_2148 + 1], _507[_2148 + 2])); + } + else + { + _2164 = fma(_2115, 0.05211533606052398681640625, fma(-8.66075038909912109375, 0.05211533606052398681640625, 3.3010299205780029296875)); + } + _2185 = _2164; + } + _2188 = _2185; + } + float3 _2193 = pow((float3(pow(10.0, _2030), pow(10.0, _2109), pow(10.0, _2188)) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2201 = pow(fma(float3(18.8515625), _2193, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2193, float3(1.0))), float3(78.84375)); + } + else + { + float3 _1366; + if (_Globals.OutputDevice == 7u) + { + float3 _1358 = pow(((_932 * _573) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _1366 = pow(fma(float3(18.8515625), _1358, 
float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _1358, float3(1.0))), float3(78.84375)); + } + else + { + _1366 = pow((_1324 * _573) * _602, float3(_Globals.InverseGamma.z)); + } + _2201 = _1366; + } + _3052 = _2201; + } + _3062 = _3052; + } + _3103 = _3062; + } + float3 _3104 = _3103 * float3(0.95238101482391357421875); + float4 _3105 = float4(_3104.x, _3104.y, _3104.z, float4(0.0).w); + _3105.w = 0.0; + out.out_var_SV_Target0 = _3105; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag new file mode 100644 index 00000000000..790ad27a1d7 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -0,0 +1,503 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float 
PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float 
PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint 
View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_TranslucentBasePass +{ + uint TranslucentBasePass_Shared_Forward_NumLocalLights; + uint TranslucentBasePass_Shared_Forward_NumReflectionCaptures; + uint TranslucentBasePass_Shared_Forward_HasDirectionalLight; + uint TranslucentBasePass_Shared_Forward_NumGridCells; + packed_int3 TranslucentBasePass_Shared_Forward_CulledGridSize; + uint TranslucentBasePass_Shared_Forward_MaxCulledLightsPerCell; + uint TranslucentBasePass_Shared_Forward_LightGridPixelSizeShift; + uint PrePadding_TranslucentBasePass_Shared_Forward_36; + uint PrePadding_TranslucentBasePass_Shared_Forward_40; + uint PrePadding_TranslucentBasePass_Shared_Forward_44; + packed_float3 
TranslucentBasePass_Shared_Forward_LightGridZParams; + float PrePadding_TranslucentBasePass_Shared_Forward_60; + packed_float3 TranslucentBasePass_Shared_Forward_DirectionalLightDirection; + float PrePadding_TranslucentBasePass_Shared_Forward_76; + packed_float3 TranslucentBasePass_Shared_Forward_DirectionalLightColor; + float TranslucentBasePass_Shared_Forward_DirectionalLightVolumetricScatteringIntensity; + uint TranslucentBasePass_Shared_Forward_DirectionalLightShadowMapChannelMask; + uint PrePadding_TranslucentBasePass_Shared_Forward_100; + float2 TranslucentBasePass_Shared_Forward_DirectionalLightDistanceFadeMAD; + uint TranslucentBasePass_Shared_Forward_NumDirectionalLightCascades; + uint PrePadding_TranslucentBasePass_Shared_Forward_116; + uint PrePadding_TranslucentBasePass_Shared_Forward_120; + uint PrePadding_TranslucentBasePass_Shared_Forward_124; + float4 TranslucentBasePass_Shared_Forward_CascadeEndDepths; + float4x4 TranslucentBasePass_Shared_Forward_DirectionalLightWorldToShadowMatrix[4]; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapMinMax[4]; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapAtlasBufferSize; + float TranslucentBasePass_Shared_Forward_DirectionalLightDepthBias; + uint TranslucentBasePass_Shared_Forward_DirectionalLightUseStaticShadowing; + uint PrePadding_TranslucentBasePass_Shared_Forward_488; + uint PrePadding_TranslucentBasePass_Shared_Forward_492; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightStaticShadowBufferSize; + float4x4 TranslucentBasePass_Shared_Forward_DirectionalLightWorldToStaticShadow; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_576; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_580; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_584; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_588; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_592; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_596; + float 
PrePadding_TranslucentBasePass_Shared_ForwardISR_600; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_604; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_608; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_612; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_616; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_620; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_624; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_628; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_632; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_636; + uint TranslucentBasePass_Shared_ForwardISR_NumLocalLights; + uint TranslucentBasePass_Shared_ForwardISR_NumReflectionCaptures; + uint TranslucentBasePass_Shared_ForwardISR_HasDirectionalLight; + uint TranslucentBasePass_Shared_ForwardISR_NumGridCells; + packed_int3 TranslucentBasePass_Shared_ForwardISR_CulledGridSize; + uint TranslucentBasePass_Shared_ForwardISR_MaxCulledLightsPerCell; + uint TranslucentBasePass_Shared_ForwardISR_LightGridPixelSizeShift; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_676; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_680; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_684; + packed_float3 TranslucentBasePass_Shared_ForwardISR_LightGridZParams; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_700; + packed_float3 TranslucentBasePass_Shared_ForwardISR_DirectionalLightDirection; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_716; + packed_float3 TranslucentBasePass_Shared_ForwardISR_DirectionalLightColor; + float TranslucentBasePass_Shared_ForwardISR_DirectionalLightVolumetricScatteringIntensity; + uint TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowMapChannelMask; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_740; + float2 TranslucentBasePass_Shared_ForwardISR_DirectionalLightDistanceFadeMAD; + uint 
TranslucentBasePass_Shared_ForwardISR_NumDirectionalLightCascades; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_756; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_760; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_764; + float4 TranslucentBasePass_Shared_ForwardISR_CascadeEndDepths; + float4x4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToShadowMatrix[4]; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapMinMax[4]; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapAtlasBufferSize; + float TranslucentBasePass_Shared_ForwardISR_DirectionalLightDepthBias; + uint TranslucentBasePass_Shared_ForwardISR_DirectionalLightUseStaticShadowing; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_1128; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_1132; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightStaticShadowBufferSize; + float4x4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToStaticShadow; + float PrePadding_TranslucentBasePass_Shared_Reflection_1216; + float PrePadding_TranslucentBasePass_Shared_Reflection_1220; + float PrePadding_TranslucentBasePass_Shared_Reflection_1224; + float PrePadding_TranslucentBasePass_Shared_Reflection_1228; + float PrePadding_TranslucentBasePass_Shared_Reflection_1232; + float PrePadding_TranslucentBasePass_Shared_Reflection_1236; + float PrePadding_TranslucentBasePass_Shared_Reflection_1240; + float PrePadding_TranslucentBasePass_Shared_Reflection_1244; + float PrePadding_TranslucentBasePass_Shared_Reflection_1248; + float PrePadding_TranslucentBasePass_Shared_Reflection_1252; + float PrePadding_TranslucentBasePass_Shared_Reflection_1256; + float PrePadding_TranslucentBasePass_Shared_Reflection_1260; + float PrePadding_TranslucentBasePass_Shared_Reflection_1264; + float PrePadding_TranslucentBasePass_Shared_Reflection_1268; + float PrePadding_TranslucentBasePass_Shared_Reflection_1272; + float 
PrePadding_TranslucentBasePass_Shared_Reflection_1276; + float4 TranslucentBasePass_Shared_Reflection_SkyLightParameters; + float TranslucentBasePass_Shared_Reflection_SkyLightCubemapBrightness; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1300; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1304; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1308; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1312; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1316; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1320; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1324; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1328; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1332; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1336; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1340; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1344; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1348; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1352; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1356; + float4 TranslucentBasePass_Shared_PlanarReflection_ReflectionPlane; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionOrigin; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionXAxis; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionYAxis; + float3x4 TranslucentBasePass_Shared_PlanarReflection_InverseTransposeMirrorMatrix; + packed_float3 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1484; + float2 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters2; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1496; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1500; + 
float4x4 TranslucentBasePass_Shared_PlanarReflection_ProjectionWithExtraFOV[2]; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenScaleBias[2]; + float2 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenBound; + uint TranslucentBasePass_Shared_PlanarReflection_bIsStereo; + float PrePadding_TranslucentBasePass_Shared_Fog_1676; + float PrePadding_TranslucentBasePass_Shared_Fog_1680; + float PrePadding_TranslucentBasePass_Shared_Fog_1684; + float PrePadding_TranslucentBasePass_Shared_Fog_1688; + float PrePadding_TranslucentBasePass_Shared_Fog_1692; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters2; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogColorParameter; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters3; + float4 TranslucentBasePass_Shared_Fog_InscatteringLightDirection; + float4 TranslucentBasePass_Shared_Fog_DirectionalInscatteringColor; + float2 TranslucentBasePass_Shared_Fog_SinCosInscatteringColorCubemapRotation; + float PrePadding_TranslucentBasePass_Shared_Fog_1800; + float PrePadding_TranslucentBasePass_Shared_Fog_1804; + packed_float3 TranslucentBasePass_Shared_Fog_FogInscatteringTextureParameters; + float TranslucentBasePass_Shared_Fog_ApplyVolumetricFog; + float PrePadding_TranslucentBasePass_1824; + float PrePadding_TranslucentBasePass_1828; + float PrePadding_TranslucentBasePass_1832; + float PrePadding_TranslucentBasePass_1836; + float PrePadding_TranslucentBasePass_1840; + float PrePadding_TranslucentBasePass_1844; + float PrePadding_TranslucentBasePass_1848; + float PrePadding_TranslucentBasePass_1852; + float PrePadding_TranslucentBasePass_1856; + float PrePadding_TranslucentBasePass_1860; + float PrePadding_TranslucentBasePass_1864; + float PrePadding_TranslucentBasePass_1868; + float PrePadding_TranslucentBasePass_1872; + float PrePadding_TranslucentBasePass_1876; + float 
PrePadding_TranslucentBasePass_1880; + float PrePadding_TranslucentBasePass_1884; + float PrePadding_TranslucentBasePass_1888; + float PrePadding_TranslucentBasePass_1892; + float PrePadding_TranslucentBasePass_1896; + float PrePadding_TranslucentBasePass_1900; + float PrePadding_TranslucentBasePass_1904; + float PrePadding_TranslucentBasePass_1908; + float PrePadding_TranslucentBasePass_1912; + float PrePadding_TranslucentBasePass_1916; + float PrePadding_TranslucentBasePass_1920; + float PrePadding_TranslucentBasePass_1924; + float PrePadding_TranslucentBasePass_1928; + float PrePadding_TranslucentBasePass_1932; + float PrePadding_TranslucentBasePass_1936; + float PrePadding_TranslucentBasePass_1940; + float PrePadding_TranslucentBasePass_1944; + float PrePadding_TranslucentBasePass_1948; + float PrePadding_TranslucentBasePass_1952; + float PrePadding_TranslucentBasePass_1956; + float PrePadding_TranslucentBasePass_1960; + float PrePadding_TranslucentBasePass_1964; + float PrePadding_TranslucentBasePass_1968; + float PrePadding_TranslucentBasePass_1972; + float PrePadding_TranslucentBasePass_1976; + float PrePadding_TranslucentBasePass_1980; + float PrePadding_TranslucentBasePass_1984; + float PrePadding_TranslucentBasePass_1988; + float PrePadding_TranslucentBasePass_1992; + float PrePadding_TranslucentBasePass_1996; + float PrePadding_TranslucentBasePass_2000; + float PrePadding_TranslucentBasePass_2004; + float PrePadding_TranslucentBasePass_2008; + float PrePadding_TranslucentBasePass_2012; + float PrePadding_TranslucentBasePass_2016; + float PrePadding_TranslucentBasePass_2020; + float PrePadding_TranslucentBasePass_2024; + float PrePadding_TranslucentBasePass_2028; + float PrePadding_TranslucentBasePass_2032; + float PrePadding_TranslucentBasePass_2036; + float PrePadding_TranslucentBasePass_2040; + float PrePadding_TranslucentBasePass_2044; + float PrePadding_TranslucentBasePass_2048; + float PrePadding_TranslucentBasePass_2052; + float 
PrePadding_TranslucentBasePass_2056; + float PrePadding_TranslucentBasePass_2060; + float PrePadding_TranslucentBasePass_2064; + float PrePadding_TranslucentBasePass_2068; + float PrePadding_TranslucentBasePass_2072; + float PrePadding_TranslucentBasePass_2076; + float PrePadding_TranslucentBasePass_2080; + float PrePadding_TranslucentBasePass_2084; + float PrePadding_TranslucentBasePass_2088; + float PrePadding_TranslucentBasePass_2092; + float PrePadding_TranslucentBasePass_2096; + float PrePadding_TranslucentBasePass_2100; + float PrePadding_TranslucentBasePass_2104; + float PrePadding_TranslucentBasePass_2108; + float PrePadding_TranslucentBasePass_2112; + float PrePadding_TranslucentBasePass_2116; + float PrePadding_TranslucentBasePass_2120; + float PrePadding_TranslucentBasePass_2124; + float PrePadding_TranslucentBasePass_2128; + float PrePadding_TranslucentBasePass_2132; + float PrePadding_TranslucentBasePass_2136; + float PrePadding_TranslucentBasePass_2140; + float4 TranslucentBasePass_HZBUvFactorAndInvFactor; + float4 TranslucentBasePass_PrevScreenPositionScaleBias; + float TranslucentBasePass_PrevSceneColorPreExposureInv; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[2]; + float4 Material_ScalarExpressions[1]; +}; + +constant float _108 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 in_var_TEXCOORD11_centroid [[user(locn1)]]; + uint in_var_PRIMITIVE_ID [[user(locn2)]]; + float4 in_var_TEXCOORD7 [[user(locn3)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], constant type_TranslucentBasePass& TranslucentBasePass [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture3d TranslucentBasePass_Shared_Fog_IntegratedLightScattering [[texture(0)]], 
sampler View_SharedBilinearClampedSampler [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleMaskIn [[sample_mask]]) +{ + main0_out out = {}; + float4 _137 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); + float3 _142 = (_137.xyz / float3(_137.w)) - float3(View.View_PreViewTranslation); + bool _165 = TranslucentBasePass.TranslucentBasePass_Shared_Fog_ApplyVolumetricFog > 0.0; + float4 _215; + if (_165) + { + float4 _172 = View.View_WorldToClip * float4(_142, 1.0); + float _173 = _172.w; + float4 _202; + if (_165) + { + _202 = TranslucentBasePass_Shared_Fog_IntegratedLightScattering.sample(View_SharedBilinearClampedSampler, float3(fma((_172.xy / float2(_173)).xy, float2(0.5, -0.5), float2(0.5)), (log2(fma(_173, View.View_VolumetricFogGridZParams[0], View.View_VolumetricFogGridZParams[1])) * View.View_VolumetricFogGridZParams[2]) * View.View_VolumetricFogInvGridSize[2]), level(0.0)); + } + else + { + _202 = float4(0.0, 0.0, 0.0, 1.0); + } + _215 = float4(fma(in.in_var_TEXCOORD7.xyz, float3(_202.w), _202.xyz), _202.w * in.in_var_TEXCOORD7.w); + } + else + { + _215 = in.in_var_TEXCOORD7; + } + float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(fma(1.0 + dot(float3(-0.2857142984867095947265625, -0.4285714328289031982421875, 0.857142865657806396484375), fast::normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * fast::normalize(fma(float3(0.0, 0.0, 1.0), float3(View.View_NormalOverrideParameter.w), View.View_NormalOverrideParameter.xyz)))), 0.5, 0.20000000298023223876953125)), float3(0.0)); + float3 _246; + if (View.View_OutOfBoundsMask > 0.0) + { + uint _222 = in.in_var_PRIMITIVE_ID * 26u; + float3 _245; + if (any(abs(_142 - View_PrimitiveSceneData._m0[_222 + 5u].xyz) > (View_PrimitiveSceneData._m0[_222 + 19u].xyz + float3(1.0)))) + { + _245 = 
mix(float3(1.0, 1.0, 0.0), float3(0.0, 1.0, 1.0), float3(float3(fract(dot(_142, float3(0.57700002193450927734375)) * 0.00200000009499490261077880859375)) > float3(0.5))); + } + else + { + _245 = _216; + } + _246 = _245; + } + else + { + _246 = _216; + } + float4 _255 = float4(fma(_246, float3(_215.w), _215.xyz), _108); + _255.w = 1.0; + float4 _268; + uint _269; + if (View.View_NumSceneColorMSAASamples > 1) + { + _268 = _255 * float4(float(View.View_NumSceneColorMSAASamples) * 0.25); + _269 = gl_SampleMaskIn & 15u; + } + else + { + _268 = _255; + _269 = gl_SampleMaskIn; + } + out.out_var_SV_Target0 = _268; + out.gl_SampleMask = _269; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag b/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..6a7f419a5fa --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag @@ -0,0 +1,213 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + 
float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float 
View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float 
PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4 ShadowBufferSize; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float4x4 ScreenToShadowMatrix; + float2 ProjectionDepthBiasParameters; + float4 ModulatedShadowColor; + float4 ShadowTileOffsetAndSize; +}; + +constant float4 _58 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], float4 
_RESERVED_IDENTIFIER_FIXUP_gl_LastFragData [[color(0)]], texture2d ShadowDepthTexture [[texture(0)]], sampler ShadowDepthTextureSampler [[sampler(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; + float _68 = _67.w; + float4 _82 = _Globals.ScreenToShadowMatrix * float4((fma(gl_FragCoord.xy, View.View_BufferSizeAndInvSize.zw, -View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); + float _118 = fast::clamp(fma(fast::clamp(fma(ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx, float3(_Globals.SoftTransitionScale.z), -float3(fma(fast::min(_82.z, 0.999989986419677734375), _Globals.SoftTransitionScale.z, -1.0))), float3(0.0), float3(1.0)).x - 0.5, _Globals.ShadowSharpen, 0.5), 0.0, 1.0); + float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); + float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); + _128.w = 0.0; + out.out_var_SV_Target0 = _128; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag b/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..6a7f419a5fa --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag @@ -0,0 +1,213 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 
View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 
View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float 
View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float 
View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4 ShadowBufferSize; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float4x4 ScreenToShadowMatrix; + float2 ProjectionDepthBiasParameters; + float4 ModulatedShadowColor; + float4 ShadowTileOffsetAndSize; +}; + +constant float4 _58 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], float4 _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData [[color(0)]], texture2d ShadowDepthTexture [[texture(0)]], sampler ShadowDepthTextureSampler [[sampler(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; + float _68 = _67.w; + float4 _82 = _Globals.ScreenToShadowMatrix * float4((fma(gl_FragCoord.xy, View.View_BufferSizeAndInvSize.zw, -View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); + float _118 = fast::clamp(fma(fast::clamp(fma(ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx, float3(_Globals.SoftTransitionScale.z), -float3(fma(fast::min(_82.z, 0.999989986419677734375), _Globals.SoftTransitionScale.z, -1.0))), float3(0.0), float3(1.0)).x - 0.5, _Globals.ShadowSharpen, 0.5), 0.0, 1.0); + float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); + float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); + _128.w = 0.0; + out.out_var_SV_Target0 = _128; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag new file mode 100644 
index 00000000000..3c9be2985ad --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag @@ -0,0 +1,84 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct spvDescriptorSetBuffer0 +{ + const device type_StructuredBuffer_v4float* CulledObjectBoxBounds [[id(0)]]; + constant type_Globals* _Globals [[id(1)]]; + texture2d RWShadowTileNumCulledObjects [[id(2)]]; + device atomic_uint* RWShadowTileNumCulledObjects_atomic [[id(3)]]; +}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float(((*spvDescriptorSet0._Globals).ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2((*spvDescriptorSet0._Globals).ShadowTileListGroupSize); + float2 _96 = fma(_91 / _93, float2(2.0), float2(-1.0)); + float2 _100 = fma((_91 + float2(1.0)) / _93, float2(2.0), float2(-1.0)); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xy > _96.xy) && all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz < _101)) + { + float3 _120 = (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz + (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xyz; + float _122 = _96.x; + float _123 = _96.y; + float _126 = _100.x; + float _129 = _100.y; + float3 _166 = fma(float3(-0.5), _120, float3(_122, _123, -1000.0)); + float3 
_170 = float3(dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _189 = fma(float3(-0.5), _120, float3(_126, _123, -1000.0)); + float3 _193 = float3(dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_189, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _205 = fma(float3(-0.5), _120, float3(_122, _129, -1000.0)); + float3 _209 = float3(dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_205, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _221 = fma(float3(-0.5), _120, float3(_126, _129, -1000.0)); + float3 _225 = float3(dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_221, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _237 = fma(float3(-0.5), _120, float3(_122, _123, 1.0)); + float3 _241 = float3(dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_237, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _253 = fma(float3(-0.5), _120, float3(_126, _123, 1.0)); + float3 _257 = float3(dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_253, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _269 = fma(float3(-0.5), _120, float3(_122, _129, 1.0)); + float3 _273 = float3(dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_269, 
(*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_269, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + float3 _285 = fma(float3(-0.5), _120, float3(_126, _129, 1.0)); + float3 _289 = float3(dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_285, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + if (all(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(float3(500000.0), _170), _193), _209), _225), _241), _257), _273), _289) < float3(1.0)) && all(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(float3(-500000.0), _170), _193), _209), _225), _241), _257), _273), _289) > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.RWShadowTileNumCulledObjects_atomic[(_78 * (*spvDescriptorSet0._Globals).ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.frag b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..617bb4d0b81 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,76 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d 
RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = fma(_91 / _93, float2(2.0), float2(-1.0)); + float2 _100 = fma((_91 + float2(1.0)) / _93, float2(2.0), float2(-1.0)); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _101)) + { + float3 _120 = CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz; + float _122 = _96.x; + float _123 = _96.y; + float _126 = _100.x; + float _129 = _100.y; + float3 _166 = fma(float3(-0.5), _120, float3(_122, _123, -1000.0)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _189 = fma(float3(-0.5), _120, float3(_126, _123, -1000.0)); + float3 _193 = float3(dot(_189, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_189, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_189, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _205 = fma(float3(-0.5), _120, float3(_122, _129, -1000.0)); + float3 _209 = float3(dot(_205, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_205, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_205, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _221 = fma(float3(-0.5), _120, float3(_126, _129, -1000.0)); + float3 _225 = float3(dot(_221, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_221, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_221, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _237 = fma(float3(-0.5), 
_120, float3(_122, _123, 1.0)); + float3 _241 = float3(dot(_237, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_237, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_237, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _253 = fma(float3(-0.5), _120, float3(_126, _123, 1.0)); + float3 _257 = float3(dot(_253, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_253, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_253, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _269 = fma(float3(-0.5), _120, float3(_122, _129, 1.0)); + float3 _273 = float3(dot(_269, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_269, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_269, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + float3 _285 = fma(float3(-0.5), _120, float3(_126, _129, 1.0)); + float3 _289 = float3(dot(_285, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_285, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_285, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + if (all(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(float3(500000.0), _170), _193), _209), _225), _241), _257), _273), _289) < float3(1.0)) && all(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(float3(-500000.0), _170), _193), _209), _225), _241), _257), _273), _289) > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..3f68a92f0a7 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,77 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include 
+#include + +using namespace metal; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvBufferSizeConstants [[buffer(25)]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + constant uint& CulledObjectBoxBoundsBufferSize = spvBufferSizeConstants[0]; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = fma(_91 / _93, float2(2.0), float2(-1.0)); + float2 _100 = fma((_91 + float2(1.0)) / _93, float2(2.0), float2(-1.0)); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _323 = uint(clamp(int(_103 + 1u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u)))); + if (all(CulledObjectBoxBounds._m0[_323].xy > _96.xy) && all(CulledObjectBoxBounds._m0[uint(clamp(int(_103), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz < _101)) + { + float3 _120 = CulledObjectBoxBounds._m0[uint(clamp(int(_103), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz + CulledObjectBoxBounds._m0[_323].xyz; + float _122 = _96.x; + float _123 = _96.y; + float _126 = _100.x; + float _129 = _100.y; + float3 _166 = fma(float3(-0.5), _120, float3(_122, _123, -1000.0)); + float3 _170 
= float3(dot(_166, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_166, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_166, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _189 = fma(float3(-0.5), _120, float3(_126, _123, -1000.0)); + float3 _193 = float3(dot(_189, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_189, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_189, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _205 = fma(float3(-0.5), _120, float3(_122, _129, -1000.0)); + float3 _209 = float3(dot(_205, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_205, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_205, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _221 = fma(float3(-0.5), _120, float3(_126, _129, -1000.0)); + float3 _225 = float3(dot(_221, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_221, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 
2147483647u))))].xyz), dot(_221, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _237 = fma(float3(-0.5), _120, float3(_122, _123, 1.0)); + float3 _241 = float3(dot(_237, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_237, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_237, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _253 = fma(float3(-0.5), _120, float3(_126, _123, 1.0)); + float3 _257 = float3(dot(_253, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_253, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_253, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _269 = fma(float3(-0.5), _120, float3(_122, _129, 1.0)); + float3 _273 = float3(dot(_269, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_269, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_269, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + float3 _285 = fma(float3(-0.5), _120, float3(_126, _129, 1.0)); + float3 _289 = float3(dot(_285, CulledObjectBoxBounds._m0[uint(clamp(int(_103 
+ 2u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_285, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 3u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz), dot(_285, CulledObjectBoxBounds._m0[uint(clamp(int(_103 + 4u), int(0u), int(min((((CulledObjectBoxBoundsBufferSize - 0) / 16) - 1u), 2147483647u))))].xyz)); + if (all(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(fast::min(float3(500000.0), _170), _193), _209), _225), _241), _257), _273), _289) < float3(1.0)) && all(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(fast::max(float3(-500000.0), _170), _193), _209), _225), _241), _257), _273), _289) > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc new file mode 100644 index 00000000000..1d0212593bb --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc @@ -0,0 +1,396 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; + float4 Color; + spvUnsafeArray TexCoords; + float4 LightMapCoordinate; + uint PrimitiveId; + uint LightmapDataIndex; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FSharedBasePassInterpolants +{ +}; +struct FBasePassInterpolantsVSToDS +{ + FSharedBasePassInterpolants _m0; +}; + +struct FBasePassVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + FBasePassInterpolantsVSToDS BasePassInterpolants; + float4 Position; +}; + +struct FPNTessellationHSToDS +{ + FBasePassVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 
View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + 
float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float 
View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +constant float4 _142 = {}; + +struct main0_out +{ + float4 
out_var_COLOR0; + uint out_var_LIGHTMAP_ID; + float3 out_var_PN_DisplacementScales; + spvUnsafeArray out_var_PN_POSITION; + float out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + uint out_var_PRIMITIVE_ID; + spvUnsafeArray out_var_TEXCOORD0; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_TEXCOORD4; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_COLOR0 [[attribute(2)]]; + float4 in_var_TEXCOORD0_0 [[attribute(3)]]; + float4 in_var_TEXCOORD4 [[attribute(4)]]; + uint in_var_PRIMITIVE_ID [[attribute(5)]]; + uint in_var_LIGHTMAP_ID [[attribute(6)]]; + float4 in_var_VS_To_DS_Position [[attribute(7)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _144 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, 
gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _145 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _146 = spvUnsafeArray({ gl_in[0].in_var_COLOR0, gl_in[1].in_var_COLOR0, gl_in[2].in_var_COLOR0, gl_in[3].in_var_COLOR0, gl_in[4].in_var_COLOR0, gl_in[5].in_var_COLOR0, gl_in[6].in_var_COLOR0, gl_in[7].in_var_COLOR0, gl_in[8].in_var_COLOR0, gl_in[9].in_var_COLOR0, gl_in[10].in_var_COLOR0, gl_in[11].in_var_COLOR0 }); + spvUnsafeArray, 12> _147 = spvUnsafeArray, 12>({ spvUnsafeArray({ gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[1].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[2].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[3].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[4].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[5].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[6].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[7].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[8].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[9].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[10].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[11].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray _148 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD4, gl_in[1].in_var_TEXCOORD4, gl_in[2].in_var_TEXCOORD4, gl_in[3].in_var_TEXCOORD4, gl_in[4].in_var_TEXCOORD4, gl_in[5].in_var_TEXCOORD4, gl_in[6].in_var_TEXCOORD4, 
gl_in[7].in_var_TEXCOORD4, gl_in[8].in_var_TEXCOORD4, gl_in[9].in_var_TEXCOORD4, gl_in[10].in_var_TEXCOORD4, gl_in[11].in_var_TEXCOORD4 }); + spvUnsafeArray _149 = spvUnsafeArray({ gl_in[0].in_var_PRIMITIVE_ID, gl_in[1].in_var_PRIMITIVE_ID, gl_in[2].in_var_PRIMITIVE_ID, gl_in[3].in_var_PRIMITIVE_ID, gl_in[4].in_var_PRIMITIVE_ID, gl_in[5].in_var_PRIMITIVE_ID, gl_in[6].in_var_PRIMITIVE_ID, gl_in[7].in_var_PRIMITIVE_ID, gl_in[8].in_var_PRIMITIVE_ID, gl_in[9].in_var_PRIMITIVE_ID, gl_in[10].in_var_PRIMITIVE_ID, gl_in[11].in_var_PRIMITIVE_ID }); + spvUnsafeArray _150 = spvUnsafeArray({ gl_in[0].in_var_LIGHTMAP_ID, gl_in[1].in_var_LIGHTMAP_ID, gl_in[2].in_var_LIGHTMAP_ID, gl_in[3].in_var_LIGHTMAP_ID, gl_in[4].in_var_LIGHTMAP_ID, gl_in[5].in_var_LIGHTMAP_ID, gl_in[6].in_var_LIGHTMAP_ID, gl_in[7].in_var_LIGHTMAP_ID, gl_in[8].in_var_LIGHTMAP_ID, gl_in[9].in_var_LIGHTMAP_ID, gl_in[10].in_var_LIGHTMAP_ID, gl_in[11].in_var_LIGHTMAP_ID }); + spvUnsafeArray _259 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _284 = spvUnsafeArray({ FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[0], _145[0], _146[0], _147[0], _148[0], _149[0], _150[0] } }, FBasePassInterpolantsVSToDS{ { } }, _259[0] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[1], _145[1], _146[1], _147[1], _148[1], _149[1], _150[1] } }, FBasePassInterpolantsVSToDS{ { } }, _259[1] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[2], _145[2], _146[2], _147[2], _148[2], _149[2], _150[2] } }, 
FBasePassInterpolantsVSToDS{ { } }, _259[2] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[3], _145[3], _146[3], _147[3], _148[3], _149[3], _150[3] } }, FBasePassInterpolantsVSToDS{ { } }, _259[3] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[4], _145[4], _146[4], _147[4], _148[4], _149[4], _150[4] } }, FBasePassInterpolantsVSToDS{ { } }, _259[4] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[5], _145[5], _146[5], _147[5], _148[5], _149[5], _150[5] } }, FBasePassInterpolantsVSToDS{ { } }, _259[5] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[6], _145[6], _146[6], _147[6], _148[6], _149[6], _150[6] } }, FBasePassInterpolantsVSToDS{ { } }, _259[6] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[7], _145[7], _146[7], _147[7], _148[7], _149[7], _150[7] } }, FBasePassInterpolantsVSToDS{ { } }, _259[7] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[8], _145[8], _146[8], _147[8], _148[8], _149[8], _150[8] } }, FBasePassInterpolantsVSToDS{ { } }, _259[8] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[9], _145[9], _146[9], _147[9], _148[9], _149[9], _150[9] } }, FBasePassInterpolantsVSToDS{ { } }, _259[9] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[10], _145[10], _146[10], _147[10], _148[10], _149[10], _150[10] } }, FBasePassInterpolantsVSToDS{ { } }, _259[10] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[11], _145[11], _146[11], _147[11], _148[11], _149[11], _150[11] } }, FBasePassInterpolantsVSToDS{ { } }, _259[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _284; + float4 _301 = 
float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _310 = View_PrimitiveSceneData._m0[(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.PrimitiveId * 26u) + 22u].xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _313 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _314 = 2u * gl_InvocationID; + uint _315 = 3u + _314; + uint _316 = _314 + 4u; + float4 _328 = float4(param_var_I[_313].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _336 = float4(param_var_I[_315].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _344 = float4(param_var_I[_316].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _392 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_313].Position) - (float4(dot(param_var_I[_313].Position - param_var_I[gl_InvocationID].Position, _301)) * _301)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_315].Position) + param_var_I[_316].Position) - (float4(dot(param_var_I[_316].Position - param_var_I[_315].Position, _336)) * _336)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_313].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_313].Position, _328)) * _328)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_316].Position) + 
param_var_I[_315].Position) - (float4(dot(param_var_I[_315].Position - param_var_I[_316].Position, _344)) * _344)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_COLOR0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.Color; + gl_out[gl_InvocationID].out_var_TEXCOORD0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TexCoords; + gl_out[gl_InvocationID].out_var_TEXCOORD4 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.LightMapCoordinate; + gl_out[gl_InvocationID].out_var_PRIMITIVE_ID = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.PrimitiveId; + gl_out[gl_InvocationID].out_var_LIGHTMAP_ID = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.LightmapDataIndex; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_PN_POSITION = _392; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _310; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ param_var_I[gl_InvocationID], _392, _310, 1.0, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _450 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * 
float4(0.16666667163372039794921875); + float4 _463; + _463.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _463.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _463.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _463.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _589; + for (;;) + { + float4 _489 = View.View_ViewToClip * float4(0.0); + float4 _494 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _495 = _494.xyz; + float3 _496 = _489.xyz; + float _498 = _494.w; + float _499 = _489.w; + float4 _516 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _517 = _516.xyz; + float _519 = _516.w; + float4 _537 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _538 = _537.xyz; + float _540 = _537.w; + if (any((((int3((_495 - _496) < float3(_498 + _499)) + (int3(2) * int3((_495 + _496) > float3((-_498) - _499)))) | (int3((_517 - _496) < float3(_519 + _499)) + (int3(2) * int3((_517 + _496) > float3((-_519) - _499))))) | (int3((_538 - _496) < float3(_540 + _499)) + (int3(2) * int3((_538 + _496) > float3((-_540) - _499))))) != int3(3))) + { + _589 = float4(0.0); + break; + } + float3 _558 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _559 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _560 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _563 = 
(float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _566 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _569 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _573 = sqrt(dot(_559, _559) / dot(_566, _566)); + float _577 = sqrt(dot(_560, _560) / dot(_569, _569)); + float _581 = sqrt(dot(_558, _558) / dot(_563, _563)); + float4 _582 = float4(_573, _577, _581, 1.0); + _582.w = 0.333000004291534423828125 * ((_573 + _577) + _581); + _589 = float4(View.View_AdaptiveTessellationFactor) * _582; + break; + } + float4 _591 = fast::clamp(_463 * _589, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_591.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_591.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_591.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_591.w); + patchOut.out_var_PN_POSITION9 = _450 + ((_450 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc new file mode 100644 index 00000000000..f72e5d3b753 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc @@ -0,0 +1,464 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + 
+template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FHitProxyVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + float4 Position; + uint VertexID; +}; + +struct FHullShaderConstantDominantVertexData +{ + float2 UV; + float4 Normal; + float3 Tangent; +}; + +struct FHullShaderConstantDominantEdgeData +{ + float2 UV0; + float2 UV1; + float4 Normal0; + float4 Normal1; + float3 Tangent0; + float3 Tangent1; +}; + +struct FPNTessellationHSToDS +{ + FHitProxyVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; + FHullShaderConstantDominantVertexData DominantVertex; + FHullShaderConstantDominantEdgeData DominantEdge; +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; 
+ float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 
View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float 
View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float 
PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +constant float4 _140 = {}; + +struct main0_out +{ + float3 out_var_PN_DisplacementScales; + float2 out_var_PN_DominantEdge; + float2 out_var_PN_DominantEdge1; + float4 out_var_PN_DominantEdge2; + float4 out_var_PN_DominantEdge3; + float3 out_var_PN_DominantEdge4; + float3 out_var_PN_DominantEdge5; + float2 out_var_PN_DominantVertex; + float4 out_var_PN_DominantVertex1; + float3 out_var_PN_DominantVertex2; + spvUnsafeArray out_var_PN_POSITION; + float 
out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; + uint out_var_VS_To_DS_VertexID; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_VS_To_DS_Position [[attribute(2)]]; + uint in_var_VS_To_DS_VertexID [[attribute(3)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _142 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _143 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, 
gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _192 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _193 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_VertexID, gl_in[1].in_var_VS_To_DS_VertexID, gl_in[2].in_var_VS_To_DS_VertexID, gl_in[3].in_var_VS_To_DS_VertexID, gl_in[4].in_var_VS_To_DS_VertexID, gl_in[5].in_var_VS_To_DS_VertexID, gl_in[6].in_var_VS_To_DS_VertexID, gl_in[7].in_var_VS_To_DS_VertexID, gl_in[8].in_var_VS_To_DS_VertexID, gl_in[9].in_var_VS_To_DS_VertexID, gl_in[10].in_var_VS_To_DS_VertexID, gl_in[11].in_var_VS_To_DS_VertexID }); + spvUnsafeArray _230 = spvUnsafeArray({ FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[0], _143[0] } }, _192[0], _193[0] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[1], _143[1] } }, _192[1], _193[1] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[2], _143[2] } }, _192[2], _193[2] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[3], _143[3] } }, _192[3], _193[3] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[4], _143[4] } }, _192[4], _193[4] }, FHitProxyVSToDS{ 
FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[5], _143[5] } }, _192[5], _193[5] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[6], _143[6] } }, _192[6], _193[6] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[7], _143[7] } }, _192[7], _193[7] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[8], _143[8] } }, _192[8], _193[8] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[9], _143[9] } }, _192[9], _193[9] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[10], _143[10] } }, _192[10], _193[10] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[11], _143[11] } }, _192[11], _193[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _230; + float4 _247 = float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _251 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _254 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _255 = 2u * gl_InvocationID; + uint _256 = 3u + _255; + uint _257 = _255 + 4u; + uint _264 = (_254 < 2u) ? 
(_254 + 1u) : 0u; + uint _265 = 2u * _254; + uint _266 = 3u + _265; + uint _267 = _265 + 4u; + float4 _279 = float4(param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _315; + float4 _316; + float4 _317; + float4 _318; + if ((param_var_I[_266].VertexID < param_var_I[_254].VertexID) || ((param_var_I[_266].VertexID == param_var_I[_254].VertexID) && (param_var_I[_267].VertexID < param_var_I[_264].VertexID))) + { + _315 = param_var_I[_267].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _316 = param_var_I[_267].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + _317 = param_var_I[_266].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _318 = param_var_I[_266].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + } + else + { + _315 = param_var_I[_264].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _316 = param_var_I[_264].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + _317 = param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _318 = param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + } + float4 _324 = float4(_318.xyz, 0.0); + float4 _328 = float4(_316.xyz, 0.0); + float4 _336 = float4(param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _344 = float4(param_var_I[_256].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _352 = float4(param_var_I[_257].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _402 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_254].Position) - (float4(dot(param_var_I[_254].Position - param_var_I[gl_InvocationID].Position, _247)) * _247)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_256].Position) + param_var_I[_257].Position) - (float4(dot(param_var_I[_257].Position - 
param_var_I[_256].Position, _344)) * _344)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_254].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_254].Position, _336)) * _336)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_257].Position) + param_var_I[_256].Position) - (float4(dot(param_var_I[_256].Position - param_var_I[_257].Position, _352)) * _352)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_VS_To_DS_VertexID = param_var_I[gl_InvocationID].VertexID; + gl_out[gl_InvocationID].out_var_PN_POSITION = _402; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _251; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_DominantVertex = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantVertex1 = _279; + gl_out[gl_InvocationID].out_var_PN_DominantVertex2 = param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz; + gl_out[gl_InvocationID].out_var_PN_DominantEdge = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantEdge1 = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantEdge2 = _324; + gl_out[gl_InvocationID].out_var_PN_DominantEdge3 = _328; + gl_out[gl_InvocationID].out_var_PN_DominantEdge4 = _317.xyz; + gl_out[gl_InvocationID].out_var_PN_DominantEdge5 = _315.xyz; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ 
param_var_I[gl_InvocationID], _402, _251, 1.0, 1.0, FHullShaderConstantDominantVertexData{ float2(0.0), _279, param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz }, FHullShaderConstantDominantEdgeData{ float2(0.0), float2(0.0), _324, _328, _317.xyz, _315.xyz } }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _461 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * float4(0.16666667163372039794921875); + float4 _474; + _474.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _474.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _474.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _474.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _600; + for (;;) + { + float4 _500 = View.View_ViewToClip * float4(0.0); + float4 _505 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _506 = _505.xyz; + float3 _507 = _500.xyz; + float _509 = _505.w; + float _510 = _500.w; + float4 _527 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _528 = _527.xyz; + float _530 = _527.w; + float4 _548 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _549 = _548.xyz; + float _551 = _548.w; + if (any((((int3((_506 - _507) < 
float3(_509 + _510)) + (int3(2) * int3((_506 + _507) > float3((-_509) - _510)))) | (int3((_528 - _507) < float3(_530 + _510)) + (int3(2) * int3((_528 + _507) > float3((-_530) - _510))))) | (int3((_549 - _507) < float3(_551 + _510)) + (int3(2) * int3((_549 + _507) > float3((-_551) - _510))))) != int3(3))) + { + _600 = float4(0.0); + break; + } + float3 _569 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _570 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _571 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _574 = (float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _577 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _580 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _584 = sqrt(dot(_570, _570) / dot(_577, _577)); + float _588 = sqrt(dot(_571, _571) / dot(_580, _580)); + float _592 = sqrt(dot(_569, _569) / dot(_574, _574)); + float4 _593 = float4(_584, _588, _592, 1.0); + _593.w = 0.333000004291534423828125 * ((_584 + _588) + _592); + _600 = float4(View.View_AdaptiveTessellationFactor) * _593; + break; + } + float4 _602 = fast::clamp(_474 * _600, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_602.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_602.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_602.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_602.w); + patchOut.out_var_PN_POSITION9 = 
_461 + ((_461 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc new file mode 100644 index 00000000000..5d4e320bd04 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc @@ -0,0 +1,408 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; + float4 Color; + spvUnsafeArray TexCoords; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FHitProxyVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + float4 Position; +}; + +struct FPNTessellationHSToDS +{ + FHitProxyVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct 
type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 
View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float 
PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float 
PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +constant float4 _127 = {}; + +struct main0_out +{ + float4 out_var_COLOR0; + float3 
out_var_PN_DisplacementScales; + spvUnsafeArray out_var_PN_POSITION; + float out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + spvUnsafeArray out_var_TEXCOORD0; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_COLOR0 [[attribute(2)]]; + float2 in_var_TEXCOORD0_0 [[attribute(3)]]; + float2 in_var_TEXCOORD0_1 [[attribute(4)]]; + float4 in_var_VS_To_DS_Position [[attribute(5)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _129 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, 
gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _130 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _131 = spvUnsafeArray({ gl_in[0].in_var_COLOR0, gl_in[1].in_var_COLOR0, gl_in[2].in_var_COLOR0, gl_in[3].in_var_COLOR0, gl_in[4].in_var_COLOR0, gl_in[5].in_var_COLOR0, gl_in[6].in_var_COLOR0, gl_in[7].in_var_COLOR0, gl_in[8].in_var_COLOR0, gl_in[9].in_var_COLOR0, gl_in[10].in_var_COLOR0, gl_in[11].in_var_COLOR0 }); + spvUnsafeArray, 12> _132 = spvUnsafeArray, 12>({ spvUnsafeArray({ gl_in[0].in_var_TEXCOORD0_0, gl_in[0].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[1].in_var_TEXCOORD0_0, gl_in[1].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[2].in_var_TEXCOORD0_0, gl_in[2].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[3].in_var_TEXCOORD0_0, gl_in[3].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[4].in_var_TEXCOORD0_0, gl_in[4].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[5].in_var_TEXCOORD0_0, gl_in[5].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[6].in_var_TEXCOORD0_0, gl_in[6].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[7].in_var_TEXCOORD0_0, gl_in[7].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[8].in_var_TEXCOORD0_0, gl_in[8].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[9].in_var_TEXCOORD0_0, gl_in[9].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[10].in_var_TEXCOORD0_0, gl_in[10].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[11].in_var_TEXCOORD0_0, gl_in[11].in_var_TEXCOORD0_1 }) }); + spvUnsafeArray _205 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, 
gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _230 = spvUnsafeArray({ FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[0], _130[0], _131[0], _132[0] } }, _205[0] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[1], _130[1], _131[1], _132[1] } }, _205[1] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[2], _130[2], _131[2], _132[2] } }, _205[2] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[3], _130[3], _131[3], _132[3] } }, _205[3] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[4], _130[4], _131[4], _132[4] } }, _205[4] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[5], _130[5], _131[5], _132[5] } }, _205[5] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[6], _130[6], _131[6], _132[6] } }, _205[6] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[7], _130[7], _131[7], _132[7] } }, _205[7] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[8], _130[8], _131[8], _132[8] } }, _205[8] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[9], _130[9], _131[9], _132[9] } }, _205[9] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[10], _130[10], _131[10], _132[10] } }, _205[10] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ 
FVertexFactoryInterpolantsVSToPS{ _129[11], _130[11], _131[11], _132[11] } }, _205[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _230; + float4 _247 = float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _251 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _254 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _255 = 2u * gl_InvocationID; + uint _256 = 3u + _255; + uint _257 = _255 + 4u; + float4 _269 = float4(param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _277 = float4(param_var_I[_256].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _285 = float4(param_var_I[_257].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _333 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_254].Position) - (float4(dot(param_var_I[_254].Position - param_var_I[gl_InvocationID].Position, _247)) * _247)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_256].Position) + param_var_I[_257].Position) - (float4(dot(param_var_I[_257].Position - param_var_I[_256].Position, _277)) * _277)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_254].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_254].Position, _269)) * _269)) * 
float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_257].Position) + param_var_I[_256].Position) - (float4(dot(param_var_I[_256].Position - param_var_I[_257].Position, _285)) * _285)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_COLOR0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.Color; + gl_out[gl_InvocationID].out_var_TEXCOORD0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TexCoords; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_PN_POSITION = _333; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _251; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ param_var_I[gl_InvocationID], _333, _251, 1.0, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _385 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * float4(0.16666667163372039794921875); + float4 _398; + _398.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _398.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _398.z = 0.5 * 
(temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _398.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _524; + for (;;) + { + float4 _424 = View.View_ViewToClip * float4(0.0); + float4 _429 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _430 = _429.xyz; + float3 _431 = _424.xyz; + float _433 = _429.w; + float _434 = _424.w; + float4 _451 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _452 = _451.xyz; + float _454 = _451.w; + float4 _472 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _473 = _472.xyz; + float _475 = _472.w; + if (any((((int3((_430 - _431) < float3(_433 + _434)) + (int3(2) * int3((_430 + _431) > float3((-_433) - _434)))) | (int3((_452 - _431) < float3(_454 + _434)) + (int3(2) * int3((_452 + _431) > float3((-_454) - _434))))) | (int3((_473 - _431) < float3(_475 + _434)) + (int3(2) * int3((_473 + _431) > float3((-_475) - _434))))) != int3(3))) + { + _524 = float4(0.0); + break; + } + float3 _493 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _494 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _495 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _498 = (float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _501 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - 
float3(View.View_TranslatedWorldCameraOrigin); + float3 _504 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _508 = sqrt(dot(_494, _494) / dot(_501, _501)); + float _512 = sqrt(dot(_495, _495) / dot(_504, _504)); + float _516 = sqrt(dot(_493, _493) / dot(_498, _498)); + float4 _517 = float4(_508, _512, _516, 1.0); + _517.w = 0.333000004291534423828125 * ((_508 + _512) + _516); + _524 = float4(View.View_AdaptiveTessellationFactor) * _517; + break; + } + float4 _526 = fast::clamp(_398 * _524, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_526.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_526.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_526.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_526.w); + patchOut.out_var_PN_POSITION9 = _385 + ((_385 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/opt/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc b/reference/opt/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc new file mode 100644 index 00000000000..9ae81e40615 --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc @@ -0,0 +1,175 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FSharedBasePassInterpolants +{ +}; +struct FBasePassInterpolantsVSToDS +{ + FSharedBasePassInterpolants _m0; +}; + +struct FBasePassVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + FBasePassInterpolantsVSToDS BasePassInterpolants; + float4 Position; +}; + +struct FFlatTessellationHSToDS +{ + FBasePassVSToDS PassSpecificData; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 
Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[3]; + float4 Material_ScalarExpressions[1]; +}; + +constant float4 _88 = {}; + +struct main0_out +{ + float3 out_var_Flat_DisplacementScales; + float out_var_Flat_TessellationMultiplier; + float out_var_Flat_WorldDisplacementMultiplier; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_VS_To_DS_Position [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_Primitive& Primitive [[buffer(0)]], constant type_Material& Material [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FFlatTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _90 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, 
gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _91 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _104 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position }); + spvUnsafeArray _111 = spvUnsafeArray({ FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[0], _91[0] } }, FBasePassInterpolantsVSToDS{ { } }, _104[0] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[1], _91[1] } }, FBasePassInterpolantsVSToDS{ { } }, _104[1] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[2], _91[2] } }, FBasePassInterpolantsVSToDS{ { } }, _104[2] } }); + spvUnsafeArray param_var_I; + param_var_I = _111; + float3 _128 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_Flat_DisplacementScales = _128; + gl_out[gl_InvocationID].out_var_Flat_TessellationMultiplier = Material.Material_ScalarExpressions[0].x; + 
gl_out[gl_InvocationID].out_var_Flat_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FFlatTessellationHSToDS{ param_var_I[gl_InvocationID], _128, Material.Material_ScalarExpressions[0].x, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _154; + _154.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _154.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _154.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _154.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _173 = fast::clamp(_154, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_173.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_173.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_173.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_173.w); + } +} + diff --git a/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese new file mode 100644 index 00000000000..bc0d7e051fa --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese @@ -0,0 +1,418 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_ShadowDepthPass +{ + float PrePadding_ShadowDepthPass_LPV_0; + float PrePadding_ShadowDepthPass_LPV_4; + float PrePadding_ShadowDepthPass_LPV_8; + float PrePadding_ShadowDepthPass_LPV_12; + float PrePadding_ShadowDepthPass_LPV_16; + float PrePadding_ShadowDepthPass_LPV_20; + float PrePadding_ShadowDepthPass_LPV_24; + float PrePadding_ShadowDepthPass_LPV_28; + float PrePadding_ShadowDepthPass_LPV_32; + float PrePadding_ShadowDepthPass_LPV_36; + float PrePadding_ShadowDepthPass_LPV_40; + float PrePadding_ShadowDepthPass_LPV_44; + float 
PrePadding_ShadowDepthPass_LPV_48; + float PrePadding_ShadowDepthPass_LPV_52; + float PrePadding_ShadowDepthPass_LPV_56; + float PrePadding_ShadowDepthPass_LPV_60; + float PrePadding_ShadowDepthPass_LPV_64; + float PrePadding_ShadowDepthPass_LPV_68; + float PrePadding_ShadowDepthPass_LPV_72; + float PrePadding_ShadowDepthPass_LPV_76; + float PrePadding_ShadowDepthPass_LPV_80; + float PrePadding_ShadowDepthPass_LPV_84; + float PrePadding_ShadowDepthPass_LPV_88; + float PrePadding_ShadowDepthPass_LPV_92; + float PrePadding_ShadowDepthPass_LPV_96; + float PrePadding_ShadowDepthPass_LPV_100; + float PrePadding_ShadowDepthPass_LPV_104; + float PrePadding_ShadowDepthPass_LPV_108; + float PrePadding_ShadowDepthPass_LPV_112; + float PrePadding_ShadowDepthPass_LPV_116; + float PrePadding_ShadowDepthPass_LPV_120; + float PrePadding_ShadowDepthPass_LPV_124; + float PrePadding_ShadowDepthPass_LPV_128; + float PrePadding_ShadowDepthPass_LPV_132; + float PrePadding_ShadowDepthPass_LPV_136; + float PrePadding_ShadowDepthPass_LPV_140; + float PrePadding_ShadowDepthPass_LPV_144; + float PrePadding_ShadowDepthPass_LPV_148; + float PrePadding_ShadowDepthPass_LPV_152; + float PrePadding_ShadowDepthPass_LPV_156; + float PrePadding_ShadowDepthPass_LPV_160; + float PrePadding_ShadowDepthPass_LPV_164; + float PrePadding_ShadowDepthPass_LPV_168; + float PrePadding_ShadowDepthPass_LPV_172; + float PrePadding_ShadowDepthPass_LPV_176; + float PrePadding_ShadowDepthPass_LPV_180; + float PrePadding_ShadowDepthPass_LPV_184; + float PrePadding_ShadowDepthPass_LPV_188; + float PrePadding_ShadowDepthPass_LPV_192; + float PrePadding_ShadowDepthPass_LPV_196; + float PrePadding_ShadowDepthPass_LPV_200; + float PrePadding_ShadowDepthPass_LPV_204; + float PrePadding_ShadowDepthPass_LPV_208; + float PrePadding_ShadowDepthPass_LPV_212; + float PrePadding_ShadowDepthPass_LPV_216; + float PrePadding_ShadowDepthPass_LPV_220; + float PrePadding_ShadowDepthPass_LPV_224; + float 
PrePadding_ShadowDepthPass_LPV_228; + float PrePadding_ShadowDepthPass_LPV_232; + float PrePadding_ShadowDepthPass_LPV_236; + float PrePadding_ShadowDepthPass_LPV_240; + float PrePadding_ShadowDepthPass_LPV_244; + float PrePadding_ShadowDepthPass_LPV_248; + float PrePadding_ShadowDepthPass_LPV_252; + float PrePadding_ShadowDepthPass_LPV_256; + float PrePadding_ShadowDepthPass_LPV_260; + float PrePadding_ShadowDepthPass_LPV_264; + float PrePadding_ShadowDepthPass_LPV_268; + float4x4 ShadowDepthPass_LPV_mRsmToWorld; + float4 ShadowDepthPass_LPV_mLightColour; + float4 ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection; + float4 ShadowDepthPass_LPV_mEyePos; + packed_int3 ShadowDepthPass_LPV_mOldGridOffset; + int PrePadding_ShadowDepthPass_LPV_396; + packed_int3 ShadowDepthPass_LPV_mLpvGridOffset; + float ShadowDepthPass_LPV_ClearMultiplier; + float ShadowDepthPass_LPV_LpvScale; + float ShadowDepthPass_LPV_OneOverLpvScale; + float ShadowDepthPass_LPV_DirectionalOcclusionIntensity; + float ShadowDepthPass_LPV_DirectionalOcclusionRadius; + float ShadowDepthPass_LPV_RsmAreaIntensityMultiplier; + float ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier; + float ShadowDepthPass_LPV_SecondaryOcclusionStrength; + float ShadowDepthPass_LPV_SecondaryBounceStrength; + float ShadowDepthPass_LPV_VplInjectionBias; + float ShadowDepthPass_LPV_GeometryVolumeInjectionBias; + float ShadowDepthPass_LPV_EmissiveInjectionMultiplier; + int ShadowDepthPass_LPV_PropagationIndex; + float4x4 ShadowDepthPass_ProjectionMatrix; + float4x4 ShadowDepthPass_ViewMatrix; + float4 ShadowDepthPass_ShadowParams; + float ShadowDepthPass_bClampToNearPlane; + float PrePadding_ShadowDepthPass_612; + float PrePadding_ShadowDepthPass_616; + float PrePadding_ShadowDepthPass_620; + float4x4 ShadowDepthPass_ShadowViewProjectionMatrices[6]; + float4x4 ShadowDepthPass_ShadowViewMatrices[6]; +}; + +constant float4 _113 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 
out_var_TEXCOORD11_centroid [[user(locn1)]]; + float4 out_var_COLOR0 [[user(locn2)]]; + float4 out_var_TEXCOORD0_0 [[user(locn3)]]; + uint out_var_PRIMITIVE_ID [[user(locn4)]]; + float out_var_TEXCOORD6 [[user(locn5)]]; + float out_var_TEXCOORD8 [[user(locn6)]]; + float3 out_var_TEXCOORD7 [[user(locn7)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_COLOR0 [[attribute(0)]]; + float4 in_var_PN_POSITION_0 [[attribute(2)]]; + float4 in_var_PN_POSITION_1 [[attribute(3)]]; + float4 in_var_PN_POSITION_2 [[attribute(4)]]; + float in_var_PN_WorldDisplacementMultiplier [[attribute(7)]]; + uint in_var_PRIMITIVE_ID [[attribute(8)]]; + float4 in_var_TEXCOORD0_0 [[attribute(9)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(10)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(11)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(5)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], constant type_ShadowDepthPass& ShadowDepthPass [[buffer(1)]], texture2d Material_Texture2D_3 [[texture(0)]], sampler Material_Texture2D_3Sampler [[sampler(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray out_var_TEXCOORD0 = {}; + spvUnsafeArray _117 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _118 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _119 = spvUnsafeArray({ patchIn.gl_in[0].in_var_COLOR0, patchIn.gl_in[1].in_var_COLOR0, patchIn.gl_in[2].in_var_COLOR0 }); + spvUnsafeArray, 3> _120 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_TEXCOORD0_0 }), 
spvUnsafeArray({ patchIn.gl_in[2].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray, 3> _135 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + spvUnsafeArray _136 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[1].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[2].in_var_PN_WorldDisplacementMultiplier }); + float _157 = gl_TessCoord.x * gl_TessCoord.x; + float _158 = gl_TessCoord.y * gl_TessCoord.y; + float _159 = gl_TessCoord.z * gl_TessCoord.z; + float4 _165 = float4(gl_TessCoord.x); + float4 _169 = float4(gl_TessCoord.y); + float4 _174 = float4(gl_TessCoord.z); + float4 _177 = float4(_157 * 3.0); + float4 _181 = float4(_158 * 3.0); + float4 _188 = float4(_159 * 3.0); + float4 _202 = fma(((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _174) * _165, _169, fma(_135[2][2] * _177, _174, fma(_135[2][1] * _188, _165, fma(_135[1][2] * _188, _169, fma(_135[1][1] * _181, _174, fma(_135[0][2] * _181, _165, fma(_135[0][1] * _177, _169, fma(_135[2][0] * float4(_159), _174, fma(_135[0][0] * float4(_157), _165, (_135[1][0] * float4(_158)) * _169))))))))); + float3 _226 = fma(_117[2].xyz, float3(gl_TessCoord.z), fma(_117[0].xyz, float3(gl_TessCoord.x), _117[1].xyz * float3(gl_TessCoord.y)).xyz); + float4 _229 = fma(_118[2], _174, fma(_118[0], _165, _118[1] * _169)); + float4 _231 = fma(_119[2], _174, fma(_119[0], _165, _119[1] * _169)); + float4 _233 = fma(_120[2][0], _174, fma(_120[0][0], _165, _120[1][0] * _169)); + spvUnsafeArray _234 = spvUnsafeArray({ _233 }); + float3 _236 = _229.xyz; + float3 _264 = 
fma((float3((Material_Texture2D_3.sample(Material_Texture2D_3Sampler, fma(_233.zw, float2(1.0, 2.0), float2(View.View_GameTime * 0.20000000298023223876953125, View.View_GameTime * (-0.699999988079071044921875))), level(-1.0)).x * 10.0) * (1.0 - _231.x)) * _236) * float3(0.5), float3(fma(_136[2], gl_TessCoord.z, fma(_136[0], gl_TessCoord.x, _136[1] * gl_TessCoord.y))), _202.xyz); + float4 _270 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_264.x, _264.y, _264.z, _202.w); + float4 _281; + if ((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_270.z < 0.0)) + { + float4 _279 = _270; + _279.z = 9.9999999747524270787835121154785e-07; + _279.w = 1.0; + _281 = _279; + } + else + { + _281 = _270; + } + float _290 = abs(dot(float3(ShadowDepthPass.ShadowDepthPass_ViewMatrix[0].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[1].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[2].z), _236)); + out.out_var_TEXCOORD10_centroid = float4(_226.x, _226.y, _226.z, _113.w); + out.out_var_TEXCOORD11_centroid = _229; + out.out_var_COLOR0 = _231; + out_var_TEXCOORD0 = _234; + out.out_var_PRIMITIVE_ID = patchIn.gl_in[0u].in_var_PRIMITIVE_ID; + out.out_var_TEXCOORD6 = _281.z; + out.out_var_TEXCOORD8 = fma(ShadowDepthPass.ShadowDepthPass_ShadowParams.y, fast::clamp((abs(_290) > 0.0) ? 
(sqrt(fast::clamp(fma(-_290, _290, 1.0), 0.0, 1.0)) / _290) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z), ShadowDepthPass.ShadowDepthPass_ShadowParams.x); + out.out_var_TEXCOORD7 = _264.xyz; + out.gl_Position = _281; + out.out_var_TEXCOORD0_0 = out_var_TEXCOORD0[0]; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese new file mode 100644 index 00000000000..987ba54eada --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese @@ -0,0 +1,416 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 
View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; 
+ float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float 
View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 
View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; + float PrePadding_View_3048; + float PrePadding_View_3052; + float4x4 View_WorldToVirtualTexture; + float4 View_VirtualTextureParams; + float4 View_XRPassthroughCameraUVs[2]; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[5]; + float4 Material_ScalarExpressions[2]; +}; + +constant float4 _118 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD6 [[user(locn0)]]; + float4 out_var_TEXCOORD7 [[user(locn1)]]; + float4 out_var_TEXCOORD10_centroid [[user(locn2)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn3)]]; + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [1]; + float gl_ClipDistance_0 [[user(clip0)]]; +}; + +struct main0_in +{ + float4 in_var_PN_DominantEdge2 [[attribute(3)]]; + float4 in_var_PN_DominantEdge3 [[attribute(4)]]; + float3 in_var_PN_DominantEdge4 [[attribute(5)]]; + float3 in_var_PN_DominantEdge5 [[attribute(6)]]; + float4 in_var_PN_DominantVertex1 [[attribute(8)]]; + float3 in_var_PN_DominantVertex2 [[attribute(9)]]; + float4 in_var_PN_POSITION_0 [[attribute(10)]]; + float4 in_var_PN_POSITION_1 [[attribute(11)]]; + float4 in_var_PN_POSITION_2 [[attribute(12)]]; + float in_var_PN_WorldDisplacementMultiplier [[attribute(15)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(16)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(17)]]; + float4 in_var_TEXCOORD6 [[attribute(18)]]; + float4 in_var_TEXCOORD8 [[attribute(19)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(13)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Material& Material [[buffer(1)]], texture3d View_GlobalDistanceFieldTexture0 
[[texture(0)]], texture3d View_GlobalDistanceFieldTexture1 [[texture(1)]], texture3d View_GlobalDistanceFieldTexture2 [[texture(2)]], texture3d View_GlobalDistanceFieldTexture3 [[texture(3)]], sampler View_GlobalDistanceFieldSampler0 [[sampler(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray _120 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD6, patchIn.gl_in[1].in_var_TEXCOORD6, patchIn.gl_in[2].in_var_TEXCOORD6 }); + spvUnsafeArray _121 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD8, patchIn.gl_in[1].in_var_TEXCOORD8, patchIn.gl_in[2].in_var_TEXCOORD8 }); + spvUnsafeArray _128 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _129 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _136 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + spvUnsafeArray _137 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[1].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[2].in_var_PN_WorldDisplacementMultiplier }); + spvUnsafeArray _138 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantVertex1, patchIn.gl_in[1].in_var_PN_DominantVertex1, patchIn.gl_in[2].in_var_PN_DominantVertex1 }); + spvUnsafeArray _139 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantVertex2, patchIn.gl_in[1].in_var_PN_DominantVertex2, patchIn.gl_in[2].in_var_PN_DominantVertex2 
}); + spvUnsafeArray _146 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge2, patchIn.gl_in[1].in_var_PN_DominantEdge2, patchIn.gl_in[2].in_var_PN_DominantEdge2 }); + spvUnsafeArray _147 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge3, patchIn.gl_in[1].in_var_PN_DominantEdge3, patchIn.gl_in[2].in_var_PN_DominantEdge3 }); + spvUnsafeArray _148 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge4, patchIn.gl_in[1].in_var_PN_DominantEdge4, patchIn.gl_in[2].in_var_PN_DominantEdge4 }); + spvUnsafeArray _149 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge5, patchIn.gl_in[1].in_var_PN_DominantEdge5, patchIn.gl_in[2].in_var_PN_DominantEdge5 }); + float _190 = gl_TessCoord.x * gl_TessCoord.x; + float _191 = gl_TessCoord.y * gl_TessCoord.y; + float _192 = gl_TessCoord.z * gl_TessCoord.z; + float4 _198 = float4(gl_TessCoord.x); + float4 _202 = float4(gl_TessCoord.y); + float4 _207 = float4(gl_TessCoord.z); + float4 _210 = float4(_190 * 3.0); + float4 _214 = float4(_191 * 3.0); + float4 _221 = float4(_192 * 3.0); + float4 _235 = fma(((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _207) * _198, _202, fma(_136[2][2] * _210, _207, fma(_136[2][1] * _221, _198, fma(_136[1][2] * _221, _202, fma(_136[1][1] * _214, _207, fma(_136[0][2] * _214, _198, fma(_136[0][1] * _210, _202, fma(_136[2][0] * float4(_192), _207, fma(_136[0][0] * float4(_190), _198, (_136[1][0] * float4(_191)) * _202))))))))); + float3 _237 = float3(gl_TessCoord.x); + float3 _240 = float3(gl_TessCoord.y); + float3 _254 = float3(gl_TessCoord.z); + float3 _256 = fma(_128[2].xyz, _254, fma(_128[0].xyz, _237, _128[1].xyz * _240).xyz); + float4 _259 = fma(_129[2], _207, fma(_129[0], _198, _129[1] * _202)); + float3 _264 = _235.xyz; + float3 _265 = _256.xyz; + float3 _266 = _259.xyz; + float3 _272 = _264 + float3(View.View_WorldCameraOrigin); + float _279 = float(int(gl_TessCoord.x == 0.0)); + float _282 = float(int(gl_TessCoord.y == 0.0)); + float _285 = float(int(gl_TessCoord.z == 
0.0)); + float _286 = _279 + _282; + float _287 = _286 + _285; + float4 _387; + float3 _388; + if (float(int(_287 == 2.0)) == 1.0) + { + float _363 = float(int((_282 + _285) == 2.0)); + float _367 = float(int((_285 + _279) == 2.0)); + float _370 = float(int(_286 == 2.0)); + _387 = fma(float4(_370), _138[2], fma(float4(_363), _138[0], float4(_367) * _138[1])); + _388 = fma(float3(_370), _139[2], fma(float3(_363), _139[0], float3(_367) * _139[1])); + } + else + { + float4 _358; + float3 _359; + if (float(int(_287 == 1.0)) != 0.0) + { + float4 _304 = float4(_279); + float4 _306 = float4(_282); + float4 _309 = float4(_285); + float4 _311 = fma(_309, _146[2], fma(_304, _146[0], _306 * _146[1])); + float4 _316 = fma(_309, _147[2], fma(_304, _147[0], _306 * _147[1])); + float3 _331 = float3(_279); + float3 _333 = float3(_282); + float3 _336 = float3(_285); + float3 _338 = fma(_336, _148[2], fma(_331, _148[0], _333 * _148[1])); + float3 _343 = fma(_336, _149[2], fma(_331, _149[0], _333 * _149[1])); + _358 = fma(_309, fma(_198, _311, _202 * _316), fma(_304, fma(_202, _311, _207 * _316), _306 * fma(_207, _311, _198 * _316))); + _359 = fma(_336, fma(_237, _338, _240 * _343), fma(_331, fma(_240, _338, _254 * _343), _333 * fma(_254, _338, _237 * _343))); + } + else + { + _358 = float4(_259.xyz, 0.0); + _359 = _265; + } + _387 = _358; + _388 = _359; + } + float3x3 _398; + if (float(int(_287 == 0.0)) == 0.0) + { + _398 = float3x3(_388, cross(_387.xyz, _388) * float3(_387.w), _387.xyz); + } + else + { + _398 = float3x3(_265, cross(_266, _265) * float3(_259.w), _266); + } + float3 _411 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[0].xyz) + View.View_GlobalVolumeCenterAndExtent[0].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[0].xyz + View.View_GlobalVolumeCenterAndExtent[0].www) - _272, float3(0.0))); + float _547; + if (fast::min(_411.x, fast::min(_411.y, _411.z)) > (View.View_GlobalVolumeCenterAndExtent[0].w * 
View.View_GlobalVolumeTexelSize)) + { + _547 = View_GlobalDistanceFieldTexture0.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[0u].www, View.View_GlobalVolumeWorldToUVAddAndMul[0u].xyz), level(0.0)).x; + } + else + { + float3 _436 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[1].xyz) + View.View_GlobalVolumeCenterAndExtent[1].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[1].xyz + View.View_GlobalVolumeCenterAndExtent[1].www) - _272, float3(0.0))); + float _535; + if (fast::min(_436.x, fast::min(_436.y, _436.z)) > (View.View_GlobalVolumeCenterAndExtent[1].w * View.View_GlobalVolumeTexelSize)) + { + _535 = View_GlobalDistanceFieldTexture1.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[1u].www, View.View_GlobalVolumeWorldToUVAddAndMul[1u].xyz), level(0.0)).x; + } + else + { + float3 _459 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[2].xyz) + View.View_GlobalVolumeCenterAndExtent[2].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[2].xyz + View.View_GlobalVolumeCenterAndExtent[2].www) - _272, float3(0.0))); + float3 _475 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[3].xyz) + View.View_GlobalVolumeCenterAndExtent[3].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[3].xyz + View.View_GlobalVolumeCenterAndExtent[3].www) - _272, float3(0.0))); + float _480 = fast::min(_475.x, fast::min(_475.y, _475.z)); + float _523; + if (fast::min(_459.x, fast::min(_459.y, _459.z)) > (View.View_GlobalVolumeCenterAndExtent[2].w * View.View_GlobalVolumeTexelSize)) + { + _523 = View_GlobalDistanceFieldTexture2.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[2u].www, View.View_GlobalVolumeWorldToUVAddAndMul[2u].xyz), level(0.0)).x; + } + else + { + float _511; + if (_480 > (View.View_GlobalVolumeCenterAndExtent[3].w * 
View.View_GlobalVolumeTexelSize)) + { + _511 = mix(View.View_MaxGlobalDistance, View_GlobalDistanceFieldTexture3.sample(View_GlobalDistanceFieldSampler0, fma(_272, View.View_GlobalVolumeWorldToUVAddAndMul[3u].www, View.View_GlobalVolumeWorldToUVAddAndMul[3u].xyz), level(0.0)).x, fast::clamp((_480 * 10.0) * View.View_GlobalVolumeWorldToUVAddAndMul[3].w, 0.0, 1.0)); + } + else + { + _511 = View.View_MaxGlobalDistance; + } + _523 = _511; + } + _535 = _523; + } + _547 = _535; + } + float3 _565 = fma(_398[2] * float3(fast::min(_547 + Material.Material_ScalarExpressions[0].z, 0.0) * Material.Material_ScalarExpressions[0].w), float3(fma(_137[2], gl_TessCoord.z, fma(_137[0], gl_TessCoord.x, _137[1] * gl_TessCoord.y))), _264); + float4 _574 = View.View_TranslatedWorldToClip * float4(_565.x, _565.y, _565.z, _235.w); + _574.z = fma(0.001000000047497451305389404296875, _574.w, _574.z); + out.gl_Position = _574; + out.out_var_TEXCOORD6 = fma(_120[2], _207, fma(_120[0], _198, _120[1] * _202)); + out.out_var_TEXCOORD7 = fma(_121[2], _207, fma(_121[0], _198, _121[1] * _202)); + out.out_var_TEXCOORD10_centroid = float4(_256.x, _256.y, _256.z, _118.w); + out.out_var_TEXCOORD11_centroid = _259; + out.gl_ClipDistance[0u] = dot(View.View_GlobalClippingPlane, float4(_565.xyz - float3(View.View_PreViewTranslation), 1.0)); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese b/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese new file mode 100644 index 00000000000..e0efbbfec2e --- /dev/null +++ b/reference/opt/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese @@ -0,0 +1,215 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_ShadowDepthPass +{ + float PrePadding_ShadowDepthPass_LPV_0; + float PrePadding_ShadowDepthPass_LPV_4; + float PrePadding_ShadowDepthPass_LPV_8; + float PrePadding_ShadowDepthPass_LPV_12; + float PrePadding_ShadowDepthPass_LPV_16; + float PrePadding_ShadowDepthPass_LPV_20; + float PrePadding_ShadowDepthPass_LPV_24; + float PrePadding_ShadowDepthPass_LPV_28; + float PrePadding_ShadowDepthPass_LPV_32; + float PrePadding_ShadowDepthPass_LPV_36; + float PrePadding_ShadowDepthPass_LPV_40; + float PrePadding_ShadowDepthPass_LPV_44; + float PrePadding_ShadowDepthPass_LPV_48; + float PrePadding_ShadowDepthPass_LPV_52; + float PrePadding_ShadowDepthPass_LPV_56; + float PrePadding_ShadowDepthPass_LPV_60; + float PrePadding_ShadowDepthPass_LPV_64; + float PrePadding_ShadowDepthPass_LPV_68; + float PrePadding_ShadowDepthPass_LPV_72; + float PrePadding_ShadowDepthPass_LPV_76; + float PrePadding_ShadowDepthPass_LPV_80; + float PrePadding_ShadowDepthPass_LPV_84; + float PrePadding_ShadowDepthPass_LPV_88; + float PrePadding_ShadowDepthPass_LPV_92; + float PrePadding_ShadowDepthPass_LPV_96; + float PrePadding_ShadowDepthPass_LPV_100; + float PrePadding_ShadowDepthPass_LPV_104; + float PrePadding_ShadowDepthPass_LPV_108; + float PrePadding_ShadowDepthPass_LPV_112; + float PrePadding_ShadowDepthPass_LPV_116; + float 
PrePadding_ShadowDepthPass_LPV_120; + float PrePadding_ShadowDepthPass_LPV_124; + float PrePadding_ShadowDepthPass_LPV_128; + float PrePadding_ShadowDepthPass_LPV_132; + float PrePadding_ShadowDepthPass_LPV_136; + float PrePadding_ShadowDepthPass_LPV_140; + float PrePadding_ShadowDepthPass_LPV_144; + float PrePadding_ShadowDepthPass_LPV_148; + float PrePadding_ShadowDepthPass_LPV_152; + float PrePadding_ShadowDepthPass_LPV_156; + float PrePadding_ShadowDepthPass_LPV_160; + float PrePadding_ShadowDepthPass_LPV_164; + float PrePadding_ShadowDepthPass_LPV_168; + float PrePadding_ShadowDepthPass_LPV_172; + float PrePadding_ShadowDepthPass_LPV_176; + float PrePadding_ShadowDepthPass_LPV_180; + float PrePadding_ShadowDepthPass_LPV_184; + float PrePadding_ShadowDepthPass_LPV_188; + float PrePadding_ShadowDepthPass_LPV_192; + float PrePadding_ShadowDepthPass_LPV_196; + float PrePadding_ShadowDepthPass_LPV_200; + float PrePadding_ShadowDepthPass_LPV_204; + float PrePadding_ShadowDepthPass_LPV_208; + float PrePadding_ShadowDepthPass_LPV_212; + float PrePadding_ShadowDepthPass_LPV_216; + float PrePadding_ShadowDepthPass_LPV_220; + float PrePadding_ShadowDepthPass_LPV_224; + float PrePadding_ShadowDepthPass_LPV_228; + float PrePadding_ShadowDepthPass_LPV_232; + float PrePadding_ShadowDepthPass_LPV_236; + float PrePadding_ShadowDepthPass_LPV_240; + float PrePadding_ShadowDepthPass_LPV_244; + float PrePadding_ShadowDepthPass_LPV_248; + float PrePadding_ShadowDepthPass_LPV_252; + float PrePadding_ShadowDepthPass_LPV_256; + float PrePadding_ShadowDepthPass_LPV_260; + float PrePadding_ShadowDepthPass_LPV_264; + float PrePadding_ShadowDepthPass_LPV_268; + float4x4 ShadowDepthPass_LPV_mRsmToWorld; + float4 ShadowDepthPass_LPV_mLightColour; + float4 ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection; + float4 ShadowDepthPass_LPV_mEyePos; + packed_int3 ShadowDepthPass_LPV_mOldGridOffset; + int PrePadding_ShadowDepthPass_LPV_396; + packed_int3 ShadowDepthPass_LPV_mLpvGridOffset; + 
float ShadowDepthPass_LPV_ClearMultiplier; + float ShadowDepthPass_LPV_LpvScale; + float ShadowDepthPass_LPV_OneOverLpvScale; + float ShadowDepthPass_LPV_DirectionalOcclusionIntensity; + float ShadowDepthPass_LPV_DirectionalOcclusionRadius; + float ShadowDepthPass_LPV_RsmAreaIntensityMultiplier; + float ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier; + float ShadowDepthPass_LPV_SecondaryOcclusionStrength; + float ShadowDepthPass_LPV_SecondaryBounceStrength; + float ShadowDepthPass_LPV_VplInjectionBias; + float ShadowDepthPass_LPV_GeometryVolumeInjectionBias; + float ShadowDepthPass_LPV_EmissiveInjectionMultiplier; + int ShadowDepthPass_LPV_PropagationIndex; + float4x4 ShadowDepthPass_ProjectionMatrix; + float4x4 ShadowDepthPass_ViewMatrix; + float4 ShadowDepthPass_ShadowParams; + float ShadowDepthPass_bClampToNearPlane; + float PrePadding_ShadowDepthPass_612; + float PrePadding_ShadowDepthPass_616; + float PrePadding_ShadowDepthPass_620; + float4x4 ShadowDepthPass_ShadowViewProjectionMatrices[6]; + float4x4 ShadowDepthPass_ShadowViewMatrices[6]; +}; + +constant float4 _90 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn1)]]; + float out_var_TEXCOORD6 [[user(locn2)]]; + float3 out_var_TEXCOORD7 [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_PN_POSITION_0 [[attribute(10)]]; + float4 in_var_PN_POSITION_1 [[attribute(11)]]; + float4 in_var_PN_POSITION_2 [[attribute(12)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(16)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(17)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(13)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_ShadowDepthPass& ShadowDepthPass [[buffer(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray _93 = 
spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _94 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _101 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + float _119 = gl_TessCoord.x * gl_TessCoord.x; + float _120 = gl_TessCoord.y * gl_TessCoord.y; + float _121 = gl_TessCoord.z * gl_TessCoord.z; + float4 _127 = float4(gl_TessCoord.x); + float4 _131 = float4(gl_TessCoord.y); + float4 _136 = float4(gl_TessCoord.z); + float4 _139 = float4(_119 * 3.0); + float4 _143 = float4(_120 * 3.0); + float4 _150 = float4(_121 * 3.0); + float4 _164 = fma(((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _136) * _127, _131, fma(_101[2][2] * _139, _136, fma(_101[2][1] * _150, _127, fma(_101[1][2] * _150, _131, fma(_101[1][1] * _143, _136, fma(_101[0][2] * _143, _127, fma(_101[0][1] * _139, _131, fma(_101[2][0] * float4(_121), _136, fma(_101[0][0] * float4(_119), _127, (_101[1][0] * float4(_120)) * _131))))))))); + float3 _179 = fma(_93[2].xyz, float3(gl_TessCoord.z), fma(_93[0].xyz, float3(gl_TessCoord.x), _93[1].xyz * float3(gl_TessCoord.y)).xyz); + float4 _182 = fma(_94[2], _136, fma(_94[0], _127, _94[1] * _131)); + float4 _189 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_164.x, _164.y, _164.z, _164.w); + float4 _200; + if ((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_189.z < 0.0)) + { + float4 _198 = _189; + _198.z = 
9.9999999747524270787835121154785e-07; + _198.w = 1.0; + _200 = _198; + } + else + { + _200 = _189; + } + float _209 = abs(dot(float3(ShadowDepthPass.ShadowDepthPass_ViewMatrix[0].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[1].z, ShadowDepthPass.ShadowDepthPass_ViewMatrix[2].z), _182.xyz)); + float4 _234 = _200; + _234.z = fma(_200.z, ShadowDepthPass.ShadowDepthPass_ShadowParams.w, fma(ShadowDepthPass.ShadowDepthPass_ShadowParams.y, fast::clamp((abs(_209) > 0.0) ? (sqrt(fast::clamp(fma(-_209, _209, 1.0), 0.0, 1.0)) / _209) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z), ShadowDepthPass.ShadowDepthPass_ShadowParams.x)) * _200.w; + out.out_var_TEXCOORD10_centroid = float4(_179.x, _179.y, _179.z, _90.w); + out.out_var_TEXCOORD11_centroid = _182; + out.out_var_TEXCOORD6 = 0.0; + out.out_var_TEXCOORD7 = _164.xyz; + out.gl_Position = _234; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert b/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert new file mode 100644 index 00000000000..1f47ec47e3c --- /dev/null +++ b/reference/opt/shaders-ue4/asm/vert/array-missing-copies.asm.vert @@ -0,0 +1,457 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 
View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float 
PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float 
PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_MobileBasePass +{ + float4 MobileBasePass_Fog_ExponentialFogParameters; + float4 MobileBasePass_Fog_ExponentialFogParameters2; + float4 MobileBasePass_Fog_ExponentialFogColorParameter; + float4 MobileBasePass_Fog_ExponentialFogParameters3; + float4 MobileBasePass_Fog_InscatteringLightDirection; + float4 MobileBasePass_Fog_DirectionalInscatteringColor; + float2 MobileBasePass_Fog_SinCosInscatteringColorCubemapRotation; + float PrePadding_MobileBasePass_Fog_104; + float PrePadding_MobileBasePass_Fog_108; + packed_float3 MobileBasePass_Fog_FogInscatteringTextureParameters; + float MobileBasePass_Fog_ApplyVolumetricFog; + float PrePadding_MobileBasePass_PlanarReflection_128; + float PrePadding_MobileBasePass_PlanarReflection_132; + float PrePadding_MobileBasePass_PlanarReflection_136; + float PrePadding_MobileBasePass_PlanarReflection_140; + float 
PrePadding_MobileBasePass_PlanarReflection_144; + float PrePadding_MobileBasePass_PlanarReflection_148; + float PrePadding_MobileBasePass_PlanarReflection_152; + float PrePadding_MobileBasePass_PlanarReflection_156; + float4 MobileBasePass_PlanarReflection_ReflectionPlane; + float4 MobileBasePass_PlanarReflection_PlanarReflectionOrigin; + float4 MobileBasePass_PlanarReflection_PlanarReflectionXAxis; + float4 MobileBasePass_PlanarReflection_PlanarReflectionYAxis; + float3x4 MobileBasePass_PlanarReflection_InverseTransposeMirrorMatrix; + packed_float3 MobileBasePass_PlanarReflection_PlanarReflectionParameters; + float PrePadding_MobileBasePass_PlanarReflection_284; + float2 MobileBasePass_PlanarReflection_PlanarReflectionParameters2; + float PrePadding_MobileBasePass_PlanarReflection_296; + float PrePadding_MobileBasePass_PlanarReflection_300; + float4x4 MobileBasePass_PlanarReflection_ProjectionWithExtraFOV[2]; + float4 MobileBasePass_PlanarReflection_PlanarReflectionScreenScaleBias[2]; + float2 MobileBasePass_PlanarReflection_PlanarReflectionScreenBound; + uint MobileBasePass_PlanarReflection_bIsStereo; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_UseEditorDepthTest; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + float PrePadding_Primitive_380; + packed_float3 Primitive_LocalObjectBoundsMax; + uint 
Primitive_LightingChannelMask; + uint Primitive_LightmapDataIndex; + int Primitive_SingleCaptureIndex; +}; + +struct type_LandscapeParameters +{ + float4 LandscapeParameters_HeightmapUVScaleBias; + float4 LandscapeParameters_WeightmapUVScaleBias; + float4 LandscapeParameters_LandscapeLightmapScaleBias; + float4 LandscapeParameters_SubsectionSizeVertsLayerUVPan; + float4 LandscapeParameters_SubsectionOffsetParams; + float4 LandscapeParameters_LightmapSubsectionOffsetParams; + float4x4 LandscapeParameters_LocalToWorldNoScaling; +}; + +struct type_Globals +{ + float4 LodBias; + float4 LodValues; + float4 SectionLods; + float4 NeighborSectionLod[4]; +}; + +struct main0_out +{ + float2 out_var_TEXCOORD0 [[user(locn0)]]; + float2 out_var_TEXCOORD1 [[user(locn1)]]; + float4 out_var_TEXCOORD2 [[user(locn2)]]; + float4 out_var_TEXCOORD3 [[user(locn3)]]; + float4 out_var_TEXCOORD8 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_ATTRIBUTE0 [[attribute(0)]]; + float4 in_var_ATTRIBUTE1_0 [[attribute(1)]]; + float4 in_var_ATTRIBUTE1_1 [[attribute(2)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_MobileBasePass& MobileBasePass [[buffer(1)]], constant type_Primitive& Primitive [[buffer(2)]], constant type_LandscapeParameters& LandscapeParameters [[buffer(3)]], constant type_Globals& _Globals [[buffer(4)]]) +{ + main0_out out = {}; + spvUnsafeArray in_var_ATTRIBUTE1 = {}; + in_var_ATTRIBUTE1[0] = in.in_var_ATTRIBUTE1_0; + in_var_ATTRIBUTE1[1] = in.in_var_ATTRIBUTE1_1; + float4 _115 = in.in_var_ATTRIBUTE0 * float4(255.0); + float2 _116 = _115.zw; + float2 _118 = fract(_116 * float2(0.5)); + float2 _119 = _118 * float2(2.0); + float2 _121 = fma(-_118, float2(2.0), _116) * float2(0.0039215688593685626983642578125); + float2 _122 = _115.xy; + float2 _126 = _122 * float2(_Globals.LodValues.w); + float _127 = _126.y; + float _128 = _126.x; + float4 _131 = float4(_127, _128, 1.0 
- _128, 1.0 - _127); + float4 _132 = _131 * float4(2.0); + float4 _186; + if (_119.y > 0.5) + { + float4 _161; + if (_119.x > 0.5) + { + _161 = fma(_132, float4(_Globals.SectionLods.w), fma(-_131, float4(2.0), float4(1.0)) * _Globals.NeighborSectionLod[3]); + } + else + { + _161 = fma(_132, float4(_Globals.SectionLods.z), fma(-_131, float4(2.0), float4(1.0)) * _Globals.NeighborSectionLod[2]); + } + _186 = _161; + } + else + { + float4 _185; + if (_119.x > 0.5) + { + _185 = fma(_132, float4(_Globals.SectionLods.y), fma(-_131, float4(2.0), float4(1.0)) * _Globals.NeighborSectionLod[1]); + } + else + { + _185 = fma(_132, float4(_Globals.SectionLods.x), fma(-_131, float4(2.0), float4(1.0)) * _Globals.NeighborSectionLod[0]); + } + _186 = _185; + } + float _206; + if ((_128 + _127) > 1.0) + { + float _198; + if (_128 < _127) + { + _198 = _186.w; + } + else + { + _198 = _186.z; + } + _206 = _198; + } + else + { + float _205; + if (_128 < _127) + { + _205 = _186.y; + } + else + { + _205 = _186.x; + } + _206 = _205; + } + float _207 = floor(_206); + float _220 = _121.x; + float3 _235 = select(select(select(select(select(float3(0.03125, _121.yy), float3(0.0625, _220, _121.y), bool3(_207 < 5.0)), float3(0.125, in_var_ATTRIBUTE1[1].w, _220), bool3(_207 < 4.0)), float3(0.25, in_var_ATTRIBUTE1[1].zw), bool3(_207 < 3.0)), float3(0.5, in_var_ATTRIBUTE1[1].yz), bool3(_207 < 2.0)), float3(1.0, in_var_ATTRIBUTE1[1].xy), bool3(_207 < 1.0)); + float _236 = _235.x; + float _245 = (fma(in_var_ATTRIBUTE1[0].x, 65280.0, in_var_ATTRIBUTE1[0].y * 255.0) - 32768.0) * 0.0078125; + float _252 = (fma(in_var_ATTRIBUTE1[0].z, 65280.0, in_var_ATTRIBUTE1[0].w * 255.0) - 32768.0) * 0.0078125; + float2 _257 = floor(_122 * float2(_236)); + float2 _271 = float2(fma(LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x, _236, -1.0), fast::max((LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x * 0.5) * _236, 2.0) - 1.0) * 
float2(LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.y); + float3 _287 = mix(float3(_257 / float2(_271.x), mix(_245, _252, _235.y)), float3(floor(_257 * float2(0.5)) / float2(_271.y), mix(_245, _252, _235.z)), float3(_206 - _207)); + float2 _288 = _119.xy; + float3 _296 = _287 + float3(_288 * LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.ww, 0.0); + float4 _322 = float4(fma(Primitive.Primitive_LocalToWorld[2u].xyz, _296.zzz, fma(Primitive.Primitive_LocalToWorld[0u].xyz, _296.xxx, Primitive.Primitive_LocalToWorld[1u].xyz * _296.yyy)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0); + float2 _323 = _287.xy; + float4 _338 = float4(_322.x, _322.y, _322.z, _322.w); + float4 _339 = View.View_TranslatedWorldToClip * _338; + float3 _341 = _322.xyz - float3(View.View_TranslatedWorldCameraOrigin); + float _345 = dot(_341, _341); + float _346 = rsqrt(_345); + float _347 = _345 * _346; + float _354 = _341.z; + float _357 = fast::max(0.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.w); + float _393; + float _394; + float _395; + float _396; + if (_357 > 0.0) + { + float _361 = _357 * _346; + float _365 = fma(_361, _354, View.View_WorldCameraOrigin[2]); + _393 = fma(-_357, _346, 1.0) * _347; + _394 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.z * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.w))); + _395 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.x * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.y))); + _396 = fma(-_361, _354, _354); + } + else + { + _393 = _347; + _394 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.x; + _395 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.x; + _396 = _354; + } + float 
_400 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * _396); + float _417 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * _396); + float _428 = fma(_395, (abs(_400) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_400)) / _400) : fma(-0.2402265071868896484375, _400, 0.693147182464599609375), _394 * ((abs(_417) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_417)) / _417) : fma(-0.2402265071868896484375, _417, 0.693147182464599609375))); + float3 _459; + if (MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.w >= 0.0) + { + _459 = (MobileBasePass.MobileBasePass_Fog_DirectionalInscatteringColor.xyz * float3(pow(fast::clamp(dot(_341 * float3(_346), MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.xyz), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_DirectionalInscatteringColor.w))) * float3(1.0 - fast::clamp(exp2(-(_428 * fast::max(_393 - MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.w, 0.0))), 0.0, 1.0)); + } + else + { + _459 = float3(0.0); + } + bool _468 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w > 0.0) && (_347 > MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w); + float _471 = _468 ? 
1.0 : fast::max(fast::clamp(exp2(-(_428 * _393)), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.w); + float4 _479 = float4(fma(MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz, float3(1.0 - _471), select(_459, float3(0.0), bool3(_468))), _471); + float4 _482 = _338; + _482.w = _339.w; + out.out_var_TEXCOORD0 = fma(_288, LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.ww, _323 + LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.zw).xy; + out.out_var_TEXCOORD1 = fma(_288, LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.zz, fma(_323, LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.xy, LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.zw)); + out.out_var_TEXCOORD2 = float4(float4(0.0).x, float4(0.0).y, _479.x, _479.y); + out.out_var_TEXCOORD3 = float4(float4(0.0).x, float4(0.0).y, _479.z, _479.w); + out.out_var_TEXCOORD8 = _482; + out.gl_Position = _339; + return out; +} + diff --git a/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert new file mode 100644 index 00000000000..2f9afa1678c --- /dev/null +++ b/reference/opt/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -0,0 +1,387 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) +uint2 spvTexelBufferCoord(uint tc) +{ + return uint2(tc % 4096, tc / 4096); +} + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 
View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 
View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float 
View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float 
View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_UseEditorDepthTest; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + float PrePadding_Primitive_380; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightingChannelMask; + uint Primitive_LightmapDataIndex; + int Primitive_SingleCaptureIndex; +}; + +struct type_MobileShadowDepthPass +{ + float PrePadding_MobileShadowDepthPass_0; + float PrePadding_MobileShadowDepthPass_4; + float PrePadding_MobileShadowDepthPass_8; + float PrePadding_MobileShadowDepthPass_12; + float PrePadding_MobileShadowDepthPass_16; + float PrePadding_MobileShadowDepthPass_20; + float PrePadding_MobileShadowDepthPass_24; + float PrePadding_MobileShadowDepthPass_28; + float PrePadding_MobileShadowDepthPass_32; + float PrePadding_MobileShadowDepthPass_36; + float PrePadding_MobileShadowDepthPass_40; + float PrePadding_MobileShadowDepthPass_44; + float PrePadding_MobileShadowDepthPass_48; + float PrePadding_MobileShadowDepthPass_52; + float PrePadding_MobileShadowDepthPass_56; + float PrePadding_MobileShadowDepthPass_60; + float PrePadding_MobileShadowDepthPass_64; + float PrePadding_MobileShadowDepthPass_68; + float PrePadding_MobileShadowDepthPass_72; + float PrePadding_MobileShadowDepthPass_76; + float4x4 
MobileShadowDepthPass_ProjectionMatrix; + float2 MobileShadowDepthPass_ShadowParams; + float MobileShadowDepthPass_bClampToNearPlane; + float PrePadding_MobileShadowDepthPass_156; + float4x4 MobileShadowDepthPass_ShadowViewProjectionMatrices[6]; +}; + +struct type_EmitterDynamicUniforms +{ + float2 EmitterDynamicUniforms_LocalToWorldScale; + float EmitterDynamicUniforms_EmitterInstRandom; + float PrePadding_EmitterDynamicUniforms_12; + float4 EmitterDynamicUniforms_AxisLockRight; + float4 EmitterDynamicUniforms_AxisLockUp; + float4 EmitterDynamicUniforms_DynamicColor; + float4 EmitterDynamicUniforms_MacroUVParameters; +}; + +struct type_EmitterUniforms +{ + float4 EmitterUniforms_ColorCurve; + float4 EmitterUniforms_ColorScale; + float4 EmitterUniforms_ColorBias; + float4 EmitterUniforms_MiscCurve; + float4 EmitterUniforms_MiscScale; + float4 EmitterUniforms_MiscBias; + float4 EmitterUniforms_SizeBySpeed; + float4 EmitterUniforms_SubImageSize; + float4 EmitterUniforms_TangentSelector; + packed_float3 EmitterUniforms_CameraFacingBlend; + float EmitterUniforms_RemoveHMDRoll; + float EmitterUniforms_RotationRateScale; + float EmitterUniforms_RotationBias; + float EmitterUniforms_CameraMotionBlurAmount; + float PrePadding_EmitterUniforms_172; + float2 EmitterUniforms_PivotOffset; +}; + +struct type_Globals +{ + uint ParticleIndicesOffset; +}; + +struct main0_out +{ + float out_var_TEXCOORD6 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 in_var_ATTRIBUTE0 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], constant type_MobileShadowDepthPass& MobileShadowDepthPass [[buffer(2)]], constant type_EmitterDynamicUniforms& EmitterDynamicUniforms [[buffer(3)]], constant type_EmitterUniforms& EmitterUniforms [[buffer(4)]], constant type_Globals& _Globals [[buffer(5)]], texture2d ParticleIndices [[texture(0)]], texture2d 
PositionTexture [[texture(1)]], texture2d VelocityTexture [[texture(2)]], texture2d AttributesTexture [[texture(3)]], texture2d CurveTexture [[texture(4)]], sampler PositionTextureSampler [[sampler(0)]], sampler VelocityTextureSampler [[sampler(1)]], sampler AttributesTextureSampler [[sampler(2)]], sampler CurveTextureSampler [[sampler(3)]], uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + float2 _133 = ParticleIndices.read(spvTexelBufferCoord((_Globals.ParticleIndicesOffset + ((gl_InstanceIndex * 16u) + (gl_VertexIndex / 4u))))).xy; + float4 _137 = PositionTexture.sample(PositionTextureSampler, _133, level(0.0)); + float4 _145 = AttributesTexture.sample(AttributesTextureSampler, _133, level(0.0)); + float _146 = _137.w; + float3 _158 = float3x3(Primitive.Primitive_LocalToWorld[0].xyz, Primitive.Primitive_LocalToWorld[1].xyz, Primitive.Primitive_LocalToWorld[2].xyz) * VelocityTexture.sample(VelocityTextureSampler, _133, level(0.0)).xyz; + float3 _160 = fast::normalize(_158 + float3(0.0, 0.0, 9.9999997473787516355514526367188e-05)); + float2 _204 = ((((_145.xy + float2((_145.x < 0.5) ? 0.0 : (-0.5), (_145.y < 0.5) ? 
0.0 : (-0.5))) * float2(2.0)) * (fma(CurveTexture.sample(CurveTextureSampler, fma(EmitterUniforms.EmitterUniforms_MiscCurve.zw, float2(_146), EmitterUniforms.EmitterUniforms_MiscCurve.xy), level(0.0)), EmitterUniforms.EmitterUniforms_MiscScale, EmitterUniforms.EmitterUniforms_MiscBias).xy * EmitterDynamicUniforms.EmitterDynamicUniforms_LocalToWorldScale)) * fast::min(fast::max(EmitterUniforms.EmitterUniforms_SizeBySpeed.xy * float2(length(_158)), float2(1.0)), EmitterUniforms.EmitterUniforms_SizeBySpeed.zw)) * float2(step(_146, 1.0)); + float3 _239 = float4(fma(Primitive.Primitive_LocalToWorld[2u].xyz, _137.zzz, fma(Primitive.Primitive_LocalToWorld[0u].xyz, _137.xxx, Primitive.Primitive_LocalToWorld[1u].xyz * _137.yyy)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0).xyz; + float3 _242 = float3(EmitterUniforms.EmitterUniforms_RemoveHMDRoll); + float3 _251 = mix(mix(float3(View.View_ViewRight), float3(View.View_HMDViewNoRollRight), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.w)); + float3 _259 = mix(-mix(float3(View.View_ViewUp), float3(View.View_HMDViewNoRollUp), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.w)); + float3 _260 = float3(View.View_TranslatedWorldCameraOrigin) - _239; + float _261 = dot(_260, _260); + float3 _265 = _260 / float3(sqrt(fast::max(_261, 0.00999999977648258209228515625))); + float3 _335; + float3 _336; + if (EmitterUniforms.EmitterUniforms_CameraFacingBlend[0] > 0.0) + { + float3 _279 = cross(_265, float3(0.0, 0.0, 1.0)); + float3 _284 = _279 / float3(sqrt(fast::max(dot(_279, _279), 0.00999999977648258209228515625))); + float3 _286 = float3(fast::clamp(fma(_261, EmitterUniforms.EmitterUniforms_CameraFacingBlend[1], -EmitterUniforms.EmitterUniforms_CameraFacingBlend[2]), 0.0, 1.0)); + _335 = fast::normalize(mix(_251, 
_284, _286)); + _336 = fast::normalize(mix(_259, cross(_265, _284), _286)); + } + else + { + float3 _333; + float3 _334; + if (EmitterUniforms.EmitterUniforms_TangentSelector.y > 0.0) + { + float3 _297 = cross(_265, _160); + _333 = _297 / float3(sqrt(fast::max(dot(_297, _297), 0.00999999977648258209228515625))); + _334 = -_160; + } + else + { + float3 _331; + float3 _332; + if (EmitterUniforms.EmitterUniforms_TangentSelector.z > 0.0) + { + float3 _310 = cross(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz, _265); + _331 = EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz; + _332 = -(_310 / float3(sqrt(fast::max(dot(_310, _310), 0.00999999977648258209228515625)))); + } + else + { + float3 _329; + float3 _330; + if (EmitterUniforms.EmitterUniforms_TangentSelector.w > 0.0) + { + float3 _322 = cross(_265, float3(0.0, 0.0, 1.0)); + float3 _327 = _322 / float3(sqrt(fast::max(dot(_322, _322), 0.00999999977648258209228515625))); + _329 = _327; + _330 = cross(_265, _327); + } + else + { + _329 = _251; + _330 = _259; + } + _331 = _329; + _332 = _330; + } + _333 = _331; + _334 = _332; + } + _335 = _333; + _336 = _334; + } + float _339 = fma(fma(_145.w * EmitterUniforms.EmitterUniforms_RotationRateScale, _146, _145.z), 6.283185482025146484375, EmitterUniforms.EmitterUniforms_RotationBias); + float3 _342 = float3(sin(_339)); + float3 _344 = float3(cos(_339)); + float3 _367 = _239 + fma(float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)), fma(_342, _336, _344 * _335), float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * fma(_344, _336, -(_342 * _335))); + float4 _371 = float4(_367, 1.0); + float4 _375 = MobileShadowDepthPass.MobileShadowDepthPass_ProjectionMatrix * float4(_371.x, _371.y, _371.z, _371.w); + float4 _386; + if ((MobileShadowDepthPass.MobileShadowDepthPass_bClampToNearPlane > 0.0) && (_375.z < 0.0)) + { + float4 _384 = _375; + _384.z = 
9.9999999747524270787835121154785e-07; + _384.w = 1.0; + _386 = _384; + } + else + { + _386 = _375; + } + float4 _396 = _386; + _396.z = fma(_386.z, MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.y, MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.x) * _386.w; + out.out_var_TEXCOORD6 = 0.0; + out.gl_Position = _396; + return out; +} + diff --git a/reference/opt/shaders/amd/fs.invalid.frag b/reference/opt/shaders/amd/fs.invalid.frag deleted file mode 100644 index aecf69eba7a..00000000000 --- a/reference/opt/shaders/amd/fs.invalid.frag +++ /dev/null @@ -1,15 +0,0 @@ -#version 450 -#extension GL_AMD_shader_fragment_mask : require -#extension GL_AMD_shader_explicit_vertex_parameter : require - -layout(binding = 0) uniform sampler2DMS texture1; - -layout(location = 0) __explicitInterpAMD in vec4 vary; - -void main() -{ - uint testi1 = fragmentMaskFetchAMD(texture1, ivec2(0)); - vec4 test1 = fragmentFetchAMD(texture1, ivec2(1), 2u); - vec4 pos = interpolateAtVertexAMD(vary, 0u); -} - diff --git a/reference/opt/shaders/amd/gcn_shader.comp b/reference/opt/shaders/amd/gcn_shader.comp index e4bb67e9ba1..f1961854861 100644 --- a/reference/opt/shaders/amd/gcn_shader.comp +++ b/reference/opt/shaders/amd/gcn_shader.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif #extension GL_AMD_gcn_shader : require layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/reference/opt/shaders/amd/shader_ballot.comp b/reference/opt/shaders/amd/shader_ballot.comp index cc54a244df2..fc497abba94 100644 --- a/reference/opt/shaders/amd/shader_ballot.comp +++ b/reference/opt/shaders/amd/shader_ballot.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_ARB_shader_ballot : require #extension GL_AMD_shader_ballot : require layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; @@ -16,13 +22,12 @@ layout(binding = 1, std430) buffer outputData void main() { - float _25 = _12.inputDataArray[gl_LocalInvocationID.x]; - bool _31 = _25 > 0.0; + bool _31 = _12.inputDataArray[gl_LocalInvocationID.x] > 0.0; uvec4 _37 = uvec4(unpackUint2x32(ballotARB(_31)), 0u, 0u); uint _44 = mbcntAMD(packUint2x32(uvec2(_37.xy))); if (_31) { - _74.outputDataArray[_44] = _25; + _74.outputDataArray[_44] = _12.inputDataArray[gl_LocalInvocationID.x]; } } diff --git a/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..8d59fcc856a --- /dev/null +++ b/reference/opt/shaders/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) restrict buffer _3_5 +{ + ivec4 _m0; + uvec4 _m1; +} _5; + +layout(binding = 1, std430) restrict buffer _4_6 +{ + uvec4 _m0; + ivec4 _m1; +} _6; + +void main() +{ + _6._m0 = uvec4(lessThan(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(lessThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(lessThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(lessThanEqual(_5._m1, uvec4(_5._m0))); + _6._m0 = 
uvec4(greaterThan(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(greaterThanEqual(_5._m1, uvec4(_5._m0))); +} + diff --git a/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp b/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp index bdb3eeb9afd..8a552dba068 100644 --- a/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp +++ b/reference/opt/shaders/asm/comp/bitcast_iequal.asm.comp @@ -21,13 +21,13 @@ void main() bvec4 _35 = equal(_30, ivec4(_31)); bvec4 _36 = equal(_31, _31); bvec4 _37 = equal(_30, _30); - _6._m0 = mix(uvec4(0u), uvec4(1u), _34); - _6._m0 = mix(uvec4(0u), uvec4(1u), _35); - _6._m0 = mix(uvec4(0u), uvec4(1u), _36); - _6._m0 = mix(uvec4(0u), uvec4(1u), _37); - _6._m1 = mix(ivec4(0), ivec4(1), _34); - _6._m1 = mix(ivec4(0), ivec4(1), _35); - _6._m1 = mix(ivec4(0), ivec4(1), _36); - _6._m1 = mix(ivec4(0), ivec4(1), _37); + _6._m0 = uvec4(_34); + _6._m0 = uvec4(_35); + _6._m0 = uvec4(_36); + _6._m0 = uvec4(_37); + _6._m1 = ivec4(_34); + _6._m1 = ivec4(_35); + _6._m1 = ivec4(_36); + _6._m1 = ivec4(_37); } diff --git a/reference/opt/shaders/asm/comp/block-name-alias-global.asm.comp b/reference/opt/shaders/asm/comp/block-name-alias-global.asm.comp index 08fccbcde6b..86ba0a3b9f9 100644 --- a/reference/opt/shaders/asm/comp/block-name-alias-global.asm.comp +++ b/reference/opt/shaders/asm/comp/block-name-alias-global.asm.comp @@ -7,12 +7,6 @@ struct A int b; }; -struct A_1 -{ - int a; - int b; -}; - layout(binding = 1, std430) buffer C1 { A Data[]; @@ -20,7 +14,7 @@ layout(binding = 1, std430) buffer C1 layout(binding = 2, std140) uniform C2 { - A_1 Data[1024]; + A Data[1024]; } C2_1; layout(binding = 0, std430) buffer B @@ -30,7 +24,7 @@ layout(binding = 0, std430) buffer B layout(binding = 3, std140) uniform B { - A_1 Data[1024]; + A Data[1024]; } C4; void main() diff --git a/reference/opt/shaders/asm/comp/control-flow-hints.asm.comp 
b/reference/opt/shaders/asm/comp/control-flow-hints.asm.comp new file mode 100644 index 00000000000..617cde4bcad --- /dev/null +++ b/reference/opt/shaders/asm/comp/control-flow-hints.asm.comp @@ -0,0 +1,58 @@ +#version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer bar +{ + vec4 _data[]; +} bar_1; + +layout(binding = 1, std430) buffer foo +{ + vec4 _data[]; +} foo_1; + +void main() +{ + bar_1._data[0] = foo_1._data[0]; + bar_1._data[1] = foo_1._data[1]; + bar_1._data[2] = foo_1._data[2]; + bar_1._data[3] = foo_1._data[3]; + bar_1._data[4] = foo_1._data[4]; + bar_1._data[5] = foo_1._data[5]; + bar_1._data[6] = foo_1._data[6]; + bar_1._data[7] = foo_1._data[7]; + bar_1._data[8] = foo_1._data[8]; + bar_1._data[9] = foo_1._data[9]; + bar_1._data[10] = foo_1._data[10]; + bar_1._data[11] = foo_1._data[11]; + bar_1._data[12] = foo_1._data[12]; + bar_1._data[13] = foo_1._data[13]; + bar_1._data[14] = foo_1._data[14]; + bar_1._data[15] = foo_1._data[15]; + SPIRV_CROSS_LOOP + for (int _137 = 0; _137 < 16; ) + { + bar_1._data[15 - _137] = foo_1._data[_137]; + _137++; + continue; + } + SPIRV_CROSS_BRANCH + if (bar_1._data[10].x > 10.0) + { + foo_1._data[20] = vec4(5.0); + } + foo_1._data[20] = vec4(20.0); +} + diff --git a/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp b/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp index 6860cfafe47..d30cddafec8 100644 --- a/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp +++ b/reference/opt/shaders/asm/comp/hlsl-functionality.asm.comp @@ -13,7 +13,7 @@ 
layout(binding = 1, std430) buffer Buf_count void main() { - int _32 = atomicAdd(Buf_count_1._count, 1); - Buf_1._data[_32] = vec4(1.0); + int _33 = atomicAdd(Buf_count_1._count, 1); + Buf_1._data[_33] = vec4(1.0); } diff --git a/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..32d8e025012 --- /dev/null +++ b/reference/opt/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,42 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a1; + vec2 a2; + vec3 a3; + vec4 a4; + float b1; + vec2 b2; + vec3 b3; + vec4 b4; + float c1; + vec2 c2; + vec3 c3; + vec4 c4; +} _4; + +void main() +{ + _4.a1 = min(_4.b1, _4.c1); + _4.a2 = min(_4.b2, _4.c2); + _4.a3 = min(_4.b3, _4.c3); + _4.a4 = min(_4.b4, _4.c4); + _4.a1 = max(_4.b1, _4.c1); + _4.a2 = max(_4.b2, _4.c2); + _4.a3 = max(_4.b3, _4.c3); + _4.a4 = max(_4.b4, _4.c4); + _4.a1 = clamp(_4.a1, _4.b1, _4.c1); + _4.a2 = clamp(_4.a2, _4.b2, _4.c2); + _4.a3 = clamp(_4.a3, _4.b3, _4.c3); + _4.a4 = clamp(_4.a4, _4.b4, _4.c4); + for (int _139 = 0; _139 < 2; ) + { + _4.a2 = min(_4.b2, _4.c2); + _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y); + _139++; + continue; + } +} + diff --git a/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp deleted file mode 100644 index 9ae8d6fd7f3..00000000000 --- a/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp +++ /dev/null @@ -1,25 +0,0 @@ -#version 450 -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture; - -void main() -{ - int _30; - _30 = 7; - int _27_copy; - for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy) - { - if (5.0 > float(_27)) - { - break; - } - else - { - continue; 
- } - continue; - } - imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0)); -} - diff --git a/reference/opt/shaders/asm/comp/recompile-block-naming.asm.comp b/reference/opt/shaders/asm/comp/recompile-block-naming.asm.comp index ff83de345d1..7d43b6f54d0 100644 --- a/reference/opt/shaders/asm/comp/recompile-block-naming.asm.comp +++ b/reference/opt/shaders/asm/comp/recompile-block-naming.asm.comp @@ -18,18 +18,18 @@ layout(binding = 0, std430) buffer MyThirdBuffer void main() { - uint _105 = MyFirstBuffer_1._data[0]; - uint _109 = MyFirstBuffer_1._data[1]; - uint _113 = MyFirstBuffer_1._data[2]; - uint _117 = MyFirstBuffer_1._data[3]; - uint _122 = MySecondBuffer_1._data[1]; - uint _126 = MySecondBuffer_1._data[2]; - uint _130 = MySecondBuffer_1._data[3]; - uint _134 = MySecondBuffer_1._data[4]; - uvec4 _140 = uvec4(_105, _109, _113, _117) + uvec4(_122, _126, _130, _134); - MyThirdBuffer_1._data[0] = _140.x; - MyThirdBuffer_1._data[1] = _140.y; - MyThirdBuffer_1._data[2] = _140.z; - MyThirdBuffer_1._data[3] = _140.w; + uint _106 = MyFirstBuffer_1._data[0]; + uint _110 = MyFirstBuffer_1._data[1]; + uint _114 = MyFirstBuffer_1._data[2]; + uint _118 = MyFirstBuffer_1._data[3]; + uint _123 = MySecondBuffer_1._data[1]; + uint _127 = MySecondBuffer_1._data[2]; + uint _131 = MySecondBuffer_1._data[3]; + uint _135 = MySecondBuffer_1._data[4]; + uvec4 _141 = uvec4(_106, _110, _114, _118) + uvec4(_123, _127, _131, _135); + MyThirdBuffer_1._data[0] = _141.x; + MyThirdBuffer_1._data[1] = _141.y; + MyThirdBuffer_1._data[2] = _141.z; + MyThirdBuffer_1._data[3] = _141.w; } diff --git a/reference/shaders/asm/comp/switch-break-ladder.asm.comp b/reference/opt/shaders/asm/comp/switch-break-ladder.asm.invalid.comp similarity index 100% rename from reference/shaders/asm/comp/switch-break-ladder.asm.comp rename to reference/opt/shaders/asm/comp/switch-break-ladder.asm.invalid.comp diff --git 
a/reference/opt/shaders/asm/comp/undefined-constant-composite.asm.comp b/reference/opt/shaders/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..eb94756d736 --- /dev/null +++ b/reference/opt/shaders/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,26 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct _20 +{ + int _m0; + int _m1; +}; + +int _28; + +layout(binding = 1, std430) buffer _5_6 +{ + int _m0[10]; +} _6; + +layout(binding = 0, std430) buffer _7_8 +{ + int _m0[10]; +} _8; + +void main() +{ + _6._m0[gl_GlobalInvocationID.x] = _8._m0[gl_GlobalInvocationID.x] + _20(_28, 200)._m1; +} + diff --git a/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag b/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag index c07f1657f40..d68f84b48e9 100644 --- a/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag +++ b/reference/opt/shaders/asm/frag/complex-name-workarounds.asm.frag @@ -7,10 +7,9 @@ layout(location = 0) out vec4 b; void main() { vec4 _28 = (_ + a) + _; - vec4 _34 = (_ - a) + a; b = _28; - b = _34; + b = _; b = _28; - b = _34; + b = _; } diff --git a/reference/opt/shaders/asm/frag/default-member-names.asm.frag b/reference/opt/shaders/asm/frag/default-member-names.asm.frag index 13f81b11a67..ad64761ede4 100644 --- a/reference/opt/shaders/asm/frag/default-member-names.asm.frag +++ b/reference/opt/shaders/asm/frag/default-member-names.asm.frag @@ -1,9 +1,9 @@ #version 450 -layout(location = 0) out vec4 _3; - float _49; +layout(location = 0) out vec4 _3; + void main() { _3 = vec4(_49); diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag index 452fd6fb95c..350142d4b76 100644 --- a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag +++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag @@ -7,7 +7,7 @@ layout(location = 
0) out vec4 _entryPointOutput; void main() { - ivec2 _152 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; - _entryPointOutput = ((texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _152, 0) + texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _152, 0)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy); + ivec2 _154 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; + _entryPointOutput = ((texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _154, 0) + texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _154, 0)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy); } diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk index 55e2c2da630..b898b01bc0e 100644 --- a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk +++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.asm.vk.frag.vk @@ -8,7 +8,7 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { - ivec2 _152 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; - _entryPointOutput = ((texelFetch(SampledImage, _152, 0) + texelFetch(SampledImage, _152, 0)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy); + ivec2 _154 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; + _entryPointOutput = ((texelFetch(SampledImage, _154, 0) + texelFetch(SampledImage, _154, 0)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy); } diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag 
b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag new file mode 100644 index 00000000000..350142d4b76 --- /dev/null +++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag @@ -0,0 +1,13 @@ +#version 450 + +uniform sampler2D SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_CombinedSampledImageSampler; + +layout(location = 0) out vec4 _entryPointOutput; + +void main() +{ + ivec2 _154 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; + _entryPointOutput = ((texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _154, 0) + texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, _154, 0)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy)) + texture(SPIRV_Cross_CombinedSampledImageSampler, gl_FragCoord.xy); +} + diff --git a/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk new file mode 100644 index 00000000000..21797b4cf3a --- /dev/null +++ b/reference/opt/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk @@ -0,0 +1,14 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler Sampler; +layout(set = 0, binding = 0) uniform texture2D SampledImage; +layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler; + +layout(location = 0) out vec4 _entryPointOutput; + +void main() +{ + ivec2 _154 = ivec3(int(gl_FragCoord.x * 1280.0), int(gl_FragCoord.y * 720.0), 0).xy; + _entryPointOutput = ((texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _154, 0) + texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), _154, 0)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy)) + texture(sampler2D(SampledImage, Sampler), gl_FragCoord.xy); +} + diff --git a/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag 
b/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag new file mode 100644 index 00000000000..05ce10adfaa --- /dev/null +++ b/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag @@ -0,0 +1,6 @@ +#version 450 + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk b/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk new file mode 100644 index 00000000000..05ce10adfaa --- /dev/null +++ b/reference/opt/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk @@ -0,0 +1,6 @@ +#version 450 + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/inf-nan-constant-double.asm.frag b/reference/opt/shaders/asm/frag/inf-nan-constant-double.asm.frag index d8e29aa4041..e53b282f879 100644 --- a/reference/opt/shaders/asm/frag/inf-nan-constant-double.asm.frag +++ b/reference/opt/shaders/asm/frag/inf-nan-constant-double.asm.frag @@ -1,11 +1,17 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif layout(location = 0) out vec3 FragColor; layout(location = 0) flat in double vTmp; void main() { - FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul), uint64BitsToDouble(0xfff0000000000000ul), uint64BitsToDouble(0x7ff8000000000000ul)) + dvec3(vTmp)); + FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul /* inf */), uint64BitsToDouble(0xfff0000000000000ul /* -inf */), uint64BitsToDouble(0x7ff8000000000000ul /* nan */)) + dvec3(vTmp)); } diff --git a/reference/opt/shaders/asm/frag/inf-nan-constant.asm.frag b/reference/opt/shaders/asm/frag/inf-nan-constant.asm.frag index dd4284c9b11..b5e0c6e968b 100644 --- a/reference/opt/shaders/asm/frag/inf-nan-constant.asm.frag +++ b/reference/opt/shaders/asm/frag/inf-nan-constant.asm.frag @@ -6,6 +6,6 @@ layout(location = 0) out highp vec3 FragColor; void main() { - FragColor = vec3(uintBitsToFloat(0x7f800000u), uintBitsToFloat(0xff800000u), uintBitsToFloat(0x7fc00000u)); + FragColor = vec3(uintBitsToFloat(0x7f800000u /* inf */), uintBitsToFloat(0xff800000u /* -inf */), uintBitsToFloat(0x7fc00000u /* nan */)); } diff --git a/reference/opt/shaders/asm/frag/line-directive.line.asm.frag b/reference/opt/shaders/asm/frag/line-directive.line.asm.frag index 30be934fc6e..4682d79e808 100644 --- a/reference/opt/shaders/asm/frag/line-directive.line.asm.frag +++ b/reference/opt/shaders/asm/frag/line-directive.line.asm.frag @@ -7,14 +7,12 @@ layout(location = 0) in float vColor; #line 8 "test.frag" void main() { - float _80; #line 8 "test.frag" FragColor = 1.0; #line 9 "test.frag" FragColor = 2.0; #line 10 "test.frag" - _80 = vColor; - if (_80 < 0.0) + if (vColor < 0.0) { #line 12 "test.frag" FragColor = 3.0; @@ -24,16 +22,19 @@ void main() #line 16 "test.frag" FragColor = 4.0; } - for (int _126 = 0; float(_126) < (40.0 + _80); ) +#line 19 "test.frag" + for (int _127 = 0; float(_127) < (40.0 + vColor); ) { #line 21 "test.frag" FragColor += 0.20000000298023223876953125; #line 22 "test.frag" FragColor += 
0.300000011920928955078125; - _126 += (int(_80) + 5); +#line 19 "test.frag" + _127 += (int(vColor) + 5); continue; } - switch (int(_80)) +#line 25 "test.frag" + switch (int(vColor)) { case 0: { @@ -59,7 +60,8 @@ void main() } for (;;) { - FragColor += (10.0 + _80); +#line 42 "test.frag" + FragColor += (10.0 + vColor); #line 43 "test.frag" if (FragColor < 100.0) { @@ -69,5 +71,6 @@ void main() break; } } +#line 48 "test.frag" } diff --git a/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag b/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag index 8c35e62ecef..eb16828e67a 100644 --- a/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag +++ b/reference/opt/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag @@ -1,4 +1,18 @@ #version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif + +int _231; layout(binding = 0, std140) uniform Foo { @@ -10,65 +24,60 @@ layout(binding = 0, std140) uniform Foo layout(location = 0) in vec3 fragWorld; layout(location = 0) out int _entryPointOutput; -int _240; +mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; } void main() { - uint _227; - int _236; - for (;;) + int _228; + do { - _227 = 0u; - bool _231; - int _237; + bool _225; + int _229; + uint _222 = 0u; + SPIRV_CROSS_UNROLL for (;;) { - if (_227 < _11.shadowCascadesNum) + if (_222 < _11.shadowCascadesNum) { - mat4 _228; - for (;;) + mat4 _223; + do { if (_11.test == 0) { - _228 = mat4(vec4(0.5, 0.0, 0.0, 0.0), vec4(0.0, 0.5, 0.0, 0.0), vec4(0.0, 0.0, 0.5, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + _223 = mat4(vec4(0.5, 0.0, 0.0, 0.0), vec4(0.0, 0.5, 0.0, 0.0), 
vec4(0.0, 0.0, 0.5, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); break; } - _228 = mat4(vec4(1.0, 0.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + _223 = mat4(vec4(1.0, 0.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); break; - } - vec4 _177 = (_228 * _11.lightVP[_227]) * vec4(fragWorld, 1.0); - float _179 = _177.z; - float _186 = _177.x; - float _188 = _177.y; - if ((((_179 >= 0.0) && (_179 <= 1.0)) && (max(_186, _188) <= 1.0)) && (min(_186, _188) >= 0.0)) + } while(false); + vec4 _170 = (_223 * spvWorkaroundRowMajor(_11.lightVP[_222])) * vec4(fragWorld, 1.0); + float _172 = _170.z; + float _179 = _170.x; + float _181 = _170.y; + if ((((_172 >= 0.0) && (_172 <= 1.0)) && (max(_179, _181) <= 1.0)) && (min(_179, _181) >= 0.0)) { - _237 = int(_227); - _231 = true; + _229 = int(_222); + _225 = true; break; } - else - { - _227++; - continue; - } - _227++; + _222++; continue; } else { - _237 = _240; - _231 = false; + _229 = _231; + _225 = false; break; } } - if (_231) + if (_225) { - _236 = _237; + _228 = _229; break; } - _236 = -1; + _228 = -1; break; - } - _entryPointOutput = _236; + } while(false); + _entryPointOutput = _228; } diff --git a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag index c2dba928df2..97d3b74f022 100644 --- a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag +++ b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { + vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w); vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv); float _50 = _49.y; + float _53 = clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375); float _55; float _58; _55 = 0.0; @@ -31,10 +33,11 @@ void main() for (int _60 = -3; _60 <= 3; ) { float _64 = 
float(_60); - vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)); - float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)); - _55 += (_72.x * _78); - _58 += _78; + float _68 = exp(((-_64) * _64) * 0.2222220003604888916015625); + vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64)); + float _77 = float(abs(_72.y - _50) < _53); + _55 = fma(_72.x, _68 * _77, _55); + _58 = fma(_68, _77, _58); _60++; continue; } diff --git a/reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag deleted file mode 100644 index faf32edcf42..00000000000 --- a/reference/opt/shaders/asm/frag/loop-merge-to-continue.asm.frag +++ /dev/null @@ -1,21 +0,0 @@ -#version 450 - -layout(location = 0) out vec4 FragColor; -layout(location = 0) in vec4 v0; - -void main() -{ - FragColor = vec4(1.0); - int _50; - _50 = 0; - for (; _50 < 4; _50++) - { - for (int _51 = 0; _51 < 4; ) - { - FragColor += vec4(v0[(_50 + _51) & 3]); - _51++; - continue; - } - } -} - diff --git a/reference/opt/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag b/reference/opt/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag new file mode 100644 index 00000000000..4734c89c9af --- /dev/null +++ b/reference/opt/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag @@ -0,0 +1,8 @@ +#version 320 es +precision mediump float; +precision highp int; + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/out-of-bounds-access.asm.frag b/reference/opt/shaders/asm/frag/out-of-bounds-access.asm.frag new file mode 100644 index 00000000000..4734c89c9af --- /dev/null +++ b/reference/opt/shaders/asm/frag/out-of-bounds-access.asm.frag @@ -0,0 +1,8 @@ +#version 320 es +precision mediump float; 
+precision highp int; + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag b/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag new file mode 100644 index 00000000000..9aa9a471e84 --- /dev/null +++ b/reference/opt/shaders/asm/frag/pack-and-unpack-uint2.asm.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(18.0, 52.0, 1.0, 1.0); +} + diff --git a/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag deleted file mode 100644 index 05c17c7a66e..00000000000 --- a/reference/opt/shaders/asm/frag/selection-merge-to-continue.asm.frag +++ /dev/null @@ -1,24 +0,0 @@ -#version 450 - -layout(location = 0) out vec4 FragColor; -layout(location = 0) in vec4 v0; - -void main() -{ - FragColor = vec4(1.0); - for (int _54 = 0; _54 < 4; _54++) - { - if (v0.x == 20.0) - { - FragColor += vec4(v0[_54 & 3]); - continue; - } - else - { - FragColor += vec4(v0[_54 & 1]); - continue; - } - continue; - } -} - diff --git a/reference/opt/shaders/asm/frag/storage-class-output-initializer.asm.frag b/reference/opt/shaders/asm/frag/storage-class-output-initializer.asm.frag index 229358757aa..a5faaefb309 100644 --- a/reference/opt/shaders/asm/frag/storage-class-output-initializer.asm.frag +++ b/reference/opt/shaders/asm/frag/storage-class-output-initializer.asm.frag @@ -2,10 +2,12 @@ layout(location = 0) out vec4 FragColors[2]; layout(location = 2) out vec4 FragColor; +const vec4 _3_init[2] = vec4[](vec4(1.0, 2.0, 3.0, 4.0), vec4(10.0)); +const vec4 _4_init = vec4(5.0); void main() { - FragColors = vec4[](vec4(1.0, 2.0, 3.0, 4.0), vec4(10.0)); - FragColor = vec4(5.0); + FragColors = _3_init; + FragColor = _4_init; } diff --git a/reference/opt/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag b/reference/opt/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag index 
b2473f4d037..7930ca3b4a0 100644 --- a/reference/opt/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag +++ b/reference/opt/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag @@ -8,12 +8,12 @@ struct Foo float var2; }; +Foo _22; + layout(binding = 0) uniform mediump sampler2D uSampler; layout(location = 0) out vec4 FragColor; -Foo _22; - void main() { FragColor = texture(uSampler, vec2(_22.var1, _22.var2)); diff --git a/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag b/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag deleted file mode 100644 index ea4a25995a0..00000000000 --- a/reference/opt/shaders/asm/frag/switch-merge-to-continue.asm.frag +++ /dev/null @@ -1,31 +0,0 @@ -#version 450 - -layout(location = 0) out vec4 FragColor; - -void main() -{ - FragColor = vec4(1.0); - for (int _52 = 0; _52 < 4; _52++) - { - switch (_52) - { - case 0: - { - FragColor.x += 1.0; - break; - } - case 1: - { - FragColor.y += 3.0; - break; - } - default: - { - FragColor.z += 3.0; - break; - } - } - continue; - } -} - diff --git a/reference/opt/shaders/asm/frag/switch-preserve-sign-extension.asm.frag b/reference/opt/shaders/asm/frag/switch-preserve-sign-extension.asm.frag new file mode 100644 index 00000000000..41b98085125 --- /dev/null +++ b/reference/opt/shaders/asm/frag/switch-preserve-sign-extension.asm.frag @@ -0,0 +1,9 @@ +#version 330 +#ifdef GL_ARB_shading_language_420pack +#extension GL_ARB_shading_language_420pack : require +#endif + +void main() +{ +} + diff --git a/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag b/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag index 270c779aa18..fcad3fbf0d8 100644 --- a/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/opt/shaders/asm/frag/vector-shuffle-oom.asm.frag @@ -1,4 +1,16 @@ #version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define 
SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif struct _28 { @@ -90,19 +102,17 @@ uniform sampler2D SPIRV_Cross_Combined_2; layout(location = 0) out vec4 _5; -_28 _74; - void main() { - vec2 _82 = gl_FragCoord.xy * _19._m23.xy; vec4 _88 = _7._m2 * _7._m0.xyxy; vec2 _95 = _88.xy; vec2 _96 = _88.zw; - vec2 _97 = clamp(_82 + (vec2(0.0, -2.0) * _7._m0.xy), _95, _96); + vec2 _97 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, -2.0) * _7._m0.xy), _95, _96); vec3 _109 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _97, 0.0).w * _7._m1, 0.0, 1.0); vec4 _113 = textureLod(SPIRV_Cross_Combined_1, _97, 0.0); float _114 = _113.y; vec3 _129; + SPIRV_CROSS_BRANCH if (_114 > 0.0) { _129 = _109 + (textureLod(SPIRV_Cross_Combined_2, _97, 0.0).xyz * clamp(_114 * _113.z, 0.0, 1.0)); @@ -111,12 +121,12 @@ void main() { _129 = _109; } - vec3 _130 = _129 * 0.5; - vec2 _144 = clamp(_82 + (vec2(-1.0) * _7._m0.xy), _95, _96); + vec2 _144 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-1.0) * _7._m0.xy), _95, _96); vec3 _156 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _144, 0.0).w * _7._m1, 0.0, 1.0); vec4 _160 = textureLod(SPIRV_Cross_Combined_1, _144, 0.0); float _161 = _160.y; vec3 _176; + SPIRV_CROSS_BRANCH if (_161 > 0.0) { _176 = _156 + (textureLod(SPIRV_Cross_Combined_2, _144, 0.0).xyz * clamp(_161 * _160.z, 0.0, 1.0)); @@ -125,12 +135,12 @@ void main() { _176 = _156; } - vec3 _177 = _176 * 0.5; - vec2 _191 = clamp(_82 + (vec2(0.0, -1.0) * _7._m0.xy), _95, _96); + vec2 _191 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, -1.0) * _7._m0.xy), _95, _96); vec3 _203 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _191, 0.0).w * _7._m1, 0.0, 1.0); vec4 _207 = textureLod(SPIRV_Cross_Combined_1, _191, 0.0); float _208 = _207.y; vec3 _223; + SPIRV_CROSS_BRANCH if (_208 > 0.0) { 
_223 = _203 + (textureLod(SPIRV_Cross_Combined_2, _191, 0.0).xyz * clamp(_208 * _207.z, 0.0, 1.0)); @@ -139,12 +149,12 @@ void main() { _223 = _203; } - vec3 _224 = _223 * 0.75; - vec2 _238 = clamp(_82 + (vec2(1.0, -1.0) * _7._m0.xy), _95, _96); + vec2 _238 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(1.0, -1.0) * _7._m0.xy), _95, _96); vec3 _250 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _238, 0.0).w * _7._m1, 0.0, 1.0); vec4 _254 = textureLod(SPIRV_Cross_Combined_1, _238, 0.0); float _255 = _254.y; vec3 _270; + SPIRV_CROSS_BRANCH if (_255 > 0.0) { _270 = _250 + (textureLod(SPIRV_Cross_Combined_2, _238, 0.0).xyz * clamp(_255 * _254.z, 0.0, 1.0)); @@ -153,12 +163,12 @@ void main() { _270 = _250; } - vec3 _271 = _270 * 0.5; - vec2 _285 = clamp(_82 + (vec2(-2.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _285 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-2.0, 0.0) * _7._m0.xy), _95, _96); vec3 _297 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _285, 0.0).w * _7._m1, 0.0, 1.0); vec4 _301 = textureLod(SPIRV_Cross_Combined_1, _285, 0.0); float _302 = _301.y; vec3 _317; + SPIRV_CROSS_BRANCH if (_302 > 0.0) { _317 = _297 + (textureLod(SPIRV_Cross_Combined_2, _285, 0.0).xyz * clamp(_302 * _301.z, 0.0, 1.0)); @@ -167,12 +177,12 @@ void main() { _317 = _297; } - vec3 _318 = _317 * 0.5; - vec2 _332 = clamp(_82 + (vec2(-1.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _332 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-1.0, 0.0) * _7._m0.xy), _95, _96); vec3 _344 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _332, 0.0).w * _7._m1, 0.0, 1.0); vec4 _348 = textureLod(SPIRV_Cross_Combined_1, _332, 0.0); float _349 = _348.y; vec3 _364; + SPIRV_CROSS_BRANCH if (_349 > 0.0) { _364 = _344 + (textureLod(SPIRV_Cross_Combined_2, _332, 0.0).xyz * clamp(_349 * _348.z, 0.0, 1.0)); @@ -181,12 +191,12 @@ void main() { _364 = _344; } - vec3 _365 = _364 * 0.75; - vec2 _379 = clamp(_82, _95, _96); + vec2 _379 = clamp(gl_FragCoord.xy * _19._m23.xy, _95, _96); vec3 _391 = _11._m5 * 
clamp(textureLod(SPIRV_Cross_Combined, _379, 0.0).w * _7._m1, 0.0, 1.0); vec4 _395 = textureLod(SPIRV_Cross_Combined_1, _379, 0.0); float _396 = _395.y; vec3 _411; + SPIRV_CROSS_BRANCH if (_396 > 0.0) { _411 = _391 + (textureLod(SPIRV_Cross_Combined_2, _379, 0.0).xyz * clamp(_396 * _395.z, 0.0, 1.0)); @@ -195,12 +205,12 @@ void main() { _411 = _391; } - vec3 _412 = _411 * 1.0; - vec2 _426 = clamp(_82 + (vec2(1.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _426 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(1.0, 0.0) * _7._m0.xy), _95, _96); vec3 _438 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _426, 0.0).w * _7._m1, 0.0, 1.0); vec4 _442 = textureLod(SPIRV_Cross_Combined_1, _426, 0.0); float _443 = _442.y; vec3 _458; + SPIRV_CROSS_BRANCH if (_443 > 0.0) { _458 = _438 + (textureLod(SPIRV_Cross_Combined_2, _426, 0.0).xyz * clamp(_443 * _442.z, 0.0, 1.0)); @@ -209,12 +219,12 @@ void main() { _458 = _438; } - vec3 _459 = _458 * 0.75; - vec2 _473 = clamp(_82 + (vec2(2.0, 0.0) * _7._m0.xy), _95, _96); + vec2 _473 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(2.0, 0.0) * _7._m0.xy), _95, _96); vec3 _485 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _473, 0.0).w * _7._m1, 0.0, 1.0); vec4 _489 = textureLod(SPIRV_Cross_Combined_1, _473, 0.0); float _490 = _489.y; vec3 _505; + SPIRV_CROSS_BRANCH if (_490 > 0.0) { _505 = _485 + (textureLod(SPIRV_Cross_Combined_2, _473, 0.0).xyz * clamp(_490 * _489.z, 0.0, 1.0)); @@ -223,12 +233,12 @@ void main() { _505 = _485; } - vec3 _506 = _505 * 0.5; - vec2 _520 = clamp(_82 + (vec2(-1.0, 1.0) * _7._m0.xy), _95, _96); + vec2 _520 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(-1.0, 1.0) * _7._m0.xy), _95, _96); vec3 _532 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _520, 0.0).w * _7._m1, 0.0, 1.0); vec4 _536 = textureLod(SPIRV_Cross_Combined_1, _520, 0.0); float _537 = _536.y; vec3 _552; + SPIRV_CROSS_BRANCH if (_537 > 0.0) { _552 = _532 + (textureLod(SPIRV_Cross_Combined_2, _520, 0.0).xyz * clamp(_537 * _536.z, 0.0, 1.0)); @@ 
-237,12 +247,12 @@ void main() { _552 = _532; } - vec3 _553 = _552 * 0.5; - vec2 _567 = clamp(_82 + (vec2(0.0, 1.0) * _7._m0.xy), _95, _96); + vec2 _567 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, 1.0) * _7._m0.xy), _95, _96); vec3 _579 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _567, 0.0).w * _7._m1, 0.0, 1.0); vec4 _583 = textureLod(SPIRV_Cross_Combined_1, _567, 0.0); float _584 = _583.y; vec3 _599; + SPIRV_CROSS_BRANCH if (_584 > 0.0) { _599 = _579 + (textureLod(SPIRV_Cross_Combined_2, _567, 0.0).xyz * clamp(_584 * _583.z, 0.0, 1.0)); @@ -251,12 +261,12 @@ void main() { _599 = _579; } - vec3 _600 = _599 * 0.75; - vec2 _614 = clamp(_82 + _7._m0.xy, _95, _96); + vec2 _614 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, _7._m0.xy), _95, _96); vec3 _626 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _614, 0.0).w * _7._m1, 0.0, 1.0); vec4 _630 = textureLod(SPIRV_Cross_Combined_1, _614, 0.0); float _631 = _630.y; vec3 _646; + SPIRV_CROSS_BRANCH if (_631 > 0.0) { _646 = _626 + (textureLod(SPIRV_Cross_Combined_2, _614, 0.0).xyz * clamp(_631 * _630.z, 0.0, 1.0)); @@ -265,12 +275,12 @@ void main() { _646 = _626; } - vec3 _647 = _646 * 0.5; - vec2 _661 = clamp(_82 + (vec2(0.0, 2.0) * _7._m0.xy), _95, _96); + vec2 _661 = clamp(fma(gl_FragCoord.xy, _19._m23.xy, vec2(0.0, 2.0) * _7._m0.xy), _95, _96); vec3 _673 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _661, 0.0).w * _7._m1, 0.0, 1.0); vec4 _677 = textureLod(SPIRV_Cross_Combined_1, _661, 0.0); float _678 = _677.y; vec3 _693; + SPIRV_CROSS_BRANCH if (_678 > 0.0) { _693 = _673 + (textureLod(SPIRV_Cross_Combined_2, _661, 0.0).xyz * clamp(_678 * _677.z, 0.0, 1.0)); @@ -279,11 +289,9 @@ void main() { _693 = _673; } - vec3 _702 = ((((((((((((_130.xyz + _177).xyz + _224).xyz + _271).xyz + _318).xyz + _365).xyz + _412).xyz + _459).xyz + _506).xyz + _553).xyz + _600).xyz + _647).xyz + (_693 * 0.5)).xyz * vec3(0.125); - _28 _704 = _74; - _704._m0 = vec4(_702.x, _702.y, _702.z, vec4(0.0).w); - _28 _705 = _704; - 
_705._m0.w = 1.0; - _5 = _705._m0; + vec3 _702 = (((((((((((((_129 * 0.5).xyz + (_176 * 0.5)).xyz + (_223 * 0.75)).xyz + (_270 * 0.5)).xyz + (_317 * 0.5)).xyz + (_364 * 0.75)).xyz + (_411 * 1.0)).xyz + (_458 * 0.75)).xyz + (_505 * 0.5)).xyz + (_552 * 0.5)).xyz + (_599 * 0.75)).xyz + (_646 * 0.5)).xyz + (_693 * 0.5)).xyz * vec3(0.125); + _28 _750 = _28(vec4(_702.x, _702.y, _702.z, vec4(0.0).w)); + _750._m0.w = 1.0; + _5 = _750._m0; } diff --git a/reference/opt/shaders/asm/geom/unroll-glposition-load.asm.geom b/reference/opt/shaders/asm/geom/unroll-glposition-load.asm.geom index d1f8963fa10..678379dddee 100644 --- a/reference/opt/shaders/asm/geom/unroll-glposition-load.asm.geom +++ b/reference/opt/shaders/asm/geom/unroll-glposition-load.asm.geom @@ -4,17 +4,11 @@ layout(max_vertices = 3, triangle_strip) out; void main() { - vec4 _35_unrolled[3]; - for (int i = 0; i < int(3); i++) + for (int _74 = 0; _74 < 3; ) { - _35_unrolled[i] = gl_in[i].gl_Position; - } - vec4 param[3] = _35_unrolled; - for (int _73 = 0; _73 < 3; ) - { - gl_Position = param[_73]; + gl_Position = gl_in[_74].gl_Position; EmitVertex(); - _73++; + _74++; continue; } EndPrimitive(); diff --git a/reference/opt/shaders/asm/vert/empty-io.asm.vert b/reference/opt/shaders/asm/vert/empty-io.asm.vert index 3819a71dd28..52fd706565b 100644 --- a/reference/opt/shaders/asm/vert/empty-io.asm.vert +++ b/reference/opt/shaders/asm/vert/empty-io.asm.vert @@ -6,6 +6,7 @@ struct VSOutput }; layout(location = 0) in vec4 position; +layout(location = 0) out VSOutput _entryPointOutput; void main() { diff --git a/reference/opt/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert b/reference/opt/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert index b237783f6cb..3ac9732b9bb 100644 --- a/reference/opt/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert +++ b/reference/opt/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert @@ -1,4 +1,7 @@ #version 450 +#ifdef 
GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct InstanceData { @@ -12,7 +15,11 @@ layout(binding = 0, std430) readonly buffer gInstanceData } gInstanceData_1; layout(location = 0) in vec3 PosL; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) out vec4 _entryPointOutput_Color; void main() diff --git a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert index 2608c1d578f..3d9ad3b4865 100644 --- a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert +++ b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert @@ -10,21 +10,21 @@ const int _20 = (_7 + 2); #endif const uint _8 = SPIRV_CROSS_CONSTANT_ID_202; const uint _25 = (_8 % 5u); -const ivec4 _30 = ivec4(20, 30, _20, _20); -const ivec2 _32 = ivec2(_30.y, _30.x); -const int _33 = _30.y; +const int _30 = _7 - (-3) * (_7 / (-3)); +const ivec4 _32 = ivec4(20, 30, _20, _30); +const ivec2 _34 = ivec2(_32.y, _32.x); +const int _35 = _32.y; layout(location = 0) flat out int _4; void main() { - vec4 _63 = vec4(0.0); - _63.y = float(_20); - vec4 _66 = _63; - _66.z = float(_25); - vec4 _52 = _66 + vec4(_30); - vec2 _56 = _52.xy + vec2(_32); - gl_Position = vec4(_56.x, _56.y, _52.z, _52.w); - _4 = _33; + vec4 _65 = vec4(0.0); + _65.y = float(_20); + _65.z = float(_25); + vec4 _54 = _65 + vec4(_32); + vec2 _58 = _54.xy + vec2(_34); + gl_Position = vec4(_58.x, _58.y, _54.z, _54.w); + _4 = _35; } diff --git a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk index 10da8f4b8e8..ed9d98e9dee 100644 --- a/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk +++ b/reference/opt/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk @@ 
-4,21 +4,25 @@ layout(constant_id = 201) const int _7 = -10; const int _20 = (_7 + 2); layout(constant_id = 202) const uint _8 = 100u; const uint _25 = (_8 % 5u); -const ivec4 _30 = ivec4(20, 30, _20, _20); -const ivec2 _32 = ivec2(_30.y, _30.x); -const int _33 = _30.y; +const int _30 = _7 - (-3) * (_7 / (-3)); +const ivec4 _32 = ivec4(20, 30, _20, _30); +const ivec2 _34 = ivec2(_32.y, _32.x); +const int _35 = _32.y; layout(location = 0) flat out int _4; void main() { - vec4 _63 = vec4(0.0); - _63.y = float(_20); - vec4 _66 = _63; - _66.z = float(_25); - vec4 _52 = _66 + vec4(_30); - vec2 _56 = _52.xy + vec2(_32); - gl_Position = vec4(_56.x, _56.y, _52.z, _52.w); - _4 = _33; + float _42 = float(_20); + vec4 _65 = vec4(0.0); + _65.y = _42; + float _47 = float(_25); + _65.z = _47; + vec4 _52 = vec4(_32); + vec4 _54 = _65 + _52; + vec2 _55 = vec2(_34); + vec2 _58 = _54.xy + _55; + gl_Position = vec4(_58.x, _58.y, _54.z, _54.w); + _4 = _35; } diff --git a/reference/opt/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert b/reference/opt/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert index c25e9bbe5b2..134e08d592c 100644 --- a/reference/opt/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert +++ b/reference/opt/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert @@ -1,6 +1,13 @@ #version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif void main() { diff --git a/reference/opt/shaders/comp/bake_gradient.comp b/reference/opt/shaders/comp/bake_gradient.comp index 69634d5d848..7ef245e62ed 100644 --- a/reference/opt/shaders/comp/bake_gradient.comp +++ b/reference/opt/shaders/comp/bake_gradient.comp @@ -16,11 +16,23 @@ void main() { vec4 _59 = (vec2(gl_GlobalInvocationID.xy) * _46.uInvSize.xy).xyxy + (_46.uInvSize * 0.5); vec2 _67 = _59.xy; + mediump float _79 = 
textureLodOffset(uHeight, _67, 0.0, ivec2(-1, 0)).x; + float hp_copy_79 = _79; + mediump float _87 = textureLodOffset(uHeight, _67, 0.0, ivec2(1, 0)).x; + float hp_copy_87 = _87; + mediump float _94 = textureLodOffset(uHeight, _67, 0.0, ivec2(0, -1)).x; + float hp_copy_94 = _94; + mediump float _101 = textureLodOffset(uHeight, _67, 0.0, ivec2(0, 1)).x; + float hp_copy_101 = _101; vec2 _128 = _59.zw; vec2 _157 = ((textureLodOffset(uDisplacement, _128, 0.0, ivec2(1, 0)).xy - textureLodOffset(uDisplacement, _128, 0.0, ivec2(-1, 0)).xy) * 0.60000002384185791015625) * _46.uScale.z; vec2 _161 = ((textureLodOffset(uDisplacement, _128, 0.0, ivec2(0, 1)).xy - textureLodOffset(uDisplacement, _128, 0.0, ivec2(0, -1)).xy) * 0.60000002384185791015625) * _46.uScale.z; + mediump float _203 = _157.y * _161.x; + float hp_copy_203 = _203; + float _209 = -hp_copy_203; + mediump float mp_copy_209 = _209; ivec2 _172 = ivec2(gl_GlobalInvocationID.xy); imageStore(iHeightDisplacement, _172, vec4(textureLod(uHeight, _67, 0.0).x, 0.0, 0.0, 0.0)); - imageStore(iGradJacobian, _172, vec4((_46.uScale.xy * 0.5) * vec2(textureLodOffset(uHeight, _67, 0.0, ivec2(1, 0)).x - textureLodOffset(uHeight, _67, 0.0, ivec2(-1, 0)).x, textureLodOffset(uHeight, _67, 0.0, ivec2(0, 1)).x - textureLodOffset(uHeight, _67, 0.0, ivec2(0, -1)).x), ((1.0 + _157.x) * (1.0 + _161.y)) - (_157.y * _161.x), 0.0)); + imageStore(iGradJacobian, _172, vec4((_46.uScale.xy * 0.5) * vec2(hp_copy_87 - hp_copy_79, hp_copy_101 - hp_copy_94), (1.0 + _157.x) * (1.0 + _161.y) + mp_copy_209, 0.0)); } diff --git a/reference/opt/shaders/comp/barriers.comp b/reference/opt/shaders/comp/barriers.comp index a091497a49b..7dfde372adb 100644 --- a/reference/opt/shaders/comp/barriers.comp +++ b/reference/opt/shaders/comp/barriers.comp @@ -8,21 +8,15 @@ void main() memoryBarrierImage(); memoryBarrierBuffer(); groupMemoryBarrier(); - memoryBarrierShared(); barrier(); memoryBarrier(); - memoryBarrierShared(); barrier(); memoryBarrierImage(); - 
memoryBarrierShared(); barrier(); memoryBarrierBuffer(); - memoryBarrierShared(); barrier(); groupMemoryBarrier(); - memoryBarrierShared(); barrier(); - memoryBarrierShared(); barrier(); } diff --git a/reference/opt/shaders/comp/bitcast-16bit-1.invalid.comp b/reference/opt/shaders/comp/bitcast-16bit-1.invalid.comp deleted file mode 100644 index 501f97955fc..00000000000 --- a/reference/opt/shaders/comp/bitcast-16bit-1.invalid.comp +++ /dev/null @@ -1,34 +0,0 @@ -#version 450 -#if defined(GL_AMD_gpu_shader_half_float) -#extension GL_AMD_gpu_shader_half_float : require -#elif defined(GL_NV_gpu_shader5) -#extension GL_NV_gpu_shader5 : require -#else -#error No extension available for FP16. -#endif -#if defined(GL_AMD_gpu_shader_int16) -#extension GL_AMD_gpu_shader_int16 : require -#else -#error No extension available for Int16. -#endif -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -layout(binding = 0, std430) buffer SSBO0 -{ - i16vec4 inputs[]; -} _25; - -layout(binding = 1, std430) buffer SSBO1 -{ - ivec4 outputs[]; -} _39; - -void main() -{ - uint ident = gl_GlobalInvocationID.x; - f16vec2 a = int16BitsToFloat16(_25.inputs[ident].xy); - _39.outputs[ident].x = int(packFloat2x16(a + f16vec2(float16_t(1.0)))); - _39.outputs[ident].y = packInt2x16(_25.inputs[ident].zw); - _39.outputs[ident].z = int(packUint2x16(u16vec2(_25.inputs[ident].xy))); -} - diff --git a/reference/opt/shaders/comp/casts.comp b/reference/opt/shaders/comp/casts.comp index 11ef36287b4..12cf17885bc 100644 --- a/reference/opt/shaders/comp/casts.comp +++ b/reference/opt/shaders/comp/casts.comp @@ -13,6 +13,6 @@ layout(binding = 0, std430) buffer SSBO0 void main() { - _21.outputs[gl_GlobalInvocationID.x] = mix(ivec4(0), ivec4(1), notEqual((_27.inputs[gl_GlobalInvocationID.x] & ivec4(3)), ivec4(uvec4(0u)))); + _21.outputs[gl_GlobalInvocationID.x] = ivec4(notEqual((_27.inputs[gl_GlobalInvocationID.x] & ivec4(3)), ivec4(uvec4(0u)))); } diff --git a/reference/opt/shaders/comp/cfg.comp 
b/reference/opt/shaders/comp/cfg.comp index 0b7e0c16162..f6e02a85516 100644 --- a/reference/opt/shaders/comp/cfg.comp +++ b/reference/opt/shaders/comp/cfg.comp @@ -1,13 +1,13 @@ #version 310 es layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +float _188; + layout(binding = 0, std430) buffer SSBO { float data; } _11; -float _183; - void main() { if (_11.data != 0.0) @@ -31,14 +31,14 @@ void main() break; } } - float _180; - _180 = _183; - for (int _179 = 0; _179 < 20; ) + float _185; + _185 = _188; + for (int _184 = 0; _184 < 20; ) { - _180 += 10.0; - _179++; + _185 += 10.0; + _184++; continue; } - _11.data = _180; + _11.data = _185; } diff --git a/reference/opt/shaders/comp/dowhile.comp b/reference/opt/shaders/comp/dowhile.comp index 4370ea3079a..d9a9f77cbf9 100644 --- a/reference/opt/shaders/comp/dowhile.comp +++ b/reference/opt/shaders/comp/dowhile.comp @@ -14,19 +14,19 @@ layout(binding = 1, std430) writeonly buffer SSBO2 void main() { - vec4 _57; - int _58; - _58 = 0; - _57 = _28.in_data[gl_GlobalInvocationID.x]; + vec4 _59; + int _60; + _60 = 0; + _59 = _28.in_data[gl_GlobalInvocationID.x]; vec4 _42; for (;;) { - _42 = _28.mvp * _57; - int _44 = _58 + 1; + _42 = _28.mvp * _59; + int _44 = _60 + 1; if (_44 < 16) { - _58 = _44; - _57 = _42; + _60 = _44; + _59 = _42; } else { diff --git a/reference/opt/shaders/comp/generate_height.comp b/reference/opt/shaders/comp/generate_height.comp index feb8d41c0a2..75ad1f9125a 100644 --- a/reference/opt/shaders/comp/generate_height.comp +++ b/reference/opt/shaders/comp/generate_height.comp @@ -18,39 +18,39 @@ layout(binding = 1, std430) writeonly buffer HeightmapFFT void main() { - uvec2 _264 = uvec2(64u, 1u) * gl_NumWorkGroups.xy; - uvec2 _269 = _264 - gl_GlobalInvocationID.xy; - bvec2 _271 = equal(gl_GlobalInvocationID.xy, uvec2(0u)); - uint _454; - if (_271.x) + uvec2 _265 = uvec2(64u, 1u) * gl_NumWorkGroups.xy; + uvec2 _270 = _265 - gl_GlobalInvocationID.xy; + bvec2 _272 = equal(gl_GlobalInvocationID.xy, 
uvec2(0u)); + uint _460; + if (_272.x) { - _454 = 0u; + _460 = 0u; } else { - _454 = _269.x; + _460 = _270.x; } - uint _455; - if (_271.y) + uint _461; + if (_272.y) { - _455 = 0u; + _461 = 0u; } else { - _455 = _269.y; + _461 = _270.y; } - uint _276 = _264.x; - uint _280 = (gl_GlobalInvocationID.y * _276) + gl_GlobalInvocationID.x; - uint _290 = (_455 * _276) + _454; - vec2 _297 = vec2(gl_GlobalInvocationID.xy); - vec2 _299 = vec2(_264); - float _309 = sqrt(9.81000041961669921875 * length(_166.uModTime.xy * mix(_297, _297 - _299, greaterThan(_297, _299 * 0.5)))) * _166.uModTime.z; - vec2 _316 = vec2(cos(_309), sin(_309)); - vec2 _387 = _316.xx; - vec2 _392 = _316.yy; - vec2 _395 = _392 * _137.distribution[_280].yx; - vec2 _421 = _392 * _137.distribution[_290].yx; - vec2 _429 = (_137.distribution[_290] * _387) + vec2(-_421.x, _421.y); - _225.heights[_280] = packHalf2x16(((_137.distribution[_280] * _387) + vec2(-_395.x, _395.y)) + vec2(_429.x, -_429.y)); + uint _277 = _265.x; + uint _281 = (gl_GlobalInvocationID.y * _277) + gl_GlobalInvocationID.x; + uint _291 = (_461 * _277) + _460; + vec2 _298 = vec2(gl_GlobalInvocationID.xy); + vec2 _300 = vec2(_265); + float _310 = sqrt(9.81000041961669921875 * length(_166.uModTime.xy * mix(_298, _298 - _300, greaterThan(_298, _300 * 0.5)))) * _166.uModTime.z; + vec2 _317 = vec2(cos(_310), sin(_310)); + vec2 _391 = _317.xx; + vec2 _396 = _317.yy; + vec2 _399 = _396 * _137.distribution[_281].yx; + vec2 _426 = _396 * _137.distribution[_291].yx; + vec2 _434 = _137.distribution[_291] * _391 + vec2(-_426.x, _426.y); + _225.heights[_281] = packHalf2x16((_137.distribution[_281] * _391 + vec2(-_399.x, _399.y)) + vec2(_434.x, -_434.y)); } diff --git a/reference/opt/shaders/comp/insert.comp b/reference/opt/shaders/comp/insert.comp index 5ff719449a2..97c55dd5aac 100644 --- a/reference/opt/shaders/comp/insert.comp +++ b/reference/opt/shaders/comp/insert.comp @@ -6,19 +6,9 @@ layout(binding = 0, std430) writeonly buffer SSBO vec4 out_data[]; 
} _27; -vec4 _52; - void main() { - vec4 _45 = _52; - _45.x = 10.0; - vec4 _47 = _45; - _47.y = 30.0; - vec4 _49 = _47; - _49.z = 70.0; - vec4 _51 = _49; - _51.w = 90.0; - _27.out_data[gl_GlobalInvocationID.x] = _51; + _27.out_data[gl_GlobalInvocationID.x] = vec4(10.0, 30.0, 70.0, 90.0); _27.out_data[gl_GlobalInvocationID.x].y = 20.0; } diff --git a/reference/opt/shaders/comp/shared.comp b/reference/opt/shaders/comp/shared.comp index f95cb2b8b9a..62cf4a4b209 100644 --- a/reference/opt/shaders/comp/shared.comp +++ b/reference/opt/shaders/comp/shared.comp @@ -16,7 +16,6 @@ shared float sShared[4]; void main() { sShared[gl_LocalInvocationIndex] = _22.in_data[gl_GlobalInvocationID.x]; - memoryBarrierShared(); barrier(); _44.out_data[gl_GlobalInvocationID.x] = sShared[3u - gl_LocalInvocationIndex]; } diff --git a/reference/opt/shaders/comp/struct-packing.comp b/reference/opt/shaders/comp/struct-packing.comp index cd1eda1b32b..f4b58342d48 100644 --- a/reference/opt/shaders/comp/struct-packing.comp +++ b/reference/opt/shaders/comp/struct-packing.comp @@ -43,48 +43,6 @@ struct Content S4 m3s[8]; }; -struct S0_1 -{ - vec2 a[1]; - float b; -}; - -struct S1_1 -{ - vec3 a; - float b; -}; - -struct S2_1 -{ - vec3 a[1]; - float b; -}; - -struct S3_1 -{ - vec2 a; - float b; -}; - -struct S4_1 -{ - vec2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - S4_1 m3s[8]; -}; - layout(binding = 1, std430) restrict buffer SSBO1 { Content content; @@ -103,9 +61,9 @@ layout(binding = 1, std430) restrict buffer SSBO1 layout(binding = 0, std140) restrict buffer SSBO0 { - Content_1 content; - Content_1 content1[2]; - Content_1 content2; + Content content; + Content content1[2]; + Content content2; mat2 m0; mat2 m1; mat2x3 m2[4]; diff --git a/reference/opt/shaders/comp/torture-loop.comp b/reference/opt/shaders/comp/torture-loop.comp index 5943966c059..9ca2b9591f0 100644 --- 
a/reference/opt/shaders/comp/torture-loop.comp +++ b/reference/opt/shaders/comp/torture-loop.comp @@ -14,27 +14,27 @@ layout(binding = 1, std430) writeonly buffer SSBO2 void main() { - vec4 _99; - _99 = _24.in_data[gl_GlobalInvocationID.x]; - for (int _93 = 0; (_93 + 1) < 10; ) + vec4 _101; + _101 = _24.in_data[gl_GlobalInvocationID.x]; + for (int _95 = 0; (_95 + 1) < 10; ) { - _99 *= 2.0; - _93 += 2; + _101 *= 2.0; + _95 += 2; continue; } - vec4 _98; - _98 = _99; - vec4 _103; - for (uint _94 = 0u; _94 < 16u; _98 = _103, _94++) + vec4 _100; + _100 = _101; + vec4 _105; + for (uint _96 = 0u; _96 < 16u; _100 = _105, _96++) { - _103 = _98; - for (uint _100 = 0u; _100 < 30u; ) + _105 = _100; + for (uint _102 = 0u; _102 < 30u; ) { - _103 = _24.mvp * _103; - _100++; + _105 = _24.mvp * _105; + _102++; continue; } } - _89.out_data[gl_GlobalInvocationID.x] = _98; + _89.out_data[gl_GlobalInvocationID.x] = _100; } diff --git a/reference/opt/shaders/desktop-only/comp/enhanced-layouts.comp b/reference/opt/shaders/desktop-only/comp/enhanced-layouts.comp index 45b25064b6b..ba37ca237b8 100644 --- a/reference/opt/shaders/desktop-only/comp/enhanced-layouts.comp +++ b/reference/opt/shaders/desktop-only/comp/enhanced-layouts.comp @@ -8,13 +8,6 @@ struct Foo int c; }; -struct Foo_1 -{ - int a; - int b; - int c; -}; - layout(binding = 1, std140) buffer SSBO1 { layout(offset = 4) int a; @@ -27,7 +20,7 @@ layout(binding = 2, std430) buffer SSBO2 { layout(offset = 4) int a; layout(offset = 8) int b; - layout(offset = 16) Foo_1 foo; + layout(offset = 16) Foo foo; layout(offset = 48) int c[8]; } ssbo2; diff --git a/reference/opt/shaders/desktop-only/comp/int64.desktop.comp b/reference/opt/shaders/desktop-only/comp/int64.desktop.comp index 702456b303f..28afc2fbd7d 100644 --- a/reference/opt/shaders/desktop-only/comp/int64.desktop.comp +++ b/reference/opt/shaders/desktop-only/comp/int64.desktop.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension 
GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; struct M0 diff --git a/reference/opt/shaders/desktop-only/frag/image-size.frag b/reference/opt/shaders/desktop-only/frag/image-size.frag new file mode 100644 index 00000000000..5bb060398ed --- /dev/null +++ b/reference/opt/shaders/desktop-only/frag/image-size.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0, r32f) uniform readonly writeonly image2D uImage1; +layout(binding = 1, r32f) uniform readonly writeonly image2D uImage2; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vec2(imageSize(uImage1)), vec2(imageSize(uImage2))); +} + diff --git a/reference/opt/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag b/reference/opt/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag new file mode 100644 index 00000000000..1d9062064a8 --- /dev/null +++ b/reference/opt/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0, r32f) uniform image2D uImage1; +layout(binding = 1, r32f) uniform image2D uImage2; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vec2(imageSize(uImage1)), vec2(imageSize(uImage2))); +} + diff --git a/reference/opt/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag b/reference/opt/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag index d5e45bda431..334a6b19446 100644 --- a/reference/opt/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag +++ b/reference/opt/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag @@ -13,14 +13,15 @@ layout(location = 0) in vec3 vClip3; void main() { - vec4 _20 = vClip4; - _20.y = vClip4.w; - FragColor = textureProj(uShadow1D, vec4(_20.x, 0.0, vClip4.z, _20.y)); - vec4 _30 = vClip4; - _30.z = 
vClip4.w; - FragColor = textureProj(uShadow2D, vec4(_30.xy, vClip4.z, _30.z)); + vec4 _17 = vClip4; + vec4 _20 = _17; + _20.y = _17.w; + FragColor = textureProj(uShadow1D, vec4(_20.x, 0.0, _17.z, _20.y)); + vec4 _30 = _17; + _30.z = _17.w; + FragColor = textureProj(uShadow2D, vec4(_30.xy, _17.z, _30.z)); FragColor = textureProj(uSampler1D, vClip2).x; FragColor = textureProj(uSampler2D, vClip3).x; - FragColor = textureProj(uSampler3D, vClip4).x; + FragColor = textureProj(uSampler3D, _17).x; } diff --git a/reference/opt/shaders/desktop-only/tesc/basic.desktop.sso.tesc b/reference/opt/shaders/desktop-only/tesc/basic.desktop.sso.tesc index 5e958256af5..c51699db6db 100644 --- a/reference/opt/shaders/desktop-only/tesc/basic.desktop.sso.tesc +++ b/reference/opt/shaders/desktop-only/tesc/basic.desktop.sso.tesc @@ -4,7 +4,7 @@ layout(vertices = 1) out; in gl_PerVertex { vec4 gl_Position; -} gl_in[gl_MaxPatchVertices]; +} gl_in[]; out gl_PerVertex { diff --git a/reference/opt/shaders/desktop-only/tese/triangle.desktop.sso.tese b/reference/opt/shaders/desktop-only/tese/triangle.desktop.sso.tese index 31027dae80f..c9bacd464e4 100644 --- a/reference/opt/shaders/desktop-only/tese/triangle.desktop.sso.tese +++ b/reference/opt/shaders/desktop-only/tese/triangle.desktop.sso.tese @@ -4,7 +4,7 @@ layout(triangles, cw, fractional_even_spacing) in; in gl_PerVertex { vec4 gl_Position; -} gl_in[gl_MaxPatchVertices]; +} gl_in[]; out gl_PerVertex { diff --git a/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert new file mode 100644 index 00000000000..2b3c5ce0516 --- /dev/null +++ b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert @@ -0,0 +1,24 @@ +#version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif + +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseVertex 
gl_BaseVertexARB +#else +uniform int SPIRV_Cross_BaseVertex; +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else +uniform int SPIRV_Cross_BaseInstance; +#endif +#ifndef GL_ARB_shader_draw_parameters +#error GL_ARB_shader_draw_parameters is not supported. +#endif + +void main() +{ + gl_Position = vec4(float(SPIRV_Cross_BaseVertex), float(SPIRV_Cross_BaseInstance), float(gl_DrawIDARB), 1.0); +} + diff --git a/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert.vk similarity index 100% rename from reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert rename to reference/opt/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert.vk diff --git a/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert new file mode 100644 index 00000000000..bc16d0431aa --- /dev/null +++ b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert @@ -0,0 +1,24 @@ +#version 460 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif + +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseVertex gl_BaseVertexARB +#else +uniform int SPIRV_Cross_BaseVertex; +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else +uniform int SPIRV_Cross_BaseInstance; +#endif +#ifndef GL_ARB_shader_draw_parameters +#error GL_ARB_shader_draw_parameters is not supported. 
+#endif + +void main() +{ + gl_Position = vec4(float(SPIRV_Cross_BaseVertex), float(SPIRV_Cross_BaseInstance), float(gl_DrawIDARB), 1.0); +} + diff --git a/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert b/reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert.vk similarity index 100% rename from reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert rename to reference/opt/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert.vk diff --git a/reference/opt/shaders/frag/16bit-constants.frag b/reference/opt/shaders/frag/16bit-constants.frag deleted file mode 100644 index 57d8256138b..00000000000 --- a/reference/opt/shaders/frag/16bit-constants.frag +++ /dev/null @@ -1,25 +0,0 @@ -#version 450 -#if defined(GL_AMD_gpu_shader_half_float) -#extension GL_AMD_gpu_shader_half_float : require -#elif defined(GL_NV_gpu_shader5) -#extension GL_NV_gpu_shader5 : require -#else -#error No extension available for FP16. -#endif -#if defined(GL_AMD_gpu_shader_int16) -#extension GL_AMD_gpu_shader_int16 : require -#else -#error No extension available for Int16. 
-#endif - -layout(location = 0) out float16_t foo; -layout(location = 1) out int16_t bar; -layout(location = 2) out uint16_t baz; - -void main() -{ - foo = float16_t(1.0); - bar = 2s; - baz = 3us; -} - diff --git a/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag new file mode 100644 index 00000000000..8eaea64e630 --- /dev/null +++ b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -0,0 +1,29 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 1, std140) uniform Count +{ + float count; +} _44; + +layout(binding = 0) uniform mediump sampler2D tex; + +layout(location = 0) in highp vec4 vertex; +layout(location = 0) out vec4 fragColor; + +void main() +{ + highp float _24 = 1.0 / float(textureSize(tex, 0).x); + highp float _34 = dFdx(vertex.x); + float _62; + _62 = 0.0; + for (float _61 = 0.0; _61 < _44.count; ) + { + _62 = _24 * _34 + _62; + _61 += 1.0; + continue; + } + fragColor = vec4(_62); +} + diff --git a/reference/opt/shaders/frag/barycentric-khr.frag b/reference/opt/shaders/frag/barycentric-khr.frag new file mode 100644 index 00000000000..71a44c38575 --- /dev/null +++ b/reference/opt/shaders/frag/barycentric-khr.frag @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_fragment_shader_barycentric : require + +layout(location = 0) out vec2 value; +layout(location = 0) pervertexEXT in vec2 vUV[3]; +layout(location = 3) pervertexEXT in vec2 vUV2[3]; + +void main() +{ + value = ((vUV[0] * gl_BaryCoordEXT.x) + (vUV[1] * gl_BaryCoordEXT.y)) + (vUV[2] * gl_BaryCoordEXT.z); + value += (((vUV2[0] * gl_BaryCoordNoPerspEXT.x) + (vUV2[1] * gl_BaryCoordNoPerspEXT.y)) + (vUV2[2] * gl_BaryCoordNoPerspEXT.z)); +} + diff --git a/reference/opt/shaders/frag/barycentric-nv.frag b/reference/opt/shaders/frag/barycentric-nv.frag index cc3b4de27c9..b3b57e2f880 100644 --- a/reference/opt/shaders/frag/barycentric-nv.frag +++ 
b/reference/opt/shaders/frag/barycentric-nv.frag @@ -1,19 +1,13 @@ #version 450 #extension GL_NV_fragment_shader_barycentric : require -layout(binding = 0, std430) readonly buffer Vertices -{ - vec2 uvs[]; -} _19; - layout(location = 0) out vec2 value; +layout(location = 0) pervertexNV in vec2 vUV[3]; +layout(location = 1) pervertexNV in vec2 vUV2[3]; void main() { - int _23 = 3 * gl_PrimitiveID; - int _32 = _23 + 1; - int _39 = _23 + 2; - value = ((_19.uvs[_23] * gl_BaryCoordNV.x) + (_19.uvs[_32] * gl_BaryCoordNV.y)) + (_19.uvs[_39] * gl_BaryCoordNV.z); - value += (((_19.uvs[_23] * gl_BaryCoordNoPerspNV.x) + (_19.uvs[_32] * gl_BaryCoordNoPerspNV.y)) + (_19.uvs[_39] * gl_BaryCoordNoPerspNV.z)); + value = ((vUV[0] * gl_BaryCoordNV.x) + (vUV[1] * gl_BaryCoordNV.y)) + (vUV[2] * gl_BaryCoordNV.z); + value += (((vUV2[0] * gl_BaryCoordNoPerspNV.x) + (vUV2[1] * gl_BaryCoordNoPerspNV.y)) + (vUV2[2] * gl_BaryCoordNoPerspNV.z)); } diff --git a/reference/opt/shaders/frag/constant-array.frag b/reference/opt/shaders/frag/constant-array.frag index 914888aaf6f..a7a064a163c 100644 --- a/reference/opt/shaders/frag/constant-array.frag +++ b/reference/opt/shaders/frag/constant-array.frag @@ -17,6 +17,10 @@ layout(location = 0) flat in mediump int index; void main() { - FragColor = ((_37[index] + _55[index][index + 1]) + vec4(30.0)) + vec4(_75[index].a + _75[index].b); + highp float _106 = _75[index].a; + float mp_copy_106 = _106; + highp float _107 = _75[index].b; + float mp_copy_107 = _107; + FragColor = ((_37[index] + _55[index][index + 1]) + vec4(30.0)) + vec4(mp_copy_106 + mp_copy_107); } diff --git a/reference/opt/shaders/frag/for-loop-init.frag b/reference/opt/shaders/frag/for-loop-init.frag index 3aee71c7a14..6c2dfb50943 100644 --- a/reference/opt/shaders/frag/for-loop-init.frag +++ b/reference/opt/shaders/frag/for-loop-init.frag @@ -6,62 +6,60 @@ layout(location = 0) out mediump int FragColor; void main() { - mediump int _145; - for (;;) + do { FragColor = 16; - _145 = 0; - 
for (; _145 < 25; ) + for (mediump int _143 = 0; _143 < 25; ) { FragColor += 10; - _145++; + _143++; continue; } - for (mediump int _146 = 1; _146 < 30; ) + for (mediump int _144 = 1; _144 < 30; ) { FragColor += 11; - _146++; + _144++; continue; } - mediump int _147; - _147 = 0; - for (; _147 < 20; ) + mediump int _145; + _145 = 0; + for (; _145 < 20; ) { FragColor += 12; - _147++; + _145++; continue; } - mediump int _62 = _147 + 3; + mediump int _62 = _145 + 3; FragColor += _62; if (_62 == 40) { - for (mediump int _151 = 0; _151 < 40; ) + for (mediump int _149 = 0; _149 < 40; ) { FragColor += 13; - _151++; + _149++; continue; } break; } FragColor += _62; - mediump ivec2 _148; - _148 = ivec2(0); - for (; _148.x < 10; ) + mediump ivec2 _146; + _146 = ivec2(0); + for (; _146.x < 10; ) { - FragColor += _148.y; - mediump ivec2 _144 = _148; - _144.x = _148.x + 4; - _148 = _144; + FragColor += _146.y; + mediump ivec2 _142 = _146; + _142.x = _146.x + 4; + _146 = _142; continue; } - for (mediump int _150 = _62; _150 < 40; ) + for (mediump int _148 = _62; _148 < 40; ) { - FragColor += _150; - _150++; + FragColor += _148; + _148++; continue; } FragColor += _62; break; - } + } while(false); } diff --git a/reference/opt/shaders/frag/frexp-modf.frag b/reference/opt/shaders/frag/frexp-modf.frag index 25f3360aaa2..134878e0340 100644 --- a/reference/opt/shaders/frag/frexp-modf.frag +++ b/reference/opt/shaders/frag/frexp-modf.frag @@ -22,12 +22,14 @@ void main() { ResType _22; _22._m0 = frexp(v0 + 1.0, _22._m1); + highp float _24 = _22._m0; + float mp_copy_24 = _24; ResType_1 _35; _35._m0 = frexp(v1, _35._m1); float r0; float _41 = modf(v0, r0); vec2 r1; vec2 _45 = modf(v1, r1); - FragColor = ((((_22._m0 + _35._m0.x) + _35._m0.y) + _41) + _45.x) + _45.y; + FragColor = ((((mp_copy_24 + _35._m0.x) + _35._m0.y) + _41) + _45.x) + _45.y; } diff --git a/reference/opt/shaders/frag/ground.frag b/reference/opt/shaders/frag/ground.frag index f59a402fe37..d28f93efff3 100644 --- 
a/reference/opt/shaders/frag/ground.frag +++ b/reference/opt/shaders/frag/ground.frag @@ -26,10 +26,13 @@ void main() vec3 _68 = normalize((texture(TexNormalmap, TexCoord).xyz * 2.0) - vec3(1.0)); float _113 = smoothstep(0.0, 0.1500000059604644775390625, (_101.g_CamPos.y + EyeVec.y) * 0.004999999888241291046142578125); float _125 = smoothstep(0.699999988079071044921875, 0.75, _68.y); + highp float hp_copy_125 = _125; vec3 _130 = mix(vec3(0.100000001490116119384765625), mix(vec3(0.100000001490116119384765625, 0.300000011920928955078125, 0.100000001490116119384765625), vec3(0.800000011920928955078125), vec3(_113)), vec3(_125)); + highp float _172 = -hp_copy_125; + float mp_copy_172 = _172; LightingOut = vec4(0.0); NormalOut = vec4((_68 * 0.5) + vec3(0.5), 0.0); - SpecularOut = vec4(1.0 - (_125 * _113), 0.0, 0.0, 0.0); + SpecularOut = vec4(mp_copy_172 * _113 + 1.0, 0.0, 0.0, 0.0); AlbedoOut = vec4(_130 * _130, 1.0); } diff --git a/reference/opt/shaders/frag/helper-invocation.frag b/reference/opt/shaders/frag/helper-invocation.frag index 759a21bdc5a..0c44f72ad6e 100644 --- a/reference/opt/shaders/frag/helper-invocation.frag +++ b/reference/opt/shaders/frag/helper-invocation.frag @@ -9,15 +9,15 @@ layout(location = 0) out vec4 FragColor; void main() { - vec4 _51; + vec4 _52; if (!gl_HelperInvocation) { - _51 = textureLod(uSampler, vUV, 0.0); + _52 = textureLod(uSampler, vUV, 0.0); } else { - _51 = vec4(1.0); + _52 = vec4(1.0); } - FragColor = _51; + FragColor = _52; } diff --git a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag index 91d7e37cdd2..050218b13b3 100644 --- a/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag +++ b/reference/opt/shaders/frag/hoisted-temporary-use-continue-block-as-value.frag @@ -9,20 +9,20 @@ layout(location = 1) flat in mediump int vB; void main() { FragColor = vec4(0.0); + mediump int _49; mediump int 
_58; - for (mediump int _57 = 0, _60 = 0; _57 < vA; FragColor += vec4(1.0), _60 = _58, _57 += (_58 + 10)) + for (mediump int _57 = 0, _60 = 0; _57 < vA; _60 = _58, _57 += _49) { if ((vA + _57) == 20) { _58 = 50; - continue; } else { _58 = ((vB + _57) == 40) ? 60 : _60; - continue; } - continue; + _49 = _58 + 10; + FragColor += vec4(1.0); } } diff --git a/reference/opt/shaders/frag/image-load-store-uint-coord.asm.frag b/reference/opt/shaders/frag/image-load-store-uint-coord.asm.frag index 5dfb4d0028c..f25b4b738ac 100644 --- a/reference/opt/shaders/frag/image-load-store-uint-coord.asm.frag +++ b/reference/opt/shaders/frag/image-load-store-uint-coord.asm.frag @@ -10,8 +10,8 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { imageStore(RWIm, ivec2(uvec2(10u)), vec4(10.0, 0.5, 8.0, 2.0)); - vec4 _69 = imageLoad(RWIm, ivec2(uvec2(30u))); - imageStore(RWBuf, int(80u), _69); - _entryPointOutput = (_69 + texelFetch(ROIm, ivec2(uvec2(50u, 60u)), 0)) + texelFetch(ROBuf, int(80u)); + vec4 _70 = imageLoad(RWIm, ivec2(uvec2(30u))); + imageStore(RWBuf, int(80u), _70); + _entryPointOutput = (_70 + texelFetch(ROIm, ivec2(uvec2(50u, 60u)), 0)) + texelFetch(ROBuf, int(80u)); } diff --git a/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag b/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag index a9457f22d2b..ee64d8335a3 100644 --- a/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag +++ b/reference/opt/shaders/frag/loop-dominator-and-switch-default.frag @@ -2,10 +2,10 @@ precision mediump float; precision highp int; -layout(location = 0) out vec4 fragColor; - vec4 _80; +layout(location = 0) out vec4 fragColor; + void main() { mediump int _18 = int(_80.x); @@ -33,26 +33,15 @@ void main() } default: { - mediump int _84; vec4 _88; _88 = _82; - _84 = 0; - mediump int _50; - for (;;) + for (mediump int _84 = 0; _84 < _18; ) { - _50 = _84 + 1; - if (_84 < _18) - { - vec4 _72 = _88; - _72.y = _88.y + 0.5; - _88 = _72; - _84 = 
_50; - continue; - } - else - { - break; - } + vec4 _72 = _88; + _72.y = _88.y + 0.5; + _88 = _72; + _84++; + continue; } _89 = _88; continue; @@ -61,7 +50,6 @@ void main() vec4 _79 = _83; _79.y = _83.y + 0.5; _89 = _79; - continue; } fragColor = _82; } diff --git a/reference/opt/shaders/frag/modf-pointer-function-analysis.frag b/reference/opt/shaders/frag/modf-pointer-function-analysis.frag new file mode 100644 index 00000000000..07160bbdece --- /dev/null +++ b/reference/opt/shaders/frag/modf-pointer-function-analysis.frag @@ -0,0 +1,18 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +void main() +{ + vec4 param; + vec4 _59 = modf(v, param); + vo0 = _59; + vo1 = param; + vec4 param_1 = param; + float _65 = modf(v.x, param_1.x); + vo0.x += _65; + vo1.x += param_1.x; +} + diff --git a/reference/opt/shaders/frag/pixel-interlock-ordered.frag b/reference/opt/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 00000000000..915b56511f2 --- /dev/null +++ b/reference/opt/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/frag/pixel-interlock-unordered.frag b/reference/opt/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 00000000000..13962daf19d --- /dev/null +++ b/reference/opt/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_unordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/frag/post-depth-coverage-es.frag b/reference/opt/shaders/frag/post-depth-coverage-es.frag new file mode 100644 index 00000000000..d086560e5d2 --- /dev/null +++ b/reference/opt/shaders/frag/post-depth-coverage-es.frag @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_post_depth_coverage : require +#extension GL_OES_sample_variables : require +precision mediump float; +precision highp int; +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(gl_SampleMaskIn[0])); +} + diff --git a/reference/opt/shaders/frag/post-depth-coverage.frag b/reference/opt/shaders/frag/post-depth-coverage.frag new file mode 100644 index 00000000000..caca9c03cb5 --- /dev/null +++ b/reference/opt/shaders/frag/post-depth-coverage.frag @@ -0,0 +1,15 @@ +#version 450 +#if defined(GL_ARB_post_depth_coverge) +#extension GL_ARB_post_depth_coverage : require +#else +#extension GL_EXT_post_depth_coverage : require +#endif +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(gl_SampleMaskIn[0])); +} + diff --git a/reference/opt/shaders/frag/round-even.frag b/reference/opt/shaders/frag/round-even.frag new file mode 100644 index 00000000000..ab6f37adc14 --- /dev/null +++ b/reference/opt/shaders/frag/round-even.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vA; 
+layout(location = 1) in float vB; + +void main() +{ + FragColor = roundEven(vA); + FragColor *= roundEven(vB); +} + diff --git a/reference/opt/shaders/frag/round.frag b/reference/opt/shaders/frag/round.frag new file mode 100644 index 00000000000..0f1fc0db0f3 --- /dev/null +++ b/reference/opt/shaders/frag/round.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; + +void main() +{ + FragColor = round(vA); + FragColor *= round(vB); +} + diff --git a/reference/opt/shaders/frag/sample-interlock-ordered.frag b/reference/opt/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 00000000000..9d5f90e4aaf --- /dev/null +++ b/reference/opt/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(sample_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0])); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/frag/sample-interlock-unordered.frag b/reference/opt/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 00000000000..441198814e0 --- /dev/null +++ b/reference/opt/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(sample_interlock_unordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/frag/selection-block-dominator.frag b/reference/opt/shaders/frag/selection-block-dominator.frag index f737f489172..50a5a371c64 100644 --- a/reference/opt/shaders/frag/selection-block-dominator.frag +++ b/reference/opt/shaders/frag/selection-block-dominator.frag @@ -5,7 +5,7 @@ layout(location = 0) out vec4 FragColor; void main() { - for (;;) + do { if (vIndex != 1) { @@ -14,6 +14,6 @@ void main() } FragColor = vec4(10.0); break; - } + } while(false); } diff --git a/reference/opt/shaders/frag/struct-type-unrelated-alias.frag b/reference/opt/shaders/frag/struct-type-unrelated-alias.frag new file mode 100644 index 00000000000..d6fa667f3b4 --- /dev/null +++ b/reference/opt/shaders/frag/struct-type-unrelated-alias.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = 30.0; +} + diff --git a/reference/opt/shaders/frag/switch-unreachable-break.frag b/reference/opt/shaders/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..111a4d4be32 --- /dev/null +++ b/reference/opt/shaders/frag/switch-unreachable-break.frag @@ -0,0 +1,37 @@ +#version 450 + +layout(binding = 0, std140) uniform UBO +{ + int cond; + int cond2; +} _13; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + bool _49; + switch (_13.cond) + { + case 1: + { + if (_13.cond2 < 50) + { + _49 = false; + break; + } + else + { + discard; + } + break; // unreachable workaround + } + default: + { + _49 = true; + break; + } + } + 
FragColor = mix(vec4(20.0), vec4(10.0), bvec4(_49)); +} + diff --git a/reference/opt/shaders/frag/swizzle.frag b/reference/opt/shaders/frag/swizzle.frag index a229e5b0d5e..51f5b198957 100644 --- a/reference/opt/shaders/frag/swizzle.frag +++ b/reference/opt/shaders/frag/swizzle.frag @@ -10,9 +10,11 @@ layout(location = 1) in vec3 vNormal; void main() { - FragColor = vec4(texture(samp, vUV).xyz, 1.0); - FragColor = vec4(texture(samp, vUV).xz, 1.0, 4.0); - FragColor = vec4(texture(samp, vUV).xx, texture(samp, vUV + vec2(0.100000001490116119384765625)).yy); + vec4 _19 = texture(samp, vUV); + float _23 = _19.x; + FragColor = vec4(_23, _19.yz, 1.0); + FragColor = vec4(_23, _19.z, 1.0, 4.0); + FragColor = vec4(_23, _23, texture(samp, vUV + vec2(0.100000001490116119384765625)).yy); FragColor = vec4(vNormal, 1.0); FragColor = vec4(vNormal + vec3(1.7999999523162841796875), 1.0); FragColor = vec4(vUV, vUV + vec2(1.7999999523162841796875)); diff --git a/reference/opt/shaders/frag/texel-fetch-offset.frag b/reference/opt/shaders/frag/texel-fetch-offset.frag index 416f764d43f..520b4ee88b7 100644 --- a/reference/opt/shaders/frag/texel-fetch-offset.frag +++ b/reference/opt/shaders/frag/texel-fetch-offset.frag @@ -8,7 +8,7 @@ layout(location = 0) out vec4 FragColor; void main() { - mediump ivec2 _22 = ivec2(gl_FragCoord.xy); + ivec2 _22 = ivec2(gl_FragCoord.xy); FragColor = texelFetchOffset(uTexture, _22, 0, ivec2(1)); FragColor += texelFetchOffset(uTexture, _22, 0, ivec2(-1, 1)); } diff --git a/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag b/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag new file mode 100644 index 00000000000..90b000f94f0 --- /dev/null +++ b/reference/opt/shaders/frag/ubo-load-row-major-workaround.frag @@ -0,0 +1,46 @@ +#version 450 + +struct RowMajor +{ + mat4 B; +}; + +struct NestedRowMajor +{ + RowMajor rm; +}; + +layout(binding = 2, std140) uniform UBO3 +{ + layout(row_major) NestedRowMajor rm2; +} _17; + +layout(binding = 1, 
std140) uniform UBO2 +{ + layout(row_major) RowMajor rm; +} _35; + +layout(binding = 0, std140) uniform UBO +{ + layout(row_major) mat4 A; + mat4 C; +} _42; + +layout(binding = 3, std140) uniform UBONoWorkaround +{ + mat4 D; +} _56; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 Clip; + +NestedRowMajor spvWorkaroundRowMajor(NestedRowMajor wrap) { return wrap; } +mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; } + +void main() +{ + FragColor = (((spvWorkaroundRowMajor(_17.rm2).rm.B * spvWorkaroundRowMajor(_35.rm.B)) * spvWorkaroundRowMajor(_42.A)) * spvWorkaroundRowMajor(_42.C)) * Clip; + FragColor += (_56.D * Clip); + FragColor = fma(_42.A[1], Clip, FragColor); +} + diff --git a/reference/opt/shaders/frag/ubo_layout.frag b/reference/opt/shaders/frag/ubo_layout.frag index 4b66e1396a7..bc0b01c065f 100644 --- a/reference/opt/shaders/frag/ubo_layout.frag +++ b/reference/opt/shaders/frag/ubo_layout.frag @@ -7,11 +7,6 @@ struct Str mat4 foo; }; -struct Str_1 -{ - mat4 foo; -}; - layout(binding = 0, std140) uniform UBO1 { layout(row_major) Str foo; @@ -19,7 +14,7 @@ layout(binding = 0, std140) uniform UBO1 layout(binding = 1, std140) uniform UBO2 { - Str_1 foo; + Str foo; } ubo0; layout(location = 0) out vec4 FragColor; diff --git a/reference/opt/shaders/geom/geometry-passthrough.geom b/reference/opt/shaders/geom/geometry-passthrough.geom new file mode 100644 index 00000000000..afbd662324f --- /dev/null +++ b/reference/opt/shaders/geom/geometry-passthrough.geom @@ -0,0 +1,22 @@ +#version 450 +#extension GL_NV_geometry_shader_passthrough : require +layout(triangles) in; + +layout(passthrough, location = 0) in VertexBlock +{ + int a; + int b; +} v1[3]; + +layout(location = 2) in VertexBlock2 +{ + int a; + layout(passthrough) int b; +} v2[3]; + + +void main() +{ + gl_Layer = (gl_InvocationID + v1[0].a) + v2[1].b; +} + diff --git a/reference/opt/shaders/geom/multi-stream.geom b/reference/opt/shaders/geom/multi-stream.geom new file mode 100644 
index 00000000000..548164d7804 --- /dev/null +++ b/reference/opt/shaders/geom/multi-stream.geom @@ -0,0 +1,14 @@ +#version 450 +layout(triangles) in; +layout(max_vertices = 2, points) out; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + EmitStreamVertex(0); + EndStreamPrimitive(0); + gl_Position = gl_in[0].gl_Position + vec4(2.0); + EmitStreamVertex(1); + EndStreamPrimitive(1); +} + diff --git a/reference/opt/shaders/geom/transform-feedback-streams.geom b/reference/opt/shaders/geom/transform-feedback-streams.geom new file mode 100644 index 00000000000..4d238b4adff --- /dev/null +++ b/reference/opt/shaders/geom/transform-feedback-streams.geom @@ -0,0 +1,26 @@ +#version 450 +layout(points) in; +layout(max_vertices = 2, points) out; + +layout(xfb_buffer = 1, xfb_stride = 20, stream = 1) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(location = 0, xfb_buffer = 2, xfb_stride = 32, xfb_offset = 16, stream = 1) out vec4 vFoo; +layout(xfb_buffer = 3, xfb_stride = 16, stream = 2) out VertOut +{ + layout(location = 1, xfb_offset = 0) vec4 vBar; +} _23; + + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + EmitStreamVertex(1); + _23.vBar = vec4(5.0); + EmitStreamVertex(2); +} + diff --git a/reference/opt/shaders/legacy/fragment/explicit-lod.legacy.vert b/reference/opt/shaders/legacy/fragment/explicit-lod.legacy.vert new file mode 100644 index 00000000000..b73faa47ab5 --- /dev/null +++ b/reference/opt/shaders/legacy/fragment/explicit-lod.legacy.vert @@ -0,0 +1,11 @@ +#version 100 + +uniform mediump sampler2D tex; + +varying mediump vec4 FragColor; + +void main() +{ + FragColor = texture2DLod(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625), 3.0); +} + diff --git a/reference/opt/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag b/reference/opt/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag new file mode 100644 index 00000000000..10ce5a513f4 --- /dev/null 
+++ b/reference/opt/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag @@ -0,0 +1,36 @@ +#version 100 +precision mediump float; +precision highp int; + +struct Foo +{ + highp vec4 a; + highp vec4 b; +}; + +struct Bar +{ + highp vec4 a; + highp vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +varying highp vec4 baz_foo_a; +varying highp vec4 baz_foo_b; +varying highp vec4 baz_bar_a; +varying highp vec4 baz_bar_b; +varying highp vec4 _33_a_a; +varying highp vec4 _33_a_b; +varying highp vec4 _33_b_a; +varying highp vec4 _33_b_b; + +void main() +{ + gl_FragData[0] = (((_33_a_a + _33_b_b) + baz_foo_b) + baz_foo_a) + baz_bar_b; +} + diff --git a/reference/opt/shaders/legacy/fragment/round.legacy.frag b/reference/opt/shaders/legacy/fragment/round.legacy.frag new file mode 100644 index 00000000000..9033bc3c56c --- /dev/null +++ b/reference/opt/shaders/legacy/fragment/round.legacy.frag @@ -0,0 +1,13 @@ +#version 100 +precision mediump float; +precision highp int; + +varying highp vec4 vA; +varying highp float vB; + +void main() +{ + gl_FragData[0] = floor(vA + vec4(0.5)); + gl_FragData[0] *= floor(vB + float(0.5)); +} + diff --git a/reference/opt/shaders/legacy/fragment/switch.legacy.frag b/reference/opt/shaders/legacy/fragment/switch.legacy.frag new file mode 100644 index 00000000000..169f591a74e --- /dev/null +++ b/reference/opt/shaders/legacy/fragment/switch.legacy.frag @@ -0,0 +1,77 @@ +#version 100 +precision mediump float; +precision highp int; + +varying highp float vIndexF; + +void main() +{ + int _13 = int(vIndexF); + highp vec4 _65; + highp vec4 _66; + highp vec4 _68; + for (int spvDummy25 = 0; spvDummy25 < 1; spvDummy25++) + { + if (_13 == 2) + { + _68 = vec4(0.0, 2.0, 3.0, 4.0); + break; + } + else if ((_13 == 4) || (_13 == 5)) + { + _68 = vec4(1.0, 2.0, 3.0, 4.0); + break; + } + else if ((_13 == 8) || (_13 == 9)) + { + _68 = vec4(40.0, 20.0, 30.0, 40.0); + break; + } + else if (_13 == 10) + { + _65 = vec4(10.0); + highp vec4 _45 = _65 + 
vec4(1.0); + _66 = _45; + highp vec4 _48 = _66 + vec4(2.0); + _68 = _48; + break; + } + else if (_13 == 11) + { + _65 = vec4(0.0); + highp vec4 _45 = _65 + vec4(1.0); + _66 = _45; + highp vec4 _48 = _66 + vec4(2.0); + _68 = _48; + break; + } + else if (_13 == 12) + { + _66 = vec4(0.0); + highp vec4 _48 = _66 + vec4(2.0); + _68 = _48; + break; + } + else + { + _68 = vec4(10.0, 20.0, 30.0, 40.0); + break; + } + } + highp vec4 _70; + for (int spvDummy146 = 0; spvDummy146 < 1; spvDummy146++) + { + if ((_13 == 10) || (_13 == 20)) + { + _70 = vec4(40.0); + break; + } + else + { + _70 = vec4(20.0); + break; + } + } + gl_FragData[0] = _68 + _70; +} + diff --git a/reference/opt/shaders/legacy/vert/implicit-lod.legacy.vert b/reference/opt/shaders/legacy/vert/implicit-lod.legacy.vert index 6e441074482..2d2050498e0 100644 --- a/reference/opt/shaders/legacy/vert/implicit-lod.legacy.vert +++ b/reference/opt/shaders/legacy/vert/implicit-lod.legacy.vert @@ -4,6 +4,6 @@ uniform mediump sampler2D tex; void main() { - gl_Position = texture2D(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625)); + gl_Position = texture2DLod(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625), 0.0); } diff --git a/reference/opt/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert b/reference/opt/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert new file mode 100644 index 00000000000..837a11a843d --- /dev/null +++ b/reference/opt/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert @@ -0,0 +1,17 @@ +#version 100 + +struct Foo +{ + float a[4]; +}; + +varying float foo_a[4]; + +void main() +{ + gl_Position = vec4(1.0); + for (int _46 = 0; _46 < 4; foo_a[_46] = float(_46 + 2), _46++) + { + } +} + diff --git a/reference/opt/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert b/reference/opt/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert new file mode 100644 index 00000000000..cf807c41f7f --- /dev/null +++ 
b/reference/opt/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert @@ -0,0 +1,49 @@ +#version 100 + +struct Foo +{ + vec4 a; + vec4 b; +}; + +struct Bar +{ + vec4 a; + vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +varying vec4 _12_a_a; +varying vec4 _12_a_b; +varying vec4 _12_b_a; +varying vec4 _12_b_b; +varying vec4 baz_foo_a; +varying vec4 baz_foo_b; +varying vec4 baz_bar_a; +varying vec4 baz_bar_b; + +void main() +{ + _12_a_a = vec4(10.0); + _12_a_b = vec4(20.0); + _12_b_a = vec4(30.0); + _12_b_b = vec4(40.0); + _12_a_a = Foo(vec4(50.0), vec4(60.0)).a; + _12_a_b = Foo(vec4(50.0), vec4(60.0)).b; + _12_b_a = Bar(vec4(50.0), vec4(60.0)).a; + _12_b_b = Bar(vec4(50.0), vec4(60.0)).b; + baz_foo_a = Foo(vec4(100.0), vec4(200.0)).a; + baz_foo_b = Foo(vec4(100.0), vec4(200.0)).b; + baz_bar_a = Bar(vec4(300.0), vec4(400.0)).a; + baz_bar_b = Bar(vec4(300.0), vec4(400.0)).b; + baz_foo_a = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).foo.a; + baz_foo_b = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).foo.b; + baz_bar_a = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).bar.a; + baz_bar_b = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).bar.b; +} + diff --git a/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert b/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert index 01a3d73535e..66136d27ae7 100644 --- a/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert +++ b/reference/opt/shaders/legacy/vert/struct-varying.legacy.vert @@ -11,18 +11,13 @@ varying vec2 vout_b; void main() { - { - Output vout = Output(vec4(0.5), vec2(0.25)); - vout_a = vout.a; - vout_b = vout.b; - } - { - Output vout = Output(vec4(0.5), vec2(0.25)); - vout_a = vout.a; - vout_b = vout.b; - } - vout_a = Output(vout_a, vout_b).a; - vout_b = Output(vout_a, vout_b).b; + vout_a = Output(vec4(0.5), vec2(0.25)).a; + vout_b = Output(vec4(0.5), vec2(0.25)).b; + vout_a 
= Output(vec4(0.5), vec2(0.25)).a; + vout_b = Output(vec4(0.5), vec2(0.25)).b; + Output _22 = Output(vout_a, vout_b); + vout_a = _22.a; + vout_b = _22.b; vout_a.x = 1.0; vout_b.y = 1.0; } diff --git a/reference/opt/shaders/legacy/vert/switch-nested.legacy.vert b/reference/opt/shaders/legacy/vert/switch-nested.legacy.vert new file mode 100644 index 00000000000..dd987e8f1b4 --- /dev/null +++ b/reference/opt/shaders/legacy/vert/switch-nested.legacy.vert @@ -0,0 +1,45 @@ +#version 100 + +struct UBO +{ + int func_arg; + int inner_func_arg; +}; + +uniform UBO _34; + +void main() +{ + vec4 _102; + for (int spvDummy30 = 0; spvDummy30 < 1; spvDummy30++) + { + if (_34.func_arg != 0) + { + vec4 _101; + for (int spvDummy45 = 0; spvDummy45 < 1; spvDummy45++) + { + if (_34.inner_func_arg != 0) + { + _101 = vec4(1.0); + break; + } + else + { + _101 = vec4(0.0); + break; + } + break; // unreachable workaround + } + _102 = _101; + break; + } + else + { + _102 = vec4(0.0); + break; + } + break; // unreachable workaround + } + gl_Position = _102; +} + diff --git a/reference/opt/shaders/legacy/vert/transpose.legacy.vert b/reference/opt/shaders/legacy/vert/transpose.legacy.vert index 0d30c0e243b..d725bfbb092 100644 --- a/reference/opt/shaders/legacy/vert/transpose.legacy.vert +++ b/reference/opt/shaders/legacy/vert/transpose.legacy.vert @@ -11,8 +11,20 @@ uniform Buffer _13; attribute vec4 Position; +highp mat4 spvWorkaroundRowMajor(highp mat4 wrap) { return wrap; } +mediump mat4 spvWorkaroundRowMajorMP(mediump mat4 wrap) { return wrap; } + +mat4 spvTranspose(mat4 m) +{ + return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]); +} + void main() { - gl_Position = (((_13.M * (Position * _13.MVPRowMajor)) + (_13.M * (_13.MVPColMajor * Position))) + (_13.M * (_13.MVPRowMajor * Position))) + (_13.M * (Position * _13.MVPColMajor)); + mat4 _55 = _13.MVPRowMajor; + mat4 _61 = 
spvWorkaroundRowMajor(_13.MVPColMajor); + mat4 _80 = spvTranspose(_13.MVPRowMajor) * 2.0; + mat4 _87 = spvTranspose(_61) * 2.0; + gl_Position = (((((((((((spvWorkaroundRowMajor(_13.M) * (Position * _13.MVPRowMajor)) + (spvWorkaroundRowMajor(_13.M) * (spvWorkaroundRowMajor(_13.MVPColMajor) * Position))) + (spvWorkaroundRowMajor(_13.M) * (_13.MVPRowMajor * Position))) + (spvWorkaroundRowMajor(_13.M) * (Position * spvWorkaroundRowMajor(_13.MVPColMajor)))) + (_55 * Position)) + (Position * _61)) + (Position * _55)) + (_61 * Position)) + (_80 * Position)) + (_87 * Position)) + (Position * _80)) + (Position * _87); } diff --git a/reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk b/reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..81f3c96ec18 --- /dev/null +++ b/reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,66 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, lines) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + vec3 _29 = vec3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0); + 
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u) + uvec2(gl_LocalInvocationIndex); + int _128 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _128; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _128 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _128 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _128 + 3; + } +} + diff --git a/reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk b/reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..bacc7fdfdc5 --- /dev/null +++ b/reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,66 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, points) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 
vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + vec3 _29 = vec3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex; + int _124 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _124; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _124 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _124 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _124 + 3; + } +} + diff --git a/reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk b/reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..87fd2c2b7b6 --- /dev/null +++ 
b/reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,66 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, triangles) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + vec3 _29 = vec3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0u, 1u, 2u) + uvec3(gl_LocalInvocationIndex); + int _127 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _127; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _127 
+ 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _127 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _127 + 3; + } +} + diff --git a/reference/opt/shaders/tesc/water_tess.tesc b/reference/opt/shaders/tesc/water_tess.tesc index eb3e84d53d1..d3d9c8b3e06 100644 --- a/reference/opt/shaders/tesc/water_tess.tesc +++ b/reference/opt/shaders/tesc/water_tess.tesc @@ -18,23 +18,23 @@ layout(location = 0) in vec2 vPatchPosBase[]; void main() { - vec2 _430 = (vPatchPosBase[0] - vec2(10.0)) * _41.uScale.xy; - vec2 _440 = ((vPatchPosBase[0] + _41.uPatchSize) + vec2(10.0)) * _41.uScale.xy; - vec3 _445 = vec3(_430.x, -10.0, _430.y); - vec3 _450 = vec3(_440.x, 10.0, _440.y); - vec4 _466 = vec4((_445 + _450) * 0.5, 1.0); - vec3 _513 = vec3(length(_450 - _445) * (-0.5)); - bool _515 = any(lessThanEqual(vec3(dot(_41.uFrustum[0], _466), dot(_41.uFrustum[1], _466), dot(_41.uFrustum[2], _466)), _513)); - bool _525; - if (!_515) + vec2 _431 = (vPatchPosBase[0] - vec2(10.0)) * _41.uScale.xy; + vec2 _441 = ((vPatchPosBase[0] + _41.uPatchSize) + vec2(10.0)) * _41.uScale.xy; + vec3 _446 = vec3(_431.x, -10.0, _431.y); + vec3 _451 = vec3(_441.x, 10.0, _441.y); + vec4 _467 = vec4((_446 + _451) * 0.5, 1.0); + vec3 _514 = vec3(length(_451 - _446) * (-0.5)); + bool _516 = any(lessThanEqual(vec3(dot(_41.uFrustum[0], _467), dot(_41.uFrustum[1], _467), dot(_41.uFrustum[2], _467)), _514)); + bool _526; + if (!_516) { - _525 = any(lessThanEqual(vec3(dot(_41.uFrustum[3], _466), dot(_41.uFrustum[4], _466), dot(_41.uFrustum[5], _466)), _513)); + _526 = any(lessThanEqual(vec3(dot(_41.uFrustum[3], _467), dot(_41.uFrustum[4], _467), dot(_41.uFrustum[5], _467)), _514)); } else { - _525 = _515; + _526 = _516; } - if (!(!_525)) + if (!(!_526)) { gl_TessLevelOuter[0] = -1.0; gl_TessLevelOuter[1] = -1.0; @@ -46,34 +46,34 @@ void main() else { 
vOutPatchPosBase = vPatchPosBase[0]; - vec2 _678 = (vPatchPosBase[0] + (vec2(-0.5) * _41.uPatchSize)) * _41.uScale.xy; - vec2 _706 = (vPatchPosBase[0] + (vec2(0.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _725 = clamp(log2((length(_41.uCamPos - vec3(_706.x, 0.0, _706.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _734 = (vPatchPosBase[0] + (vec2(1.5, -0.5) * _41.uPatchSize)) * _41.uScale.xy; - vec2 _762 = (vPatchPosBase[0] + (vec2(-0.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _781 = clamp(log2((length(_41.uCamPos - vec3(_762.x, 0.0, _762.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _790 = (vPatchPosBase[0] + (vec2(0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _809 = clamp(log2((length(_41.uCamPos - vec3(_790.x, 0.0, _790.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _818 = (vPatchPosBase[0] + (vec2(1.5, 0.5) * _41.uPatchSize)) * _41.uScale.xy; - float _837 = clamp(log2((length(_41.uCamPos - vec3(_818.x, 0.0, _818.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _846 = (vPatchPosBase[0] + (vec2(-0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - vec2 _874 = (vPatchPosBase[0] + (vec2(0.5, 1.5) * _41.uPatchSize)) * _41.uScale.xy; - float _893 = clamp(log2((length(_41.uCamPos - vec3(_874.x, 0.0, _874.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); - vec2 _902 = (vPatchPosBase[0] + (vec2(1.5) * _41.uPatchSize)) * _41.uScale.xy; - float _612 = dot(vec4(_781, _809, clamp(log2((length(_41.uCamPos - vec3(_846.x, 0.0, _846.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _893), vec4(0.25)); - float _618 = dot(vec4(clamp(log2((length(_41.uCamPos - vec3(_678.x, 0.0, _678.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), 
_725, _781, _809), vec4(0.25)); - float _624 = dot(vec4(_725, clamp(log2((length(_41.uCamPos - vec3(_734.x, 0.0, _734.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _809, _837), vec4(0.25)); - float _630 = dot(vec4(_809, _837, _893, clamp(log2((length(_41.uCamPos - vec3(_902.x, 0.0, _902.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), vec4(0.25)); - vec4 _631 = vec4(_612, _618, _624, _630); - vPatchLods = _631; - vec4 _928 = exp2(-min(_631, _631.yzwx)) * _41.uMaxTessLevel.y; - gl_TessLevelOuter[0] = _928.x; - gl_TessLevelOuter[1] = _928.y; - gl_TessLevelOuter[2] = _928.z; - gl_TessLevelOuter[3] = _928.w; - float _935 = _41.uMaxTessLevel.y * exp2(-min(min(min(_612, _618), min(_624, _630)), _809)); - gl_TessLevelInner[0] = _935; - gl_TessLevelInner[1] = _935; + vec2 _681 = (vec2(-0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + vec2 _710 = (vec2(0.5, -0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _729 = clamp(log2((length(_41.uCamPos - vec3(_710.x, 0.0, _710.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + vec2 _739 = (vec2(1.5, -0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + vec2 _768 = (vec2(-0.5, 0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _787 = clamp(log2((length(_41.uCamPos - vec3(_768.x, 0.0, _768.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + vec2 _797 = (vec2(0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _816 = clamp(log2((length(_41.uCamPos - vec3(_797.x, 0.0, _797.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + vec2 _826 = (vec2(1.5, 0.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _845 = clamp(log2((length(_41.uCamPos - vec3(_826.x, 0.0, _826.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, 
_41.uMaxTessLevel.x); + vec2 _855 = (vec2(-0.5, 1.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + vec2 _884 = (vec2(0.5, 1.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _903 = clamp(log2((length(_41.uCamPos - vec3(_884.x, 0.0, _884.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x); + vec2 _913 = (vec2(1.5) * _41.uPatchSize + vPatchPosBase[0]) * _41.uScale.xy; + float _614 = dot(vec4(_787, _816, clamp(log2((length(_41.uCamPos - vec3(_855.x, 0.0, _855.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _903), vec4(0.25)); + float _620 = dot(vec4(clamp(log2((length(_41.uCamPos - vec3(_681.x, 0.0, _681.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _729, _787, _816), vec4(0.25)); + float _626 = dot(vec4(_729, clamp(log2((length(_41.uCamPos - vec3(_739.x, 0.0, _739.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x), _816, _845), vec4(0.25)); + float _632 = dot(vec4(_816, _845, _903, clamp(log2((length(_41.uCamPos - vec3(_913.x, 0.0, _913.y)) + 9.9999997473787516355514526367188e-05) * _41.uDistanceMod), 0.0, _41.uMaxTessLevel.x)), vec4(0.25)); + vec4 _633 = vec4(_614, _620, _626, _632); + vPatchLods = _633; + vec4 _940 = exp2(-min(_633, _633.yzwx)) * _41.uMaxTessLevel.y; + gl_TessLevelOuter[0] = _940.x; + gl_TessLevelOuter[1] = _940.y; + gl_TessLevelOuter[2] = _940.z; + gl_TessLevelOuter[3] = _940.w; + float _948 = _41.uMaxTessLevel.y * exp2(-min(min(min(_614, _620), min(_626, _632)), _816)); + gl_TessLevelInner[0] = _948; + gl_TessLevelInner[1] = _948; } } diff --git a/reference/opt/shaders/tese/load-array-of-array.tese b/reference/opt/shaders/tese/load-array-of-array.tese new file mode 100644 index 00000000000..e4b426d0ad6 --- /dev/null +++ b/reference/opt/shaders/tese/load-array-of-array.tese @@ -0,0 +1,10 @@ +#version 450 +layout(quads, ccw, equal_spacing) in; + 
+layout(location = 0) in vec4 vTexCoord[][1]; + +void main() +{ + gl_Position = (vTexCoord[0u][0] + vTexCoord[2u][0]) + vTexCoord[3u][0]; +} + diff --git a/reference/opt/shaders/tese/patch-input-array.tese b/reference/opt/shaders/tese/patch-input-array.tese new file mode 100644 index 00000000000..413d8b391fb --- /dev/null +++ b/reference/opt/shaders/tese/patch-input-array.tese @@ -0,0 +1,10 @@ +#version 450 +layout(quads, ccw, equal_spacing) in; + +layout(location = 0) patch in float P[4]; + +void main() +{ + gl_Position = vec4(P[0], P[1], P[2], P[3]); +} + diff --git a/reference/opt/shaders/tese/water_tess.tese b/reference/opt/shaders/tese/water_tess.tese index a2aa1044763..c862cfbdc0e 100644 --- a/reference/opt/shaders/tese/water_tess.tese +++ b/reference/opt/shaders/tese/water_tess.tese @@ -21,16 +21,16 @@ layout(location = 0) out vec3 vWorld; void main() { - vec2 _201 = vOutPatchPosBase + (gl_TessCoord.xy * _31.uPatchSize); - vec2 _214 = mix(vPatchLods.yx, vPatchLods.zw, vec2(gl_TessCoord.x)); - float _221 = mix(_214.x, _214.y, gl_TessCoord.y); - mediump float _223 = floor(_221); - vec2 _125 = _201 * _31.uInvHeightmapSize; - vec2 _141 = _31.uInvHeightmapSize * exp2(_223); - vGradNormalTex = vec4(_125 + (_31.uInvHeightmapSize * 0.5), _125 * _31.uScale.zw); - mediump vec3 _253 = mix(textureLod(uHeightmapDisplacement, _125 + (_141 * 0.5), _223).xyz, textureLod(uHeightmapDisplacement, _125 + (_141 * 1.0), _223 + 1.0).xyz, vec3(_221 - _223)); - vec2 _171 = (_201 * _31.uScale.xy) + _253.yz; - vWorld = vec3(_171.x, _253.x, _171.y); + vec2 _202 = gl_TessCoord.xy * _31.uPatchSize + vOutPatchPosBase; + vec2 _216 = mix(vPatchLods.yx, vPatchLods.zw, vec2(gl_TessCoord.x)); + float _223 = mix(_216.x, _216.y, gl_TessCoord.y); + mediump float mp_copy_223 = _223; + mediump float _225 = floor(mp_copy_223); + vec2 _141 = _31.uInvHeightmapSize * exp2(_225); + vGradNormalTex = vec4(_202 * _31.uInvHeightmapSize + (_31.uInvHeightmapSize * 0.5), (_202 * _31.uInvHeightmapSize) * 
_31.uScale.zw); + mediump vec3 _256 = mix(textureLod(uHeightmapDisplacement, _202 * _31.uInvHeightmapSize + (_141 * 0.5), _225).xyz, textureLod(uHeightmapDisplacement, _202 * _31.uInvHeightmapSize + (_141 * 1.0), _225 + 1.0).xyz, vec3(mp_copy_223 - _225)); + vec2 _171 = _202 * _31.uScale.xy + _256.yz; + vWorld = vec3(_171.x, _256.x, _171.y); gl_Position = _31.uMVP * vec4(vWorld, 1.0); } diff --git a/reference/opt/shaders/vert/ground.vert b/reference/opt/shaders/vert/ground.vert index c82c1037b3b..5840c3d5a98 100644 --- a/reference/opt/shaders/vert/ground.vert +++ b/reference/opt/shaders/vert/ground.vert @@ -1,4 +1,7 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct PatchData { @@ -44,44 +47,52 @@ layout(binding = 1) uniform mediump sampler2D TexLOD; layout(binding = 0) uniform mediump sampler2D TexHeightmap; layout(location = 1) in vec4 LODWeights; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) in vec2 Position; layout(location = 1) out vec3 EyeVec; layout(location = 0) out vec2 TexCoord; void main() { - float _300 = all(equal(LODWeights, vec4(0.0))) ? _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w : dot(LODWeights, _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); - float _302 = floor(_300); - uint _307 = uint(_302); - uvec2 _309 = uvec2(Position); - uvec2 _316 = (uvec2(1u) << uvec2(_307, _307 + 1u)) - uvec2(1u); - uint _382; - if (_309.x < 32u) + float _301 = all(equal(LODWeights, vec4(0.0))) ? 
_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w : dot(LODWeights, _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); + float _303 = floor(_301); + uint _308 = uint(_303); + uvec2 _310 = uvec2(Position); + uvec2 _317 = (uvec2(1u) << uvec2(_308, _308 + 1u)) - uvec2(1u); + uint _384; + if (_310.x < 32u) { - _382 = _316.x; + _384 = _317.x; } else { - _382 = 0u; + _384 = 0u; } - uint _383; - if (_309.y < 32u) + uint _385; + if (_310.y < 32u) { - _383 = _316.y; + _385 = _317.y; } else { - _383 = 0u; + _385 = 0u; } - vec4 _344 = vec4((_309 + uvec2(_382, _383)).xyxy & (~_316).xxyy); - vec2 _173 = ((_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _156.InvGroundSize_PatchScale.zw) + mix(_344.xy, _344.zw, vec2(_300 - _302))) * _156.InvGroundSize_PatchScale.xy; - mediump float _360 = textureLod(TexLOD, _173, 0.0).x * 7.96875; - float _362 = floor(_360); - vec2 _185 = _156.InvGroundSize_PatchScale.xy * exp2(_362); - vec3 _230 = (vec3(_173.x, mix(textureLod(TexHeightmap, _173 + (_185 * 0.5), _362).x, textureLod(TexHeightmap, _173 + (_185 * 1.0), _362 + 1.0).x, _360 - _362), _173.y) * _156.GroundScale.xyz) + _156.GroundPosition.xyz; + vec4 _345 = vec4((_310 + uvec2(_384, _385)).xyxy & (~_317).xxyy); + vec2 _167 = _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _156.InvGroundSize_PatchScale.zw + mix(_345.xy, _345.zw, vec2(_301 - _303)); + vec2 _173 = _167 * _156.InvGroundSize_PatchScale.xy; + mediump vec4 _360 = textureLod(TexLOD, _173, 0.0); + mediump float _361 = _360.x; + mediump float _362 = _361 * 7.96875; + float hp_copy_362 = _362; + float _364 = floor(hp_copy_362); + vec2 _185 = _156.InvGroundSize_PatchScale.xy * exp2(_364); + vec3 _230 = vec3(_173.x, mix(textureLod(TexHeightmap, _167 * _156.InvGroundSize_PatchScale.xy + (_185 * 0.5), _364).x, textureLod(TexHeightmap, _167 * _156.InvGroundSize_PatchScale.xy + (_185 * 1.0), _364 + 1.0).x, _361 * 7.96875 + (-_364)), _173.y) * _156.GroundScale.xyz + 
_156.GroundPosition.xyz; EyeVec = _230 - _236.g_CamPos.xyz; - TexCoord = _173 + (_156.InvGroundSize_PatchScale.xy * 0.5); + TexCoord = _167 * _156.InvGroundSize_PatchScale.xy + (_156.InvGroundSize_PatchScale.xy * 0.5); gl_Position = (((_236.g_ViewProj_Row0 * _230.x) + (_236.g_ViewProj_Row1 * _230.y)) + (_236.g_ViewProj_Row2 * _230.z)) + _236.g_ViewProj_Row3; } diff --git a/reference/opt/shaders/vert/invariant.vert b/reference/opt/shaders/vert/invariant.vert index 648ea2947c9..31e0c2d46f6 100644 --- a/reference/opt/shaders/vert/invariant.vert +++ b/reference/opt/shaders/vert/invariant.vert @@ -9,8 +9,7 @@ layout(location = 0) invariant out vec4 vColor; void main() { - vec4 _20 = vInput1 * vInput2; - vec4 _21 = vInput0 + _20; + vec4 _21 = vInput1 * vInput2 + vInput0; gl_Position = _21; vec4 _27 = vInput0 - vInput1; vec4 _29 = _27 * vInput2; diff --git a/reference/opt/shaders/vert/no-contraction.vert b/reference/opt/shaders/vert/no-contraction.vert new file mode 100644 index 00000000000..9f9969cd74d --- /dev/null +++ b/reference/opt/shaders/vert/no-contraction.vert @@ -0,0 +1,18 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 _15 = vA * vB; + precise vec4 _19 = vA + vB; + precise vec4 _23 = vA - vB; + precise vec4 _30 = _15 + vC; + precise vec4 _34 = _15 + _19; + precise vec4 _36 = _34 + _23; + precise vec4 _38 = _36 + _30; + gl_Position = _38; +} + diff --git a/reference/opt/shaders/vert/ocean.vert b/reference/opt/shaders/vert/ocean.vert index 8f82c316d88..489e82959e0 100644 --- a/reference/opt/shaders/vert/ocean.vert +++ b/reference/opt/shaders/vert/ocean.vert @@ -1,4 +1,7 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct PatchData { @@ -45,75 +48,72 @@ layout(binding = 1) uniform mediump sampler2D TexLOD; layout(binding = 0) uniform mediump sampler2D TexDisplacement; layout(location = 1) 
in vec4 LODWeights; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) in vec4 Position; layout(location = 0) out vec3 EyeVec; layout(location = 1) out vec4 TexCoord; -uvec4 _474; - void main() { - float _350 = all(equal(LODWeights, vec4(0.0))) ? _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w : dot(LODWeights, _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); - float _352 = floor(_350); - uint _357 = uint(_352); - uvec4 _359 = uvec4(Position); - uvec2 _366 = (uvec2(1u) << uvec2(_357, _357 + 1u)) - uvec2(1u); - bool _369 = _359.x < 32u; - uint _465; - if (_369) + float _351 = all(equal(LODWeights, vec4(0.0))) ? _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.w : dot(LODWeights, _53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].LODs); + float _353 = floor(_351); + uint _358 = uint(_353); + uvec4 _360 = uvec4(Position); + uvec2 _367 = (uvec2(1u) << uvec2(_358, _358 + 1u)) - uvec2(1u); + bool _370 = _360.x < 32u; + uint _467; + if (_370) { - _465 = _366.x; + _467 = _367.x; } else { - _465 = 0u; + _467 = 0u; } - uvec4 _443 = _474; - _443.x = _465; - bool _379 = _359.y < 32u; - uint _468; - if (_379) + bool _380 = _360.y < 32u; + uint _470; + if (_380) { - _468 = _366.x; + _470 = _367.x; } else { - _468 = 0u; + _470 = 0u; } - uvec4 _447 = _443; - _447.y = _468; - uint _470; - if (_369) + uint _472; + if (_370) { - _470 = _366.y; + _472 = _367.y; } else { - _470 = 0u; + _472 = 0u; } - uvec4 _451 = _447; - _451.z = _470; - uint _472; - if (_379) + uint _474; + if (_380) { - _472 = _366.y; + _474 = _367.y; } else { - _472 = 0u; + _474 = 0u; } - uvec4 _455 = _451; - _455.w = _472; - vec4 _415 = vec4((_359.xyxy + _455) & (~_366).xxyy); - vec2 _197 = ((_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _180.InvOceanSize_PatchScale.zw) + mix(_415.xy, _415.zw, vec2(_350 - _352))) * 
_180.InvOceanSize_PatchScale.xy; + vec4 _416 = vec4((_360.xyxy + uvec4(_467, _470, _472, _474)) & (~_367).xxyy); + vec2 _197 = (_53.Patches[(gl_InstanceID + SPIRV_Cross_BaseInstance)].Position.xz * _180.InvOceanSize_PatchScale.zw + mix(_416.xy, _416.zw, vec2(_351 - _353))) * _180.InvOceanSize_PatchScale.xy; vec2 _204 = _197 * _180.NormalTexCoordScale.zw; - mediump float _431 = textureLod(TexLOD, _197, 0.0).x * 7.96875; - float _433 = floor(_431); - vec2 _220 = (_180.InvOceanSize_PatchScale.xy * exp2(_433)) * _180.NormalTexCoordScale.zw; - vec3 _267 = ((vec3(_197.x, 0.0, _197.y) + mix(textureLod(TexDisplacement, _204 + (_220 * 0.5), _433).yxz, textureLod(TexDisplacement, _204 + (_220 * 1.0), _433 + 1.0).yxz, vec3(_431 - _433))) * _180.OceanScale.xyz) + _180.OceanPosition.xyz; + mediump vec4 _431 = textureLod(TexLOD, _197, 0.0); + mediump float _432 = _431.x; + mediump float _433 = _432 * 7.96875; + float hp_copy_433 = _433; + float _435 = floor(hp_copy_433); + vec2 _220 = (_180.InvOceanSize_PatchScale.xy * exp2(_435)) * _180.NormalTexCoordScale.zw; + vec3 _267 = (vec3(_197.x, 0.0, _197.y) + mix(textureLod(TexDisplacement, _197 * _180.NormalTexCoordScale.zw + (_220 * 0.5), _435).yxz, textureLod(TexDisplacement, _197 * _180.NormalTexCoordScale.zw + (_220 * 1.0), _435 + 1.0).yxz, vec3(_432 * 7.96875 + (-_435)))) * _180.OceanScale.xyz + _180.OceanPosition.xyz; EyeVec = _267 - _273.g_CamPos.xyz; - TexCoord = vec4(_204, _204 * _180.NormalTexCoordScale.xy) + ((_180.InvOceanSize_PatchScale.xyxy * 0.5) * _180.NormalTexCoordScale.zwzw); + TexCoord = (_180.InvOceanSize_PatchScale.xyxy * 0.5) * _180.NormalTexCoordScale.zwzw + vec4(_204, _204 * _180.NormalTexCoordScale.xy); gl_Position = (((_273.g_ViewProj_Row0 * _267.x) + (_273.g_ViewProj_Row1 * _267.y)) + (_273.g_ViewProj_Row2 * _267.z)) + _273.g_ViewProj_Row3; } diff --git a/reference/opt/shaders/vert/read-from-row-major-array.vert b/reference/opt/shaders/vert/read-from-row-major-array.vert index 25fc9495d23..d5d9681d0d5 
100644 --- a/reference/opt/shaders/vert/read-from-row-major-array.vert +++ b/reference/opt/shaders/vert/read-from-row-major-array.vert @@ -8,9 +8,24 @@ layout(binding = 0, std140) uniform Block layout(location = 0) in vec4 a_position; layout(location = 0) out mediump float v_vtxResult; +highp mat2x3 spvWorkaroundRowMajor(highp mat2x3 wrap) { return wrap; } +mediump mat2x3 spvWorkaroundRowMajorMP(mediump mat2x3 wrap) { return wrap; } + void main() { gl_Position = a_position; - v_vtxResult = ((float(abs(_104.var[0][0][0].x - 2.0) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][0].y - 6.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][0].z - (-6.0)) < 0.0500000007450580596923828125)) * ((float(abs(_104.var[0][0][1].x) < 0.0500000007450580596923828125) * float(abs(_104.var[0][0][1].y - 5.0) < 0.0500000007450580596923828125)) * float(abs(_104.var[0][0][1].z - 5.0) < 0.0500000007450580596923828125)); + float _172 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[0].x - 2.0) < 0.0500000007450580596923828125); + mediump float mp_copy_172 = _172; + float _180 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[0].y - 6.0) < 0.0500000007450580596923828125); + mediump float mp_copy_180 = _180; + float _188 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[0].z - (-6.0)) < 0.0500000007450580596923828125); + mediump float mp_copy_188 = _188; + float _221 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[1].x) < 0.0500000007450580596923828125); + mediump float mp_copy_221 = _221; + float _229 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[1].y - 5.0) < 0.0500000007450580596923828125); + mediump float mp_copy_229 = _229; + float _237 = float(abs(spvWorkaroundRowMajor(_104.var[0][0])[1].z - 5.0) < 0.0500000007450580596923828125); + mediump float mp_copy_237 = _237; + v_vtxResult = ((mp_copy_172 * mp_copy_180) * mp_copy_188) * ((mp_copy_221 * mp_copy_229) * mp_copy_237); } diff --git a/reference/opt/shaders/vert/row-major-workaround.vert 
b/reference/opt/shaders/vert/row-major-workaround.vert new file mode 100644 index 00000000000..4fe6885d101 --- /dev/null +++ b/reference/opt/shaders/vert/row-major-workaround.vert @@ -0,0 +1,30 @@ +#version 310 es + +layout(binding = 0, std140) uniform Buffer +{ + layout(row_major) mat4 HP; + layout(row_major) mediump mat4 MP; +} _21; + +layout(binding = 1, std140) uniform Buffer2 +{ + layout(row_major) mediump mat4 MP2; +} _39; + +layout(location = 0) out vec4 H; +layout(location = 0) in vec4 Hin; +layout(location = 1) out mediump vec4 M; +layout(location = 1) in mediump vec4 Min; +layout(location = 2) out mediump vec4 M2; + +highp mat4 spvWorkaroundRowMajor(highp mat4 wrap) { return wrap; } +mediump mat4 spvWorkaroundRowMajorMP(mediump mat4 wrap) { return wrap; } + +void main() +{ + gl_Position = vec4(1.0); + H = spvWorkaroundRowMajor(_21.HP) * Hin; + M = spvWorkaroundRowMajor(_21.MP) * Min; + M2 = spvWorkaroundRowMajorMP(_39.MP2) * Min; +} + diff --git a/reference/opt/shaders/vert/texture_buffer.vert b/reference/opt/shaders/vert/texture_buffer.vert index e9442ce1196..217804dfce9 100644 --- a/reference/opt/shaders/vert/texture_buffer.vert +++ b/reference/opt/shaders/vert/texture_buffer.vert @@ -1,5 +1,5 @@ #version 310 es -#extension GL_OES_texture_buffer : require +#extension GL_EXT_texture_buffer : require layout(binding = 4) uniform highp samplerBuffer uSamp; layout(binding = 5, rgba32f) uniform readonly highp imageBuffer uSampo; diff --git a/reference/opt/shaders/vert/transform-feedback-decorations.vert b/reference/opt/shaders/vert/transform-feedback-decorations.vert new file mode 100644 index 00000000000..23e7cf3c19d --- /dev/null +++ b/reference/opt/shaders/vert/transform-feedback-decorations.vert @@ -0,0 +1,22 @@ +#version 450 + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(location = 0, xfb_buffer = 2, xfb_stride = 32, xfb_offset = 16) out vec4 vFoo; 
+layout(xfb_buffer = 3, xfb_stride = 16) out VertOut +{ + layout(location = 1, xfb_offset = 0) vec4 vBar; +} _22; + + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + _22.vBar = vec4(5.0); +} + diff --git a/reference/opt/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk index 82ebb960856..771d0496447 100644 --- a/reference/opt/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk +++ b/reference/opt/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk @@ -3,7 +3,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer Block; -layout(buffer_reference, std430) buffer Block +layout(buffer_reference, buffer_reference_align = 4, std430) buffer Block { float v; }; diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk new file mode 100644 index 00000000000..f5907d3e2c0 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk @@ -0,0 +1,28 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Bar; +layout(buffer_reference) buffer Foo; +layout(buffer_reference, buffer_reference_align = 8, std430) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(buffer_reference, std430) buffer Foo +{ + uint v; +}; + +layout(push_constant, std430) uniform Push +{ + Bar bar; +} _13; + +void main() +{ + uint _24 = atomicAdd(_13.bar.b, 1u); +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk new file mode 100644 index 00000000000..1808beecbba --- /dev/null +++ 
b/reference/opt/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk @@ -0,0 +1,29 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Bar; +layout(buffer_reference) buffer Foo; +layout(buffer_reference, buffer_reference_align = 8, std430) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(buffer_reference, std430) buffer Foo +{ + uint v; +}; + +layout(push_constant, std430) uniform Push +{ + Bar bar; +} _15; + +void main() +{ + uint v = _15.bar.b; + uint _31 = atomicAdd(_15.bar.a, _15.bar.b); +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk new file mode 100644 index 00000000000..20a4f1b4239 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer PtrInt; +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0, std430) buffer Buf +{ + uvec2 ptr; + PtrInt ptrint; +} _13; + +void main() +{ + _13.ptr = uvec2(_13.ptrint); +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk new file mode 100644 index 00000000000..5cf6e2df36d --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk @@ -0,0 +1,21 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, 
local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer PtrInt; +layout(buffer_reference, buffer_reference_align = 16, std430) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0, std430) buffer Buf +{ + uvec2 ptr; +} _10; + +void main() +{ + PtrInt(_10.ptr).value = 10; +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk index 5752f81b268..8923d21d780 100644 --- a/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk @@ -4,12 +4,12 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer PtrUint; layout(buffer_reference) buffer PtrInt; -layout(buffer_reference, std430) buffer PtrUint +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PtrUint { uint value; }; -layout(buffer_reference, std430) buffer PtrInt +layout(buffer_reference, buffer_reference_align = 16, std430) buffer PtrInt { int value; }; diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk new file mode 100644 index 00000000000..b7e88062a04 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer RO; +layout(buffer_reference) buffer RW; +layout(buffer_reference) buffer WO; +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer RO +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer RW +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) 
coherent writeonly buffer WO +{ + vec4 v[]; +}; + +layout(push_constant, std430) uniform Registers +{ + RO ro; + RW rw; + WO wo; +} registers; + +void main() +{ + registers.rw.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; + registers.wo.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; +} + diff --git a/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk index dfcaac83618..c3855cf634a 100644 --- a/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk +++ b/reference/opt/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk @@ -1,10 +1,14 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_EXT_buffer_reference : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer Node; -layout(buffer_reference, std430) buffer Node +layout(buffer_reference, buffer_reference_align = 16, std430) buffer Node { layout(offset = 0) int value; layout(offset = 16) Node next; @@ -20,26 +24,30 @@ layout(set = 0, binding = 0, std430) restrict buffer LinkedList void main() { Node _45; + Node _114; if (gl_WorkGroupID.x < 4u) { _45 = _50.head1; + _114 = _50.head1; } else { _45 = _50.head2; + _114 = _50.head2; } - restrict Node n = _45; - Node param = n.next; + restrict Node n = _114; + Node param = _114.next; Node param_1 = _50.head1; Node param_2 = _50.head2; - param.value = param_1.value + param_2.value; + _114.next.value = _50.head1.value + _50.head2.value; Node param_4 = _50.head1; - Node param_3 = param_4; - n = param_3; + Node param_3 = _50.head1; + n = _50.head1; int v = _50.head2.value; - n.value = 20; - n.value = v * 10; - uint64_t uptr = uint64_t(_50.head2.next); - Node unode = Node(uptr); + _50.head1.value = 20; + _50.head1.value = _50.head2.value * 10; + 
uint64_t _98 = uint64_t(_50.head2.next); + uint64_t uptr = _98; + Node unode = Node(_98); } diff --git a/reference/opt/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk b/reference/opt/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk new file mode 100644 index 00000000000..b7004746220 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk @@ -0,0 +1,23 @@ +#version 450 +#extension GL_EXT_shader_atomic_float : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std430) buffer SSBO +{ + float v; +} _18; + +layout(set = 0, binding = 0, r32f) uniform image2D uImage; + +shared float shared_v; + +void main() +{ + float _15 = atomicAdd(shared_v, 2.0); + float value = _15; + float _24 = atomicAdd(_18.v, _15); + float _39 = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), _15); + float _45 = imageAtomicExchange(uImage, ivec2(gl_GlobalInvocationID.xy), _15); + value = _45; +} + diff --git a/reference/opt/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk b/reference/opt/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk new file mode 100644 index 00000000000..fbe5e3d9640 --- /dev/null +++ b/reference/opt/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk @@ -0,0 +1,72 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_flags_primitive_culling : require +#extension GL_EXT_ray_tracing : require +layout(primitive_culling); +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std140) uniform Params +{ + uint ray_flags; + uint cull_mask; + vec3 origin; + float tmin; + vec3 dir; + float tmax; + float thit; + uvec2 bda; +} _19; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT AS; + +rayQueryEXT q; +rayQueryEXT q2[2]; + +void main() +{ + rayQueryInitializeEXT(q, AS, _19.ray_flags, _19.cull_mask, _19.origin, _19.tmin, _19.dir, _19.tmax); + rayQueryInitializeEXT(q2[1], 
accelerationStructureEXT(_19.bda), _19.ray_flags, _19.cull_mask, _19.origin, _19.tmin, _19.dir, _19.tmax); + bool _67 = rayQueryProceedEXT(q); + bool res = _67; + rayQueryTerminateEXT(q2[0]); + rayQueryGenerateIntersectionEXT(q, _19.thit); + rayQueryConfirmIntersectionEXT(q2[1]); + float _75 = rayQueryGetRayTMinEXT(q); + float fval = _75; + uint _79 = rayQueryGetRayFlagsEXT(q2[0]); + uint type = _79; + vec3 _82 = rayQueryGetWorldRayDirectionEXT(q); + vec3 fvals = _82; + vec3 _83 = rayQueryGetWorldRayOriginEXT(q); + fvals = _83; + uint _86 = rayQueryGetIntersectionTypeEXT(q2[1], bool(1)); + type = _86; + bool _88 = rayQueryGetIntersectionCandidateAABBOpaqueEXT(q2[1]); + res = _88; + float _91 = rayQueryGetIntersectionTEXT(q2[1], bool(0)); + fval = _91; + int _94 = rayQueryGetIntersectionInstanceCustomIndexEXT(q, bool(1)); + int ival = _94; + int _96 = rayQueryGetIntersectionInstanceIdEXT(q2[0], bool(0)); + ival = _96; + uint _97 = rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(q, bool(1)); + type = _97; + int _99 = rayQueryGetIntersectionGeometryIndexEXT(q2[1], bool(0)); + ival = _99; + int _100 = rayQueryGetIntersectionPrimitiveIndexEXT(q, bool(1)); + ival = _100; + vec2 _103 = rayQueryGetIntersectionBarycentricsEXT(q2[0], bool(0)); + fvals.x = _103.x; + fvals.y = _103.y; + bool _110 = rayQueryGetIntersectionFrontFaceEXT(q, bool(1)); + res = _110; + vec3 _111 = rayQueryGetIntersectionObjectRayDirectionEXT(q, bool(0)); + fvals = _111; + vec3 _113 = rayQueryGetIntersectionObjectRayOriginEXT(q2[0], bool(1)); + fvals = _113; + mat4x3 _117 = rayQueryGetIntersectionObjectToWorldEXT(q, bool(0)); + mat4x3 matrices = _117; + mat4x3 _119 = rayQueryGetIntersectionWorldToObjectEXT(q2[1], bool(1)); + matrices = _119; +} + diff --git a/reference/opt/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk b/reference/opt/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk deleted file mode 100644 index d67e0beeb65..00000000000 --- 
a/reference/opt/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk +++ /dev/null @@ -1,147 +0,0 @@ -#version 310 es -#extension GL_EXT_scalar_block_layout : require -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -struct S0 -{ - vec2 a[1]; - float b; -}; - -struct S1 -{ - vec3 a; - float b; -}; - -struct S2 -{ - vec3 a[1]; - float b; -}; - -struct S3 -{ - vec2 a; - float b; -}; - -struct S4 -{ - vec2 c; -}; - -struct Content -{ - S0 m0s[1]; - S1 m1s[1]; - S2 m2s[1]; - S0 m0; - S1 m1; - S2 m2; - S3 m3; - float m4; - S4 m3s[8]; -}; - -struct S0_1 -{ - vec2 a[1]; - float b; -}; - -struct S1_1 -{ - vec3 a; - float b; -}; - -struct S2_1 -{ - vec3 a[1]; - float b; -}; - -struct S3_1 -{ - vec2 a; - float b; -}; - -struct S4_1 -{ - vec2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - S4_1 m3s[8]; -}; - -layout(set = 0, binding = 1, scalar) restrict buffer SSBO1 -{ - Content content; - Content content1[2]; - Content content2; - mat2 m0; - mat2 m1; - mat2x3 m2[4]; - mat3x2 m3; - layout(row_major) mat2 m4; - layout(row_major) mat2 m5[9]; - layout(row_major) mat2x3 m6[4][2]; - layout(row_major) mat3x2 m7; - float array[]; -} ssbo_430; - -layout(set = 0, binding = 0, std140) restrict buffer SSBO0 -{ - Content_1 content; - Content_1 content1[2]; - Content_1 content2; - mat2 m0; - mat2 m1; - mat2x3 m2[4]; - mat3x2 m3; - layout(row_major) mat2 m4; - layout(row_major) mat2 m5[9]; - layout(row_major) mat2x3 m6[4][2]; - layout(row_major) mat3x2 m7; - float array[]; -} ssbo_140; - -void main() -{ - ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0]; - ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b; - ssbo_430.content.m1s[0].a = ssbo_140.content.m1s[0].a; - ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b; - ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0]; - ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b; - 
ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0]; - ssbo_430.content.m0.b = ssbo_140.content.m0.b; - ssbo_430.content.m1.a = ssbo_140.content.m1.a; - ssbo_430.content.m1.b = ssbo_140.content.m1.b; - ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0]; - ssbo_430.content.m2.b = ssbo_140.content.m2.b; - ssbo_430.content.m3.a = ssbo_140.content.m3.a; - ssbo_430.content.m3.b = ssbo_140.content.m3.b; - ssbo_430.content.m4 = ssbo_140.content.m4; - ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c; - ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c; - ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c; - ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c; - ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c; - ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c; - ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c; - ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c; -} - diff --git a/reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk b/reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk deleted file mode 100644 index 6d288574f74..00000000000 --- a/reference/opt/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk +++ /dev/null @@ -1,110 +0,0 @@ -#version 450 -#extension GL_KHR_shader_subgroup_basic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_shuffle : require -#extension GL_KHR_shader_subgroup_shuffle_relative : require -#extension GL_KHR_shader_subgroup_vote : require -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_clustered : require -#extension GL_KHR_shader_subgroup_quad : require -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -layout(set = 0, binding = 0, std430) buffer SSBO -{ - float FragColor; -} _9; - -void main() -{ - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = 
float(gl_SubgroupInvocationID); - subgroupMemoryBarrier(); - subgroupBarrier(); - subgroupMemoryBarrier(); - subgroupMemoryBarrierBuffer(); - subgroupMemoryBarrierShared(); - subgroupMemoryBarrierImage(); - bool elected = subgroupElect(); - _9.FragColor = vec4(gl_SubgroupEqMask).x; - _9.FragColor = vec4(gl_SubgroupGeMask).x; - _9.FragColor = vec4(gl_SubgroupGtMask).x; - _9.FragColor = vec4(gl_SubgroupLeMask).x; - _9.FragColor = vec4(gl_SubgroupLtMask).x; - vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); - vec3 first = subgroupBroadcastFirst(vec3(20.0)); - uvec4 ballot_value = subgroupBallot(true); - bool inverse_ballot_value = subgroupInverseBallot(ballot_value); - bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); - uint bit_count = subgroupBallotBitCount(ballot_value); - uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); - uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); - uint lsb = subgroupBallotFindLSB(ballot_value); - uint msb = subgroupBallotFindMSB(ballot_value); - uint shuffled = subgroupShuffle(10u, 8u); - uint shuffled_xor = subgroupShuffleXor(30u, 8u); - uint shuffled_up = subgroupShuffleUp(20u, 4u); - uint shuffled_down = subgroupShuffleDown(20u, 4u); - bool has_all = subgroupAll(true); - bool has_any = subgroupAny(true); - bool has_equal = subgroupAllEqual(true); - vec4 added = subgroupAdd(vec4(20.0)); - ivec4 iadded = subgroupAdd(ivec4(20)); - vec4 multiplied = subgroupMul(vec4(20.0)); - ivec4 imultiplied = subgroupMul(ivec4(20)); - vec4 lo = subgroupMin(vec4(20.0)); - vec4 hi = subgroupMax(vec4(20.0)); - ivec4 slo = subgroupMin(ivec4(20)); - ivec4 shi = subgroupMax(ivec4(20)); - uvec4 ulo = subgroupMin(uvec4(20u)); - uvec4 uhi = subgroupMax(uvec4(20u)); - uvec4 anded = subgroupAnd(ballot_value); - uvec4 ored = subgroupOr(ballot_value); - uvec4 xored = subgroupXor(ballot_value); - added = subgroupInclusiveAdd(added); - iadded = subgroupInclusiveAdd(iadded); - multiplied = 
subgroupInclusiveMul(multiplied); - imultiplied = subgroupInclusiveMul(imultiplied); - lo = subgroupInclusiveMin(lo); - hi = subgroupInclusiveMax(hi); - slo = subgroupInclusiveMin(slo); - shi = subgroupInclusiveMax(shi); - ulo = subgroupInclusiveMin(ulo); - uhi = subgroupInclusiveMax(uhi); - anded = subgroupInclusiveAnd(anded); - ored = subgroupInclusiveOr(ored); - xored = subgroupInclusiveXor(ored); - added = subgroupExclusiveAdd(lo); - added = subgroupExclusiveAdd(multiplied); - multiplied = subgroupExclusiveMul(multiplied); - iadded = subgroupExclusiveAdd(imultiplied); - imultiplied = subgroupExclusiveMul(imultiplied); - lo = subgroupExclusiveMin(lo); - hi = subgroupExclusiveMax(hi); - ulo = subgroupExclusiveMin(ulo); - uhi = subgroupExclusiveMax(uhi); - slo = subgroupExclusiveMin(slo); - shi = subgroupExclusiveMax(shi); - anded = subgroupExclusiveAnd(anded); - ored = subgroupExclusiveOr(ored); - xored = subgroupExclusiveXor(ored); - added = subgroupClusteredAdd(added, 4u); - multiplied = subgroupClusteredMul(multiplied, 4u); - iadded = subgroupClusteredAdd(iadded, 4u); - imultiplied = subgroupClusteredMul(imultiplied, 4u); - lo = subgroupClusteredMin(lo, 4u); - hi = subgroupClusteredMax(hi, 4u); - ulo = subgroupClusteredMin(ulo, 4u); - uhi = subgroupClusteredMax(uhi, 4u); - slo = subgroupClusteredMin(slo, 4u); - shi = subgroupClusteredMax(shi, 4u); - anded = subgroupClusteredAnd(anded, 4u); - ored = subgroupClusteredOr(ored, 4u); - xored = subgroupClusteredXor(xored, 4u); - vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); - vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); - vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); - vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); -} - diff --git a/reference/opt/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk b/reference/opt/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk new file mode 100644 index 00000000000..153164920f0 --- 
/dev/null +++ b/reference/opt/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out vec4 FragColor; + +void main() +{ + bool _15 = helperInvocationEXT(); + demote; + if (!_15) + { + FragColor = vec4(1.0, 0.0, 0.0, 1.0); + } +} + diff --git a/reference/opt/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk b/reference/opt/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk new file mode 100644 index 00000000000..688a5800d12 --- /dev/null +++ b/reference/opt/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk @@ -0,0 +1,9 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; + bool _9 = helperInvocationEXT(); +} + diff --git a/reference/opt/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk b/reference/opt/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk index 294f908d140..891ed232e8b 100644 --- a/reference/opt/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk +++ b/reference/opt/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk @@ -1,19 +1,24 @@ #version 450 #extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_samplerless_texture_functions : require layout(set = 0, binding = 2, std140) uniform UBO { vec4 v[64]; } ubos[]; -layout(set = 0, binding = 3, std430) readonly buffer SSBO +layout(set = 0, binding = 3, std430) buffer SSBO { + uint counter; vec4 v[]; } ssbos[]; layout(set = 0, binding = 0) uniform texture2D uSamplers[]; layout(set = 0, binding = 1) uniform sampler uSamps[]; layout(set = 0, binding = 4) uniform sampler2D uCombinedSamplers[]; +layout(set = 0, binding = 0) uniform texture2DMS uSamplersMS[]; +layout(set = 0, binding = 5, r32f) uniform image2D uImages[]; +layout(set = 0, binding = 5, r32ui) uniform uimage2D uImagesU32[]; layout(location = 0) flat in int vIndex; layout(location = 0) out vec4 
FragColor; @@ -23,9 +28,37 @@ void main() { int _22 = vIndex + 10; int _32 = vIndex + 40; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(_22)], uSamps[nonuniformEXT(_32)]), vUV); - FragColor = texture(uCombinedSamplers[nonuniformEXT(_22)], vUV); - FragColor += ubos[nonuniformEXT(vIndex + 20)].v[_32]; - FragColor += ssbos[nonuniformEXT(vIndex + 50)].v[vIndex + 60]; + FragColor = texture(nonuniformEXT(sampler2D(uSamplers[_22], uSamps[_32])), vUV); + int _49 = _22; + FragColor = texture(uCombinedSamplers[nonuniformEXT(_49)], vUV); + int _65 = vIndex + 20; + int _69 = _32; + FragColor += ubos[nonuniformEXT(_65)].v[_69]; + int _83 = vIndex + 50; + int _88 = vIndex + 60; + FragColor += ssbos[nonuniformEXT(_83)].v[_88]; + int _100 = vIndex + 70; + ssbos[nonuniformEXT(_88)].v[_100] = vec4(20.0); + ivec2 _111 = ivec2(vUV); + FragColor = texelFetch(uSamplers[nonuniformEXT(_49)], _111, 0); + int _116 = vIndex + 100; + uint _122 = atomicAdd(ssbos[_116].counter, 100u); + vec4 _147 = FragColor; + vec2 _149 = _147.xy + (textureQueryLod(nonuniformEXT(sampler2D(uSamplers[_22], uSamps[_32])), vUV) + textureQueryLod(uCombinedSamplers[nonuniformEXT(_49)], vUV)); + FragColor.x = _149.x; + FragColor.y = _149.y; + FragColor.x += float(textureQueryLevels(uSamplers[nonuniformEXT(_65)])); + FragColor.y += float(textureSamples(uSamplersMS[nonuniformEXT(_65)])); + vec4 _189 = FragColor; + vec2 _191 = _189.xy + vec2(textureSize(uSamplers[nonuniformEXT(_65)], 0)); + FragColor.x = _191.x; + FragColor.y = _191.y; + FragColor += imageLoad(uImages[nonuniformEXT(_83)], _111); + vec4 _218 = FragColor; + vec2 _220 = _218.xy + vec2(imageSize(uImages[nonuniformEXT(_65)])); + FragColor.x = _220.x; + FragColor.y = _220.y; + imageStore(uImages[nonuniformEXT(_88)], _111, vec4(50.0)); + uint _248 = imageAtomicAdd(uImagesU32[nonuniformEXT(_100)], _111, 40u); } diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag 
b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag index df2994efb92..f77b448cdc8 100644 --- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag +++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag @@ -13,7 +13,11 @@ layout(location = 0) out vec4 FragColor; void main() { - vec2 _95 = (vTex + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[1], 0)))) + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[2], 1))); + highp vec2 _76 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[1], 0)); + vec2 mp_copy_76 = _76; + highp vec2 _86 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[2], 1)); + vec2 mp_copy_86 = _86; + vec2 _95 = (vTex + mp_copy_76) + mp_copy_86; FragColor = ((((texture(SPIRV_Cross_CombineduTextureuSampler[2], _95) + texture(SPIRV_Cross_CombineduTextureuSampler[1], _95)) + texture(SPIRV_Cross_CombineduTextureuSampler[1], _95)) + texture(SPIRV_Cross_CombineduTextureArrayuSampler[3], vTex3)) + texture(SPIRV_Cross_CombineduTextureCubeuSampler[1], vTex3)) + texture(SPIRV_Cross_CombineduTexture3DuSampler[2], vTex3); } diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk index d275a0f4086..7a0c428d193 100644 --- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk @@ -14,7 +14,11 @@ layout(location = 0) out vec4 FragColor; void main() { - vec2 _95 = (vTex + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture[1], uSampler), 0)))) + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture[2], uSampler), 1))); + highp vec2 _76 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[1], uSampler), 0)); + vec2 mp_copy_76 = _76; + highp vec2 _86 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[2], uSampler), 1)); + vec2 
mp_copy_86 = _86; + vec2 _95 = (vTex + mp_copy_76) + mp_copy_86; FragColor = ((((texture(sampler2D(uTexture[2], uSampler), _95) + texture(sampler2D(uTexture[1], uSampler), _95)) + texture(sampler2D(uTexture[1], uSampler), _95)) + texture(sampler2DArray(uTextureArray[3], uSampler), vTex3)) + texture(samplerCube(uTextureCube[1], uSampler), vTex3)) + texture(sampler3D(uTexture3D[2], uSampler), vTex3); } diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag index aad1e43662b..c664bd55b17 100644 --- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag +++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag @@ -13,7 +13,11 @@ layout(location = 0) out vec4 FragColor; void main() { - vec2 _73 = (vTex + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 0)))) + (vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 1))); + highp vec2 _54 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 0)); + vec2 mp_copy_54 = _54; + highp vec2 _64 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 1)); + vec2 mp_copy_64 = _64; + vec2 _73 = (vTex + mp_copy_54) + mp_copy_64; FragColor = (((texture(SPIRV_Cross_CombineduTextureuSampler, _73) + texture(SPIRV_Cross_CombineduTextureuSampler, _73)) + texture(SPIRV_Cross_CombineduTextureArrayuSampler, vTex3)) + texture(SPIRV_Cross_CombineduTextureCubeuSampler, vTex3)) + texture(SPIRV_Cross_CombineduTexture3DuSampler, vTex3); } diff --git a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk index b79374aba98..9fcd3252758 100644 --- a/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk @@ -14,7 +14,11 @@ layout(location = 0) out vec4 FragColor; void main() { - vec2 _73 = (vTex 
+ (vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 0)))) + (vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 1))); + highp vec2 _54 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 0)); + vec2 mp_copy_54 = _54; + highp vec2 _64 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 1)); + vec2 mp_copy_64 = _64; + vec2 _73 = (vTex + mp_copy_54) + mp_copy_64; FragColor = (((texture(sampler2D(uTexture, uSampler), _73) + texture(sampler2D(uTexture, uSampler), _73)) + texture(sampler2DArray(uTextureArray, uSampler), vTex3)) + texture(samplerCube(uTextureCube, uSampler), vTex3)) + texture(sampler3D(uTexture3D, uSampler), vTex3); } diff --git a/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk b/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk index d09930f3ad7..512bc915e6a 100644 --- a/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/shader-arithmetic-8bit.nocompat.vk.frag.vk @@ -1,6 +1,4 @@ #version 450 -#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require -#extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #extension GL_EXT_shader_8bit_storage : require @@ -28,42 +26,18 @@ layout(location = 1) out uvec4 FragColorUint; void main() { - int16_t _196 = 10s; - int _197 = 20; - i8vec2 _198 = unpack8(_196); - i8vec4 _199 = unpack8(_197); - _196 = pack16(_198); - _197 = pack32(_199); - ssbo.i8[0] = _199.x; - ssbo.i8[1] = _199.y; - ssbo.i8[2] = _199.z; - ssbo.i8[3] = _199.w; - uint16_t _220 = 10us; - uint _221 = 20u; - u8vec2 _222 = unpack8(_220); - u8vec4 _223 = unpack8(_221); - _220 = pack16(_222); - _221 = pack32(_223); - ssbo.u8[0] = _223.x; - ssbo.u8[1] = _223.y; - ssbo.u8[2] = _223.z; - ssbo.u8[3] = _223.w; - i8vec4 _246 = i8vec4(vColor); - i8vec4 _244 = _246; - _244 += i8vec4(registers.i8); - _244 += i8vec4(-40); - _244 += 
i8vec4(-50); - _244 += i8vec4(int8_t(10), int8_t(20), int8_t(30), int8_t(40)); - _244 += i8vec4(ssbo.i8[4]); - _244 += i8vec4(ubo.i8); - FragColorInt = ivec4(_244); - u8vec4 _271 = u8vec4(_246); - _271 += u8vec4(registers.u8); - _271 += u8vec4(216); - _271 += u8vec4(206); - _271 += u8vec4(uint8_t(10), uint8_t(20), uint8_t(30), uint8_t(40)); - _271 += u8vec4(ssbo.u8[4]); - _271 += u8vec4(ubo.u8); - FragColorUint = uvec4(_271); + i8vec4 _204 = unpack8(20); + ssbo.i8[0] = _204.x; + ssbo.i8[1] = _204.y; + ssbo.i8[2] = _204.z; + ssbo.i8[3] = _204.w; + u8vec4 _229 = unpack8(20u); + ssbo.u8[0] = _229.x; + ssbo.u8[1] = _229.y; + ssbo.u8[2] = _229.z; + ssbo.u8[3] = _229.w; + i8vec4 _249 = i8vec4(vColor); + FragColorInt = ivec4((((((_249 + i8vec4(registers.i8)) + i8vec4(-40)) + i8vec4(-50)) + i8vec4(int8_t(10), int8_t(20), int8_t(30), int8_t(40))) + i8vec4(ssbo.i8[4])) + i8vec4(ubo.i8)); + FragColorUint = uvec4((((((u8vec4(_249) + u8vec4(registers.u8)) + u8vec4(216)) + u8vec4(206)) + u8vec4(uint8_t(10), uint8_t(20), uint8_t(30), uint8_t(40))) + u8vec4(ssbo.u8[4])) + u8vec4(ubo.u8)); } diff --git a/reference/opt/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk b/reference/opt/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk index 34bfea02604..04c4062a6c9 100644 --- a/reference/opt/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk +++ b/reference/opt/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk @@ -8,6 +8,7 @@ layout(location = 0) out float FragColor; void main() { - FragColor = float(f); + float _17 = float(f); + FragColor = _17; } diff --git a/reference/opt/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk b/reference/opt/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk new file mode 100644 index 00000000000..2f7fbc1d953 --- /dev/null +++ b/reference/opt/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT 
float payload; + +void main() +{ + if (payload > 0.0) + { + ignoreIntersectionEXT; + } + else + { + terminateRayEXT; + } +} + diff --git a/reference/opt/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk b/reference/opt/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk new file mode 100644 index 00000000000..9b9e34b3250 --- /dev/null +++ b/reference/opt/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +rayPayloadInNV float payload; + +void main() +{ + if (payload > 0.0) + { + ignoreIntersectionNV(); + } + else + { + terminateRayNV(); + } +} + diff --git a/reference/opt/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk b/reference/opt/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk new file mode 100644 index 00000000000..5adfac164fa --- /dev/null +++ b/reference/opt/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) callableDataInEXT float c; + +void main() +{ + executeCallableEXT(10u, 0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..b6c1876d313 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk new file mode 100644 index 
00000000000..614a04d95e7 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..b6c1876d313 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..614a04d95e7 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..eeccd3bb092 
--- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec2 payload; +hitAttributeEXT vec2 hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..908d96344f3 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec2 payload; +hitAttributeNV vec2 hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a51e6b088f3 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..133bdfc1d90 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo hit; + +void main() +{ + payload = hit; +} + diff --git 
a/reference/opt/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e4e0103ddb5 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_HitKindEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..64f79a8dce0 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_HitKindNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e94e3323c98 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..9004a00c40e --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_HitTNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk 
b/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a013baa11d5 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..d17ab8ce76c --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e28af5d2527 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_InstanceCustomIndexEXT); +} + diff --git a/reference/opt/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..531a1fc2845 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_InstanceCustomIndexNV); +} + diff --git 
a/reference/opt/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0413e0d234a --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_InstanceID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..ff551db7c9d --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_InstanceID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..237d4790e55 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..01afa0e067a --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionNV; +} + 
diff --git a/reference/opt/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..5739ac09ff5 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a49e17a1738 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..7922e1efbf4 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldEXT * vec4(payload, 1.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..fc2c5ed0c2c --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + 
payload = gl_ObjectToWorldNV * vec4(payload, 1.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0bde78724c7 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInEXT Payload payload; + +void main() +{ + payload.a = vec4(10.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..6d865f7a195 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInNV Payload payload; + +void main() +{ + payload.a = vec4(10.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..667c015e8d6 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_PrimitiveID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..d3b0ef19429 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; 
+ +void main() +{ + payload = uint(gl_PrimitiveID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e94e3323c98 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..769c96ad6b7 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTmaxNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..04b89549508 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTminEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..2709899a13a --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTminNV; +} + diff --git 
a/reference/opt/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..05af948b379 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 1.0 + float(gl_InstanceID); +} + diff --git a/reference/opt/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk index 547b9cd51a5..103fd66b801 100644 --- a/reference/opt/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk +++ b/reference/opt/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk @@ -5,6 +5,6 @@ layout(location = 0) rayPayloadInNV float payload; void main() { - payload = 1.0; + payload = 1.0 + float(gl_InstanceID); } diff --git a/reference/opt/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..68ba2bafa54 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..4acf03e0649 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = 
gl_WorldRayDirectionNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a5c6766e055 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginEXT; +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..70241f23620 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginNV; +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..309ca4c6f17 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldToObjectEXT * vec4(payload, 1.0); +} + diff --git a/reference/opt/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk b/reference/opt/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0b93e38acd1 --- /dev/null +++ b/reference/opt/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + 
+void main() +{ + payload = gl_WorldToObjectNV * vec4(payload, 1.0); +} + diff --git a/reference/opt/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk b/reference/opt/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk new file mode 100644 index 00000000000..335f476dc50 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(push_constant, std430) uniform Registers +{ + uvec2 ptr; +} _19; + +layout(location = 0) rayPayloadEXT vec4 payload; + +void main() +{ + traceRayEXT(accelerationStructureEXT(_19.ptr), 1u, 255u, 0u, 0u, 0u, vec3(0.0), 0.0, vec3(0.0, 0.0, -1.0), 100.0, 0); +} + diff --git a/reference/opt/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk new file mode 100644 index 00000000000..2cb00f26f1d --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT vec4 payload; +layout(location = 1) callableDataEXT float blend; +layout(set = 0, binding = 1, rgba32f) uniform writeonly image2D image; + +void main() +{ + traceRayEXT(as, 1u, 255u, 0u, 0u, 0u, vec3(0.0), 0.0, vec3(0.0, 0.0, -1.0), 100.0, 0); + executeCallableEXT(0u, 1); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), payload + vec4(blend)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..1614c49626e --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) 
uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDEXT.xy), vec4(1.0)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..f907e6fd606 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDNV.xy), vec4(1.0)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..240e93daa48 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeEXT.xy) - ivec2(1), vec4(1.0)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..08992c63194 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeNV.xy) - ivec2(1), vec4(1.0)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..434eadf2166 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,31 @@ 
+#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + float a; + float b; +}; + +struct Block +{ + float a; + float b; + Payload c; + Payload d; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT Payload payload2; +layout(location = 1) rayPayloadEXT float payload1; +layout(location = 2) rayPayloadEXT Block _71; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 1); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 2); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), (vec4(payload1) + (vec4(payload2.a) + vec4(payload2.b))) + vec4(((((_71.a + _71.b) + _71.c.a) + _71.c.b) + _71.d.a) + _71.d.b)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..8212fa6484b --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk @@ -0,0 +1,31 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + float a; + float b; +}; + +struct Block +{ + float a; + float b; + Payload c; + Payload d; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureNV as; +layout(location = 1) rayPayloadNV Payload payload2; +layout(location = 0) rayPayloadNV float payload1; +layout(location = 2) rayPayloadNV Block _71; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + traceNV(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 0.0), 1000.0, 1); + traceNV(as, 0u, 255u, 0u, 1u, 0u, vec3(1.0, 0.0, 0.0), 0.0, vec3(0.0, 1.0, 
0.0), 1000.0, 2); + imageStore(image, ivec2(gl_LaunchIDNV.xy), (vec4(payload1) + (vec4(payload2.a) + vec4(payload2.b))) + vec4(((((_71.a + _71.b) + _71.c.a) + _71.c.b) + _71.d.a) + _71.d.b)); +} + diff --git a/reference/opt/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..25b8f2877a5 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,13 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + vec2 _57 = vec2(gl_LaunchIDEXT.xy); + vec2 _61 = vec2(gl_LaunchSizeEXT.xy); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(_57.x / _61.x, _57.y / _61.y, 1.0), 0.0, vec3(0.0, 0.0, -1.0), 1000.0, 0); +} + diff --git a/reference/opt/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk b/reference/opt/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..d8814465958 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0), 0.0, vec3(0.0, 0.0, -1.0), 1000.0, 0); + vec4 _68 = vec4(0.0, 0.0, 0.0, 1.0); + _68.y = payload; + imageStore(image, ivec2(gl_LaunchIDEXT.xy), _68); +} + diff --git a/reference/opt/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk 
b/reference/opt/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..3056e8ad281 --- /dev/null +++ b/reference/opt/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(shaderRecordEXT, std430) buffer sbt +{ + vec3 direction; + float tmax; +} _20; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(0.0), 0.0, _20.direction, _20.tmax, 0); +} + diff --git a/reference/opt/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk b/reference/opt/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk new file mode 100644 index 00000000000..f9eb7335d83 --- /dev/null +++ b/reference/opt/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk @@ -0,0 +1,8 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +void main() +{ + bool _16 = reportIntersectionEXT(0.5, 10u); +} + diff --git a/reference/opt/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk b/reference/opt/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk new file mode 100644 index 00000000000..56873aff06f --- /dev/null +++ b/reference/opt/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk @@ -0,0 +1,8 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +void main() +{ + bool _16 = reportIntersectionNV(0.5, 10u); +} + diff --git a/reference/opt/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk b/reference/opt/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk new file mode 100644 index 00000000000..c055a268144 --- /dev/null +++ b/reference/opt/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float 
payload; + +void main() +{ + payload = 0.0; +} + diff --git a/reference/opt/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk b/reference/opt/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk new file mode 100644 index 00000000000..7e791266163 --- /dev/null +++ b/reference/opt/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadInEXT float p; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0), 0.0, vec3(0.0, 0.0, -1.0), 1000.0, 0); +} + diff --git a/reference/opt/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk b/reference/opt/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk new file mode 100644 index 00000000000..9cadcdb6dce --- /dev/null +++ b/reference/opt/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_device_group : require + +void main() +{ + gl_Position = vec4(float(gl_DeviceIndex)); +} + diff --git a/reference/opt/shaders/vulkan/vert/small-storage.vk.vert b/reference/opt/shaders/vulkan/vert/small-storage.vk.vert index b3aafc8d8c1..2c4beb71e5f 100644 --- a/reference/opt/shaders/vulkan/vert/small-storage.vk.vert +++ b/reference/opt/shaders/vulkan/vert/small-storage.vk.vert @@ -1,10 +1,20 @@ #version 450 -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. 
#endif +#if defined(GL_EXT_shader_explicit_arithmetic_types_int8) #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for Int8. +#endif #if defined(GL_AMD_gpu_shader_half_float) #extension GL_AMD_gpu_shader_half_float : require #elif defined(GL_NV_gpu_shader5) diff --git a/reference/opt/shaders/vulkan/vert/vulkan-vertex.vk.vert b/reference/opt/shaders/vulkan/vert/vulkan-vertex.vk.vert index 60ba1882f82..d939aa625c5 100644 --- a/reference/opt/shaders/vulkan/vert/vulkan-vertex.vk.vert +++ b/reference/opt/shaders/vulkan/vert/vulkan-vertex.vk.vert @@ -1,6 +1,13 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif void main() { diff --git a/reference/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp new file mode 100644 index 00000000000..986cc6289f5 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp @@ -0,0 +1,75 @@ +struct Baz +{ + float c; +}; + +struct Bar +{ + float d[2][4]; + Baz baz[2]; +}; + +struct Foo +{ + column_major float2x2 a; + float2 b; + Bar c[5]; +}; + +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _10 : register(u0); + +void comp_main() +{ + Foo _64; + _64.a = asfloat(uint2x2(_10.Load(0), _10.Load(8), _10.Load(4), _10.Load(12))); + _64.b = asfloat(_10.Load2(16)); + [unroll] + for (int _0ident = 0; _0ident < 5; _0ident++) + { + [unroll] + for (int _1ident = 0; _1ident < 2; _1ident++) + { + [unroll] + for (int _2ident = 0; _2ident < 4; _2ident++) + { + _64.c[_0ident].d[_1ident][_2ident] = asfloat(_10.Load(_2ident * 4 + _1ident * 16 + 
_0ident * 40 + 24)); + } + } + [unroll] + for (int _3ident = 0; _3ident < 2; _3ident++) + { + _64.c[_0ident].baz[_3ident].c = asfloat(_10.Load(_3ident * 4 + _0ident * 40 + 56)); + } + } + _10.Store(224, asuint(_64.a[0].x)); + _10.Store(228, asuint(_64.a[1].x)); + _10.Store(232, asuint(_64.a[0].y)); + _10.Store(236, asuint(_64.a[1].y)); + _10.Store2(240, asuint(_64.b)); + [unroll] + for (int _4ident = 0; _4ident < 5; _4ident++) + { + [unroll] + for (int _5ident = 0; _5ident < 2; _5ident++) + { + [unroll] + for (int _6ident = 0; _6ident < 4; _6ident++) + { + _10.Store(_6ident * 4 + _5ident * 16 + _4ident * 40 + 248, asuint(_64.c[_4ident].d[_5ident][_6ident])); + } + } + [unroll] + for (int _7ident = 0; _7ident < 2; _7ident++) + { + _10.Store(_7ident * 4 + _4ident * 40 + 280, asuint(_64.c[_4ident].baz[_7ident].c)); + } + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..b75157162d9 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,22 @@ +struct T +{ + float c; +}; + +static const T _18 = { 40.0f }; + +RWByteAddressBuffer _7 : register(u0); +RWByteAddressBuffer _10 : register(u1); + +void comp_main() +{ + T v = _18; + _7.Store(40, asuint(v.c)); + _10.Store(480, asuint(v.c)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000000..4f6a3e34c2c --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,18 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _5 : register(u0); + +void comp_main() +{ + uint _20; + 
_5.InterlockedAdd(4, 0, _20); + uint c = _20; + uint _23; + _5.InterlockedExchange(0, c, _23); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..ebc431b3edc --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,105 @@ +RWByteAddressBuffer _3 : register(u0); + +uint spvBitfieldInsert(uint Base, uint Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint2 spvBitfieldInsert(uint2 Base, uint2 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint3 spvBitfieldInsert(uint3 Base, uint3 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint4 spvBitfieldInsert(uint4 Base, uint4 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint spvBitfieldUExtract(uint Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint2 spvBitfieldUExtract(uint2 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint3 spvBitfieldUExtract(uint3 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint4 spvBitfieldUExtract(uint4 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 
0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +int spvBitfieldSExtract(int Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int2 spvBitfieldSExtract(int2 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int2 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int3 spvBitfieldSExtract(int3 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int3 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int4 spvBitfieldSExtract(int4 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int4 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +void comp_main() +{ + int4 _19 = int4(_3.Load4(0)); + uint4 _20 = _3.Load4(16); + _3.Store4(0, uint4(countbits(_19))); + _3.Store4(16, uint4(countbits(_19))); + _3.Store4(0, uint4(int4(countbits(_20)))); + _3.Store4(16, countbits(_20)); + _3.Store4(0, uint4(reversebits(_19))); + _3.Store4(16, reversebits(_20)); + _3.Store4(0, uint4(spvBitfieldSExtract(_19, 1, 11u))); + _3.Store4(16, spvBitfieldSExtract(_20, 11u, 1)); + _3.Store4(0, uint4(spvBitfieldUExtract(_19, 1, 11u))); + _3.Store4(16, spvBitfieldUExtract(_20, 11u, 1)); + _3.Store4(0, uint4(int4(spvBitfieldInsert(_19, _19.wzyx, 1, 11u)))); + _3.Store4(16, spvBitfieldInsert(_20, _20.wzyx, 11u, 1)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..f8a5fb6fa34 --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,25 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + uint4 _19 = _4.Load4(0); + int4 _20 = int4(_4.Load4(16)); + _4.Store4(0, firstbitlow(_19)); + _4.Store4(16, uint4(int4(firstbitlow(_19)))); + _4.Store4(0, uint4(firstbitlow(_20))); + _4.Store4(16, uint4(firstbitlow(_20))); + _4.Store4(0, firstbithigh(_19)); + _4.Store4(16, uint4(int4(firstbithigh(_19)))); + _4.Store4(0, firstbithigh(uint4(_20))); + _4.Store4(16, uint4(int4(firstbithigh(uint4(_20))))); + _4.Store4(0, uint4(firstbithigh(int4(_19)))); + _4.Store4(16, uint4(firstbithigh(int4(_19)))); + _4.Store4(0, uint4(firstbithigh(_20))); + _4.Store4(16, uint4(firstbithigh(_20))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/atomic-increment.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp similarity index 53% rename from reference/opt/shaders-hlsl/comp/atomic-increment.asm.comp rename to reference/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp index f2338f22518..0b6d93e48f8 100644 --- a/reference/opt/shaders-hlsl/comp/atomic-increment.asm.comp +++ b/reference/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp @@ -1,5 +1,4 @@ -RWByteAddressBuffer u0_counter : register(u1); -RWBuffer u0 : register(u0); +RWByteAddressBuffer ssbos[] : register(u0, space0); static uint3 gl_GlobalInvocationID; struct SPIRV_Cross_Input @@ -9,12 +8,12 @@ struct SPIRV_Cross_Input void comp_main() { - uint _29; - u0_counter.InterlockedAdd(0, 1, _29); - u0[uint(asint(asfloat(_29))) + 0u] = uint(int(gl_GlobalInvocationID.x)).x; + uint _24 = gl_GlobalInvocationID.z; + uint _25; + ssbos[NonUniformResourceIndex(_24)].InterlockedAdd(0, 1u, _25); } -[numthreads(4, 1, 1)] +[numthreads(1, 1, 1)] void main(SPIRV_Cross_Input stage_input) { gl_GlobalInvocationID = 
stage_input.gl_GlobalInvocationID; diff --git a/reference/shaders-hlsl-no-opt/asm/comp/constant-composite-undef.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/constant-composite-undef.asm.comp index 4851d21e16b..a9eab1ccafc 100644 --- a/reference/shaders-hlsl-no-opt/asm/comp/constant-composite-undef.asm.comp +++ b/reference/shaders-hlsl-no-opt/asm/comp/constant-composite-undef.asm.comp @@ -1,6 +1,6 @@ -RWByteAddressBuffer block : register(u0); +static float _15; -float _15; +RWByteAddressBuffer block : register(u0); void comp_main() { diff --git a/reference/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..d3dc5337530 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,27 @@ +static const uint3 gl_WorkGroupSize = uint3(4u, 4u, 1u); + +static const int indexable[4] = { 0, 1, 2, 3 }; +static const int indexable_1[4] = { 4, 5, 6, 7 }; + +RWByteAddressBuffer _6 : register(u0); + +static uint3 gl_LocalInvocationID; +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_LocalInvocationID : SV_GroupThreadID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + _6.Store(gl_GlobalInvocationID.x * 4 + 0, uint(indexable[gl_LocalInvocationID.x] + indexable_1[gl_LocalInvocationID.y])); +} + +[numthreads(4, 4, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..a53efc4f7fe --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,11 @@ +static const uint3 gl_WorkGroupSize = uint3(64u, 1u, 1u); + +void comp_main() +{ +} + +[numthreads(64, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp new file mode 100644 index 00000000000..b1232635eac --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp @@ -0,0 +1,31 @@ +struct _8 +{ + float _m0; + float _m1; +}; + +struct _15 +{ + float _m0; + int _m1; +}; + +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _8 _23; + _23._m0 = modf(20.0f, _23._m1); + _15 _24; + _24._m0 = frexp(40.0f, _24._m1); + _4.Store(0, asuint(_23._m0)); + _4.Store(0, asuint(_23._m1)); + _4.Store(0, asuint(_24._m0)); + _4.Store(4, uint(_24._m1)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/atomic-decrement.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp similarity index 53% rename from reference/opt/shaders-hlsl/comp/atomic-decrement.asm.comp rename to reference/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp index b86b5327e7a..07f87ca9e65 100644 --- a/reference/opt/shaders-hlsl/comp/atomic-decrement.asm.comp +++ b/reference/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp @@ -1,5 +1,4 @@ -RWByteAddressBuffer u0_counter : register(u1); -RWBuffer u0 : register(u0); +RWTexture2D uImage[] : register(u0, space0); static uint3 gl_GlobalInvocationID; struct SPIRV_Cross_Input @@ -9,12 +8,12 @@ struct SPIRV_Cross_Input void comp_main() { - uint _29; - u0_counter.InterlockedAdd(0, -1, _29); - u0[uint(asint(asfloat(_29))) + 0u] = 
uint(int(gl_GlobalInvocationID.x)).x; + uint _26 = gl_GlobalInvocationID.z; + uint _31; + InterlockedAdd(uImage[NonUniformResourceIndex(_26)][int2(gl_GlobalInvocationID.xy)], 1u, _31); } -[numthreads(4, 1, 1)] +[numthreads(1, 1, 1)] void main(SPIRV_Cross_Input stage_input) { gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; diff --git a/reference/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 00000000000..dbc881f9982 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,37 @@ +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11u +#endif +static const uint _10 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12u +#endif +static const uint _11 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13u +#endif +static const uint _4 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14u +#endif +static const uint _5 = SPIRV_CROSS_CONSTANT_ID_4; +static const uint3 gl_WorkGroupSize = uint3(3u, _10, _11); + +RWByteAddressBuffer _8 : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + _8.Store4(gl_GlobalInvocationID.x * 16 + 0, asuint(asfloat(_8.Load4(gl_GlobalInvocationID.x * 16 + 0)) + 2.0f.xxxx)); +} + +[numthreads(3, SPIRV_CROSS_CONSTANT_ID_1, SPIRV_CROSS_CONSTANT_ID_2)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp new file mode 100644 index 00000000000..157f9e99218 --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp @@ -0,0 +1,38 @@ +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11 +#endif +static const int _10 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12 +#endif +static const int _11 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13 +#endif +static const int _4 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14 +#endif +static const int _5 = SPIRV_CROSS_CONSTANT_ID_4; +static const uint _29 = (uint(_4) + 3u); +static const uint3 _30 = uint3(_29, _5, 2u); + +RWByteAddressBuffer _8 : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + _8.Store4(gl_GlobalInvocationID.x * 16 + 0, asuint(((((asfloat(_8.Load4(gl_GlobalInvocationID.x * 16 + 0)) + 2.0f.xxxx) + float3(_30).xyzz) * float(_4)) * float(_5)) * float(int(2u)))); +} + +[numthreads(SPIRV_CROSS_CONSTANT_ID_3, SPIRV_CROSS_CONSTANT_ID_4, 2)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp new file mode 100644 index 00000000000..e771d77bb8b --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp @@ -0,0 +1,24 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _3 : register(u1); +cbuffer UBO : register(b0) +{ + uint3 _5_w : packoffset(c0); +}; + +cbuffer SPIRV_Cross_NumWorkgroups +{ + uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); +}; + + +void comp_main() +{ + _3.Store3(0, SPIRV_Cross_NumWorkgroups_1_count + _5_w); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} 
diff --git a/reference/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 00000000000..423beee63fe --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,51 @@ +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +static const int A = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1 +#endif +static const int A_1 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 2 +#endif +static const int A_2 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 3 +#endif +static const int A_3 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 4 +#endif +static const int A_4 = SPIRV_CROSS_CONSTANT_ID_4; +#ifndef SPIRV_CROSS_CONSTANT_ID_5 +#define SPIRV_CROSS_CONSTANT_ID_5 5 +#endif +static const int A_5 = SPIRV_CROSS_CONSTANT_ID_5; +static const int A_6 = (A - A_1); +static const int A_7 = (A_6 - A_2); +static const int A_8 = (A_7 - A_3); +static const int A_9 = (A_8 - A_4); +static const int A_10 = (A_9 - A_5); +static const int A_11 = (A_10 + A_5); +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _5 : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + _5.Store(gl_GlobalInvocationID.x * 4 + 0, uint(A_11)); +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp b/reference/shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp similarity index 100% rename from 
reference/opt/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp rename to reference/shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp diff --git a/reference/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..128a8c52f95 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,89 @@ +struct anon_aa +{ + int foo; +}; + +struct anon_ab +{ + int foo; +}; + +struct anon_a +{ + anon_aa _aa; + anon_ab ab; +}; + +struct anon_ba +{ + int foo; +}; + +struct anon_bb +{ + int foo; +}; + +struct anon_b +{ + anon_ba _ba; + anon_bb bb; +}; + +struct VertexData +{ + anon_a _a; + anon_b b; +}; + +struct anon_ca +{ + int foo; +}; + +struct anon_c +{ + anon_ca _ca; +}; + +struct anon_da +{ + int foo; +}; + +struct anon_d +{ + anon_da da; +}; + +struct anon_e +{ + int a; +}; + +cbuffer UBO : register(b0) +{ + anon_c _16_c : packoffset(c0); + anon_d _16_d : packoffset(c1); +}; + +RWByteAddressBuffer _19 : register(u1); + +static VertexData _3; + +struct SPIRV_Cross_Input +{ + anon_a VertexData__a : TEXCOORD0; + anon_b VertexData_b : TEXCOORD2; +}; + +void frag_main() +{ +} + +void main(SPIRV_Cross_Input stage_input) +{ + _3._a = stage_input.VertexData__a; + _3.b = stage_input.VertexData_b; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..8c61e61b5fb --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,84 @@ +static float4 _32; + +static const float4 _34[2] = { 0.0f.xxxx, 0.0f.xxxx }; + +static float4 vInput; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 vInput : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 
FragColor : SV_Target0; +}; + +void frag_main() +{ + float4 _37 = vInput; + float4 _38 = _37; + _38.x = 1.0f; + _38.y = 2.0f; + _38.z = 3.0f; + _38.w = 4.0f; + FragColor = _38; + float4 _6 = _37; + _6.x = 1.0f; + _6.y = 2.0f; + _6.z = 3.0f; + _6.w = 4.0f; + FragColor = _6; + float4 _42 = _37; + _42.x = 1.0f; + _42.y = 2.0f; + _42.z = 3.0f; + _42.w = 4.0f; + FragColor = _42; + float4 _44 = _37; + _44.x = 1.0f; + float4 _45 = _44; + _45.y = 2.0f; + float4 _46 = _45; + _46.z = 3.0f; + float4 _47 = _46; + _47.w = 4.0f; + FragColor = _47 + _44; + FragColor = _47 + _45; + float4 _49; + _49.x = 1.0f; + _49.y = 2.0f; + _49.z = 3.0f; + _49.w = 4.0f; + FragColor = _49; + float4 _53 = 0.0f.xxxx; + _53.x = 1.0f; + FragColor = _53; + float4 _54[2] = _34; + _54[1].z = 1.0f; + _54[0].w = 2.0f; + FragColor = _54[0]; + FragColor = _54[1]; + float4x4 _58 = float4x4(0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx); + _58[1].z = 1.0f; + _58[2].w = 2.0f; + FragColor = _58[0]; + FragColor = _58[1]; + FragColor = _58[2]; + FragColor = _58[3]; + float4 PHI; + PHI = _46; + float4 _65 = PHI; + _65.w = 4.0f; + FragColor = _65; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vInput = stage_input.vInput; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..e7ffd8d6069 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,37 @@ +struct EmptyStructTest +{ + int empty_struct_member; +}; + +struct EmptyStruct2Test +{ + EmptyStructTest _m0; +}; + +static const EmptyStructTest _30 = { 0 }; +static const EmptyStruct2Test _20 = { { 0 } }; + +float GetValue(EmptyStruct2Test self) +{ + return 0.0f; +} + +float GetValue_1(EmptyStruct2Test self) +{ + return 0.0f; +} + +void frag_main() +{ + 
EmptyStructTest _25 = { 0 }; + EmptyStruct2Test _26 = { _25 }; + EmptyStruct2Test emptyStruct; + float value = GetValue(emptyStruct); + value = GetValue_1(_26); + value = GetValue_1(_20); +} + +void main() +{ + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag new file mode 100644 index 00000000000..fd758b17d74 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag @@ -0,0 +1,37 @@ +ByteAddressBuffer _8 : register(t0, space2); +Texture2D uSamplers[] : register(t0, space0); +SamplerState _uSamplers_sampler[] : register(s0, space0); +Texture2D uSampler : register(t1, space1); +SamplerState _uSampler_sampler : register(s1, space1); + +static float4 gl_FragCoord; +static float4 FragColor; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; + float4 gl_FragCoord : SV_Position; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = uSamplers[NonUniformResourceIndex(_8.Load(40))].SampleLevel(_uSamplers_sampler[NonUniformResourceIndex(_8.Load(40))], vUV, 0.0f); + FragColor += uSampler.SampleLevel(_uSampler_sampler, vUV, float(_8.Load(int(gl_FragCoord.y) * 4 + 0))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag index 44cc8ab221e..0356cf58d9c 100644 --- 
a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag +++ b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag @@ -1,9 +1,9 @@ -struct UBO_1_1 +struct UBO_1 { float4 v[64]; }; -ConstantBuffer ubos[] : register(b0, space2); +ConstantBuffer ubos[] : register(b0, space2); ByteAddressBuffer ssbos[] : register(t0, space3); Texture2D uSamplers[] : register(t0, space0); SamplerState uSamps[] : register(s0, space1); diff --git a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag new file mode 100644 index 00000000000..a692cdcf408 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag @@ -0,0 +1,39 @@ +RWByteAddressBuffer ssbos[] : register(u3, space0); + +static int vIndex; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + nointerpolation int vIndex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + int i = vIndex; + int _42 = i + 60; + int _45 = i + 70; + ssbos[NonUniformResourceIndex(_42)].Store4(_45 * 16 + 16, asuint(20.0f.xxxx)); + int _48 = i + 100; + uint _49; + ssbos[NonUniformResourceIndex(_48)].InterlockedAdd(0, 100u, _49); + int _51 = i; + uint _52; + ssbos[NonUniformResourceIndex(_51)].GetDimensions(_52); + _52 = (_52 - 16) / 16; + FragColor.z += float(int(_52)); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vIndex = stage_input.vIndex; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..dcbe5d134f1 --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,17 @@ +static float gl_FragDepth = 0.5f; +struct SPIRV_Cross_Output +{ + float gl_FragDepth : SV_Depth; +}; + +void frag_main() +{ +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_FragDepth = gl_FragDepth; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..2ce5fd41cf6 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,46 @@ +static int uninit_int = 0; +static int4 uninit_vector = int4(0, 0, 0, 0); +static float4x4 uninit_matrix = float4x4(0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx); + +struct Foo +{ + int a; +}; + +static Foo uninit_foo = { 0 }; + +static float4 vColor; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 vColor : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + int _39 = 0; + if (vColor.x > 10.0f) + { + _39 = 10; + } + else + { + _39 = 20; + } + FragColor = vColor; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vColor = stage_input.vColor; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..8a47b91bf9a --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag @@ -0,0 +1,33 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RWByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : 
SV_Position; +}; + +void callee2() +{ + int _31 = int(gl_FragCoord.x); + _7.Store(_31 * 4 + 0, _7.Load(_31 * 4 + 0) + 1u); +} + +void callee() +{ + int _39 = int(gl_FragCoord.x); + _9.Store(_39 * 4 + 0, _9.Load(_39 * 4 + 0) + 1u); + callee2(); +} + +void frag_main() +{ + callee(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..01bbe7ddc27 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag @@ -0,0 +1,43 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RWByteAddressBuffer _13 : register(u2, space0); +RasterizerOrderedByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _44 = int(gl_FragCoord.x); + _7.Store(_44 * 4 + 0, _7.Load(_44 * 4 + 0) + 1u); +} + +void callee() +{ + int _52 = int(gl_FragCoord.x); + _9.Store(_52 * 4 + 0, _9.Load(_52 * 4 + 0) + 1u); + callee2(); + if (true) + { + } +} + +void _35() +{ + _13.Store(int(gl_FragCoord.x) * 4 + 0, 4u); +} + +void frag_main() +{ + callee(); + _35(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..c1fb6ebbc23 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag @@ -0,0 +1,43 @@ 
+RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RasterizerOrderedByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _37 = int(gl_FragCoord.x); + _7.Store(_37 * 4 + 0, _7.Load(_37 * 4 + 0) + 1u); +} + +void callee() +{ + int _45 = int(gl_FragCoord.x); + _9.Store(_45 * 4 + 0, _9.Load(_45 * 4 + 0) + 1u); + callee2(); +} + +void _29() +{ +} + +void _31() +{ +} + +void frag_main() +{ + callee(); + _29(); + _31(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..1f1f6fac107 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,31 @@ +static float FragColor; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +float _mat3(float a) +{ + return a + 1.0f; +} + +float _RESERVED_IDENTIFIER_FIXUP_gl_Foo(int a) +{ + return float(a) + 1.0f; +} + +void frag_main() +{ + float param = 2.0f; + int param_1 = 4; + FragColor = _mat3(param) + _RESERVED_IDENTIFIER_FIXUP_gl_Foo(param_1); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..b6d1a902424 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,62 @@ +struct _15 +{ + float _m0; +}; + +static const _15 _25 = { 0.0f }; +static const _15 _26 = { 1.0f }; +static const float _29[2] = { 0.0f, 1.0f 
}; +static const float _30[2] = { 1.0f, 0.0f }; + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void spvSelectComposite(out _15 out_value, bool cond, _15 true_val, _15 false_val) +{ + if (cond) + { + out_value = true_val; + } + else + { + out_value = false_val; + } +} + +void spvSelectComposite(out float out_value[2], bool cond, float true_val[2], float false_val[2]) +{ + if (cond) + { + out_value = true_val; + } + else + { + out_value = false_val; + } +} + +void frag_main() +{ + FragColor = false ? float4(1.0f, 1.0f, 0.0f, 1.0f) : float4(0.0f, 0.0f, 0.0f, 1.0f); + FragColor = false ? 1.0f.xxxx : 0.0f.xxxx; + FragColor = float4(bool4(false, true, false, true).x ? float4(1.0f, 1.0f, 0.0f, 1.0f).x : float4(0.0f, 0.0f, 0.0f, 1.0f).x, bool4(false, true, false, true).y ? float4(1.0f, 1.0f, 0.0f, 1.0f).y : float4(0.0f, 0.0f, 0.0f, 1.0f).y, bool4(false, true, false, true).z ? float4(1.0f, 1.0f, 0.0f, 1.0f).z : float4(0.0f, 0.0f, 0.0f, 1.0f).z, bool4(false, true, false, true).w ? 
float4(1.0f, 1.0f, 0.0f, 1.0f).w : float4(0.0f, 0.0f, 0.0f, 1.0f).w); + FragColor = float4(bool4(false, true, false, true)); + _15 _38; + spvSelectComposite(_38, false, _25, _26); + _15 _32 = _38; + float _39[2]; + spvSelectComposite(_39, true, _29, _30); + float _33[2] = _39; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag new file mode 100644 index 00000000000..b88ac0dd726 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag @@ -0,0 +1,42 @@ +struct Foo +{ + row_major float3x3 m[2]; + float v; +}; + +struct Bar +{ + row_major float3x3 m; + float v; +}; + +cbuffer FooUBO : register(b0) +{ + Foo _6_foo : packoffset(c0); +}; + +cbuffer BarUBO : register(b1) +{ + Bar _9_bar : packoffset(c0); +}; + + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = (_6_foo.v + _9_bar.v).xxxx; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..b98e681f6df --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag @@ -0,0 +1,30 @@ +static int index; +static uint FragColor; + +struct SPIRV_Cross_Input +{ + nointerpolation int index : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + uint FragColor : SV_Target0; +}; + +void frag_main() +{ + 
uint _17 = uint(index); + FragColor = uint(WaveActiveMin(index)); + FragColor = uint(WaveActiveMax(int(_17))); + FragColor = WaveActiveMin(uint(index)); + FragColor = WaveActiveMax(_17); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + index = stage_input.index; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 86% rename from reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag index ba66ccf6261..19af59d3bff 100644 --- a/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag +++ b/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag @@ -17,14 +17,6 @@ void frag_main() int j; int _30; int _31; - if (vIndex != 0 && vIndex != 1 && vIndex != 11 && vIndex != 2 && vIndex != 3 && vIndex != 4 && vIndex != 5) - { - _30 = 2; - } - if (vIndex == 1 || vIndex == 11) - { - _31 = 1; - } switch (vIndex) { case 0: @@ -37,6 +29,7 @@ void frag_main() } default: { + _30 = 2; j = _30; _31 = 0; j = _31; @@ -45,6 +38,7 @@ void frag_main() case 1: case 11: { + _31 = 1; j = _31; break; } diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000000..021333cc742 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,52 @@ +static float4 A; +static float4 B; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 A : TEXCOORD0; + float4 B : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +float4 test_vector() +{ + bool4 le = 
bool4(!(A.x >= B.x), !(A.y >= B.y), !(A.z >= B.z), !(A.w >= B.w)); + bool4 leq = bool4(!(A.x > B.x), !(A.y > B.y), !(A.z > B.z), !(A.w > B.w)); + bool4 ge = bool4(!(A.x <= B.x), !(A.y <= B.y), !(A.z <= B.z), !(A.w <= B.w)); + bool4 geq = bool4(!(A.x < B.x), !(A.y < B.y), !(A.z < B.z), !(A.w < B.w)); + bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w); + bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq); +} + +float test_scalar() +{ + bool le = !(A.x >= B.x); + bool leq = !(A.x > B.x); + bool ge = !(A.x <= B.x); + bool geq = !(A.x < B.x); + bool eq = A.x == B.x; + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void frag_main() +{ + FragColor = test_vector() + test_scalar().xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + A = stage_input.A; + B = stage_input.B; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000000..0172c20bb91 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,52 @@ +static float4 A; +static float4 B; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 A : TEXCOORD0; + float4 B : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +float4 test_vector() +{ + bool4 le = bool4(A.x < B.x, A.y < B.y, A.z < B.z, A.w < B.w); + bool4 leq = bool4(A.x <= B.x, A.y <= B.y, A.z <= B.z, A.w <= B.w); + bool4 ge = bool4(A.x > B.x, A.y > B.y, A.z > B.z, A.w > B.w); + bool4 geq = bool4(A.x >= B.x, A.y >= B.y, A.z >= B.z, 
A.w >= B.w); + bool4 eq = bool4(A.x == B.x, A.y == B.y, A.z == B.z, A.w == B.w); + bool4 neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + neq = bool4(A.x != B.x, A.y != B.y, A.z != B.z, A.w != B.w); + return ((((float4(le) + float4(leq)) + float4(ge)) + float4(geq)) + float4(eq)) + float4(neq); +} + +float test_scalar() +{ + bool le = A.x < B.x; + bool leq = A.x <= B.x; + bool ge = A.x > B.x; + bool geq = A.x >= B.x; + bool eq = A.x == B.x; + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void frag_main() +{ + FragColor = test_vector() + test_scalar().xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + A = stage_input.A; + B = stage_input.B; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag index a01d5fa7d0c..95a48835d57 100644 --- a/reference/shaders-hlsl-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag +++ b/reference/shaders-hlsl-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag @@ -1,3 +1,5 @@ +static float4 undef; + static float4 FragColor; static float4 vFloat; @@ -11,8 +13,6 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; -float4 undef; - void frag_main() { FragColor = float4(undef.x, vFloat.y, 0.0f, vFloat.w) + float4(vFloat.z, vFloat.y, 0.0f, vFloat.w); diff --git a/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000000..8fbd2915ae3 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,63 @@ +struct _12 +{ + float _m0; +}; + +static uint 
gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 B : TEXCOORD1; + float4 gl_Position : SV_Position; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 C : TEXCOORD3; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float _9[64]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], _12 _11, inout uint3 gl_PrimitiveTriangleIndicesEXT[8], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8]) +{ + _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); + GroupMemoryBarrierWithGroupSync(); + SetMeshOutputCounts(24u, 8u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _9[gl_LocalInvocationIndex]; + float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.x = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.y = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.z = _63; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].B.w = _63; + if (gl_LocalInvocationIndex < 8u) + { + uint _71 = gl_LocalInvocationIndex * 3u; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(_71, _71 + 1u, _71 + 2u); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex); + uint _81 = gl_LocalInvocationIndex ^ 2u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.x = _9[_81]; + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.y = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.z = _9[_81]; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].C.w = _9[_81]; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], in payload _12 _11, out indices uint3 gl_PrimitiveTriangleIndicesEXT[8], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[8]) +{ + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, _11, gl_PrimitiveTriangleIndicesEXT, gl_MeshPrimitivesEXT); +} diff --git a/reference/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag b/reference/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..dbdd784529d --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,44 @@ +static float4 FragColor; +static int vA; +static int vB; + +struct SPIRV_Cross_Input +{ + nointerpolation int vA : TEXCOORD0; + nointerpolation int vB : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = 0.0f.xxxx; + int _10 = 0; + int _15 = 0; + for (int _16 = 0, _17 = 0; _16 < vA; _17 = _15, _16 += _10) + { + if ((vA + _16) == 20) + { + _15 = 50; + } + else + { + _15 = ((vB + _16) == 40) ? 
60 : _17; + } + _10 = _15 + 10; + FragColor += 1.0f.xxxx; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vA = stage_input.vA; + vB = stage_input.vB; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert b/reference/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..72a86d2d44e --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,38 @@ +struct Vert +{ + float a; + float b; +}; + +struct Foo +{ + float c; + float d; +}; + +static const Vert _11 = { 0.0f, 0.0f }; +static const Foo _13 = { 0.0f, 0.0f }; + +static Vert _3 = { 0.0f, 0.0f }; +static Foo foo = _13; + +struct SPIRV_Cross_Output +{ + float Vert_a : TEXCOORD0; + float Vert_b : TEXCOORD1; + Foo foo : TEXCOORD2; +}; + +void vert_main() +{ +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.Vert_a = _3.a; + stage_output.Vert_b = _3.b; + stage_output.foo = foo; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert b/reference/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..ee30c1783e7 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,28 @@ +static const float _23[1] = { 0.0f }; +static const float _24[1] = { 0.0f }; + +static float4 gl_Position = 0.0f.xxxx; +static float gl_PointSize = 0.0f; +static float gl_ClipDistance[1] = _23; +static float gl_CullDistance[1] = _24; +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; + float gl_ClipDistance0 : SV_ClipDistance0; + float gl_CullDistance0 : SV_CullDistance0; +}; + +void vert_main() +{ + gl_Position = 1.0f.xxxx; +} + +SPIRV_Cross_Output main() +{ + vert_main(); + 
SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.gl_ClipDistance0.x = gl_ClipDistance[0]; + stage_output.gl_CullDistance0.x = gl_CullDistance[0]; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert b/reference/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert new file mode 100644 index 00000000000..4846e3f5c62 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert @@ -0,0 +1,56 @@ +struct Struct_vec4 +{ + float4 m0; +}; + +struct VertexOut +{ + Struct_vec4 m0; + Struct_vec4 m1; +}; + +cbuffer UBO : register(b0) +{ + Struct_vec4 ubo_binding_0_m0 : packoffset(c0); + Struct_vec4 ubo_binding_0_m1 : packoffset(c1); +}; + + +static float4 gl_Position; +static VertexOut output_location_0; +static Struct_vec4 output_location_2; +static Struct_vec4 output_location_3; + +struct SPIRV_Cross_Output +{ + Struct_vec4 VertexOut_m0 : TEXCOORD0; + Struct_vec4 VertexOut_m1 : TEXCOORD1; + Struct_vec4 output_location_2 : TEXCOORD2; + Struct_vec4 output_location_3 : TEXCOORD3; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + Struct_vec4 c; + c.m0 = ubo_binding_0_m0.m0; + Struct_vec4 b; + b.m0 = ubo_binding_0_m1.m0; + gl_Position = c.m0 + b.m0; + output_location_0.m0 = c; + output_location_0.m1 = b; + output_location_2 = c; + output_location_3 = b; +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.VertexOut_m0 = output_location_0.m0; + stage_output.VertexOut_m1 = output_location_0.m1; + stage_output.output_location_2 = output_location_2; + stage_output.output_location_3 = output_location_3; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/comp/bitfield.comp b/reference/shaders-hlsl-no-opt/comp/bitfield.comp index 4e93a145355..be287c4174d 100644 --- a/reference/shaders-hlsl-no-opt/comp/bitfield.comp +++ 
b/reference/shaders-hlsl-no-opt/comp/bitfield.comp @@ -1,52 +1,52 @@ -uint SPIRV_Cross_bitfieldInsert(uint Base, uint Insert, uint Offset, uint Count) +uint spvBitfieldInsert(uint Base, uint Insert, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); return (Base & ~Mask) | ((Insert << Offset) & Mask); } -uint2 SPIRV_Cross_bitfieldInsert(uint2 Base, uint2 Insert, uint Offset, uint Count) +uint2 spvBitfieldInsert(uint2 Base, uint2 Insert, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); return (Base & ~Mask) | ((Insert << Offset) & Mask); } -uint3 SPIRV_Cross_bitfieldInsert(uint3 Base, uint3 Insert, uint Offset, uint Count) +uint3 spvBitfieldInsert(uint3 Base, uint3 Insert, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); return (Base & ~Mask) | ((Insert << Offset) & Mask); } -uint4 SPIRV_Cross_bitfieldInsert(uint4 Base, uint4 Insert, uint Offset, uint Count) +uint4 spvBitfieldInsert(uint4 Base, uint4 Insert, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); return (Base & ~Mask) | ((Insert << Offset) & Mask); } -uint SPIRV_Cross_bitfieldUExtract(uint Base, uint Offset, uint Count) +uint spvBitfieldUExtract(uint Base, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); return (Base >> Offset) & Mask; } -uint2 SPIRV_Cross_bitfieldUExtract(uint2 Base, uint Offset, uint Count) +uint2 spvBitfieldUExtract(uint2 Base, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); return (Base >> Offset) & Mask; } -uint3 SPIRV_Cross_bitfieldUExtract(uint3 Base, uint Offset, uint Count) +uint3 spvBitfieldUExtract(uint3 Base, uint Offset, uint Count) { uint Mask = Count == 32 ? 
0xffffffff : ((1 << Count) - 1); return (Base >> Offset) & Mask; } -uint4 SPIRV_Cross_bitfieldUExtract(uint4 Base, uint Offset, uint Count) +uint4 spvBitfieldUExtract(uint4 Base, uint Offset, uint Count) { uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); return (Base >> Offset) & Mask; } -int SPIRV_Cross_bitfieldSExtract(int Base, int Offset, int Count) +int spvBitfieldSExtract(int Base, int Offset, int Count) { int Mask = Count == 32 ? -1 : ((1 << Count) - 1); int Masked = (Base >> Offset) & Mask; @@ -54,7 +54,7 @@ int SPIRV_Cross_bitfieldSExtract(int Base, int Offset, int Count) return (Masked << ExtendShift) >> ExtendShift; } -int2 SPIRV_Cross_bitfieldSExtract(int2 Base, int Offset, int Count) +int2 spvBitfieldSExtract(int2 Base, int Offset, int Count) { int Mask = Count == 32 ? -1 : ((1 << Count) - 1); int2 Masked = (Base >> Offset) & Mask; @@ -62,7 +62,7 @@ int2 SPIRV_Cross_bitfieldSExtract(int2 Base, int Offset, int Count) return (Masked << ExtendShift) >> ExtendShift; } -int3 SPIRV_Cross_bitfieldSExtract(int3 Base, int Offset, int Count) +int3 spvBitfieldSExtract(int3 Base, int Offset, int Count) { int Mask = Count == 32 ? -1 : ((1 << Count) - 1); int3 Masked = (Base >> Offset) & Mask; @@ -70,7 +70,7 @@ int3 SPIRV_Cross_bitfieldSExtract(int3 Base, int Offset, int Count) return (Masked << ExtendShift) >> ExtendShift; } -int4 SPIRV_Cross_bitfieldSExtract(int4 Base, int Offset, int Count) +int4 spvBitfieldSExtract(int4 Base, int Offset, int Count) { int Mask = Count == 32 ? 
-1 : ((1 << Count) - 1); int4 Masked = (Base >> Offset) & Mask; @@ -84,23 +84,23 @@ void comp_main() uint unsigned_value = 0u; int3 signed_values = int3(0, 0, 0); uint3 unsigned_values = uint3(0u, 0u, 0u); - int s = SPIRV_Cross_bitfieldSExtract(signed_value, 5, 20); - uint u = SPIRV_Cross_bitfieldUExtract(unsigned_value, 6, 21); - s = int(SPIRV_Cross_bitfieldInsert(s, 40, 5, 4)); - u = SPIRV_Cross_bitfieldInsert(u, 60u, 5, 4); + int s = spvBitfieldSExtract(signed_value, 5, 20); + uint u = spvBitfieldUExtract(unsigned_value, 6, 21); + s = int(spvBitfieldInsert(s, 40, 5, 4)); + u = spvBitfieldInsert(u, 60u, 5, 4); u = reversebits(u); s = reversebits(s); - int v0 = countbits(u); + int v0 = int(countbits(u)); int v1 = countbits(s); int v2 = int(firstbithigh(u)); int v3 = firstbitlow(s); - int3 s_1 = SPIRV_Cross_bitfieldSExtract(signed_values, 5, 20); - uint3 u_1 = SPIRV_Cross_bitfieldUExtract(unsigned_values, 6, 21); - s_1 = int3(SPIRV_Cross_bitfieldInsert(s_1, int3(40, 40, 40), 5, 4)); - u_1 = SPIRV_Cross_bitfieldInsert(u_1, uint3(60u, 60u, 60u), 5, 4); + int3 s_1 = spvBitfieldSExtract(signed_values, 5, 20); + uint3 u_1 = spvBitfieldUExtract(unsigned_values, 6, 21); + s_1 = int3(spvBitfieldInsert(s_1, int3(40, 40, 40), 5, 4)); + u_1 = spvBitfieldInsert(u_1, uint3(60u, 60u, 60u), 5, 4); u_1 = reversebits(u_1); s_1 = reversebits(s_1); - int3 v0_1 = countbits(u_1); + int3 v0_1 = int3(countbits(u_1)); int3 v1_1 = countbits(s_1); int3 v2_1 = int3(firstbithigh(u_1)); int3 v3_1 = firstbitlow(s_1); diff --git a/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp b/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp new file mode 100644 index 00000000000..7936bf94d09 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp @@ -0,0 +1,297 @@ +struct ResType +{ + float _m0; + int _m1; +}; + +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _19 : register(u0); + +uint spvPackHalf2x16(float2 value) +{ + uint2 
Packed = f32tof16(value); + return Packed.x | (Packed.y << 16); +} + +float2 spvUnpackHalf2x16(uint value) +{ + return f16tof32(uint2(value & 0xffff, value >> 16)); +} + +uint spvPackUnorm4x8(float4 value) +{ + uint4 Packed = uint4(round(saturate(value) * 255.0)); + return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); +} + +float4 spvUnpackUnorm4x8(uint value) +{ + uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); + return float4(Packed) / 255.0; +} + +uint spvPackSnorm4x8(float4 value) +{ + int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff; + return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24)); +} + +float4 spvUnpackSnorm4x8(uint value) +{ + int SignedValue = int(value); + int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24; + return clamp(float4(Packed) / 127.0, -1.0, 1.0); +} + +uint spvPackUnorm2x16(float2 value) +{ + uint2 Packed = uint2(round(saturate(value) * 65535.0)); + return Packed.x | (Packed.y << 16); +} + +float2 spvUnpackUnorm2x16(uint value) +{ + uint2 Packed = uint2(value & 0xffff, value >> 16); + return float2(Packed) / 65535.0; +} + +uint spvPackSnorm2x16(float2 value) +{ + int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff; + return uint(Packed.x | (Packed.y << 16)); +} + +float2 spvUnpackSnorm2x16(uint value) +{ + int SignedValue = int(value); + int2 Packed = int2(SignedValue << 16, SignedValue) >> 16; + return clamp(float2(Packed) / 32767.0, -1.0, 1.0); +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +float2x2 spvInverse(float2x2 m) +{ + float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. 
+ adj[0][0] = m[1][1]; + adj[0][1] = -m[0][1]; + + adj[1][0] = -m[1][0]; + adj[1][1] = m[0][0]; + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +// Returns the determinant of a 2x2 matrix. +float spvDet2x2(float a1, float a2, float b1, float b2) +{ + return a1 * b2 - b1 * a2; +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +float3x3 spvInverse(float3x3 m) +{ + float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. + adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]); + adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]); + adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]); + + adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]); + adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]); + adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]); + + adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]); + adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]); + adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]); + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +// Returns the determinant of a 3x3 matrix. 
+float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +{ + return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +float4x4 spvInverse(float4x4 m) +{ + float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. + adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); + adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); + + adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); + adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); + + adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); + adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]); + + adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][1] 
= spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); + adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +float spvReflect(float i, float n) +{ + return i - 2.0 * dot(n, i) * n; +} + +float spvRefract(float i, float n, float eta) +{ + float NoI = n * i; + float NoI2 = NoI * NoI; + float k = 1.0 - eta * eta * (1.0 - NoI2); + if (k < 0.0) + { + return 0.0; + } + else + { + return eta * i - (eta * NoI + sqrt(k)) * n; + } +} + +float spvFaceForward(float n, float i, float nref) +{ + return i * nref < 0.0 ? 
n : -n; +} + +void comp_main() +{ + _19.Store(0, asuint(round(asfloat(_19.Load(16))))); + _19.Store(0, asuint(trunc(asfloat(_19.Load(16))))); + _19.Store(0, asuint(abs(asfloat(_19.Load(16))))); + _19.Store(4, uint(abs(int(_19.Load(32))))); + _19.Store(0, asuint(sign(asfloat(_19.Load(16))))); + _19.Store(4, uint(sign(int(_19.Load(32))))); + _19.Store(0, asuint(floor(asfloat(_19.Load(16))))); + _19.Store(0, asuint(ceil(asfloat(_19.Load(16))))); + _19.Store(0, asuint(frac(asfloat(_19.Load(16))))); + _19.Store(0, asuint(radians(asfloat(_19.Load(16))))); + _19.Store(0, asuint(degrees(asfloat(_19.Load(16))))); + _19.Store(0, asuint(sin(asfloat(_19.Load(16))))); + _19.Store(0, asuint(cos(asfloat(_19.Load(16))))); + _19.Store(0, asuint(tan(asfloat(_19.Load(16))))); + _19.Store(0, asuint(asin(asfloat(_19.Load(16))))); + _19.Store(0, asuint(acos(asfloat(_19.Load(16))))); + _19.Store(0, asuint(atan(asfloat(_19.Load(16))))); + _19.Store(0, asuint(sinh(asfloat(_19.Load(16))))); + _19.Store(0, asuint(cosh(asfloat(_19.Load(16))))); + _19.Store(0, asuint(tanh(asfloat(_19.Load(16))))); + _19.Store(0, asuint(atan2(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(pow(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(exp(asfloat(_19.Load(16))))); + _19.Store(0, asuint(log(asfloat(_19.Load(16))))); + _19.Store(0, asuint(exp2(asfloat(_19.Load(16))))); + _19.Store(0, asuint(log2(asfloat(_19.Load(16))))); + _19.Store(0, asuint(sqrt(asfloat(_19.Load(16))))); + _19.Store(0, asuint(rsqrt(asfloat(_19.Load(16))))); + _19.Store(0, asuint(length(asfloat(_19.Load(16))))); + _19.Store(0, asuint(distance(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(sign(asfloat(_19.Load(16))))); + _19.Store(0, asuint(spvFaceForward(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(0, asuint(spvReflect(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(spvRefract(asfloat(_19.Load(16)), 
asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(0, asuint(length(asfloat(_19.Load4(16)).xy))); + _19.Store(0, asuint(distance(asfloat(_19.Load4(16)).xy, asfloat(_19.Load4(16)).zw))); + float2 v2 = normalize(asfloat(_19.Load4(16)).xy); + v2 = faceforward(asfloat(_19.Load4(16)).xy, asfloat(_19.Load4(16)).yz, asfloat(_19.Load4(16)).zw); + v2 = reflect(asfloat(_19.Load4(16)).xy, asfloat(_19.Load4(16)).zw); + v2 = refract(asfloat(_19.Load4(16)).xy, asfloat(_19.Load4(16)).yz, asfloat(_19.Load(28))); + float3 v3 = cross(asfloat(_19.Load4(16)).xyz, asfloat(_19.Load4(16)).yzw); + float2x2 _240 = asfloat(uint2x2(_19.Load2(64), _19.Load2(72))); + _19.Store(0, asuint(determinant(_240))); + float3x3 _246 = asfloat(uint3x3(_19.Load3(80), _19.Load3(96), _19.Load3(112))); + _19.Store(0, asuint(determinant(_246))); + float4x4 _252 = asfloat(uint4x4(_19.Load4(128), _19.Load4(144), _19.Load4(160), _19.Load4(176))); + _19.Store(0, asuint(determinant(_252))); + float2x2 _256 = asfloat(uint2x2(_19.Load2(64), _19.Load2(72))); + float2x2 _257 = spvInverse(_256); + _19.Store2(64, asuint(_257[0])); + _19.Store2(72, asuint(_257[1])); + float3x3 _260 = asfloat(uint3x3(_19.Load3(80), _19.Load3(96), _19.Load3(112))); + float3x3 _261 = spvInverse(_260); + _19.Store3(80, asuint(_261[0])); + _19.Store3(96, asuint(_261[1])); + _19.Store3(112, asuint(_261[2])); + float4x4 _264 = asfloat(uint4x4(_19.Load4(128), _19.Load4(144), _19.Load4(160), _19.Load4(176))); + float4x4 _265 = spvInverse(_264); + _19.Store4(128, asuint(_265[0])); + _19.Store4(144, asuint(_265[1])); + _19.Store4(160, asuint(_265[2])); + _19.Store4(176, asuint(_265[3])); + float tmp; + float _271 = modf(asfloat(_19.Load(16)), tmp); + _19.Store(0, asuint(_271)); + _19.Store(0, asuint(min(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(8, min(_19.Load(48), _19.Load(52))); + _19.Store(4, uint(min(int(_19.Load(32)), int(_19.Load(36))))); + _19.Store(0, asuint(max(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + 
_19.Store(8, max(_19.Load(48), _19.Load(52))); + _19.Store(4, uint(max(int(_19.Load(32)), int(_19.Load(36))))); + _19.Store(0, asuint(clamp(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(8, clamp(_19.Load(48), _19.Load(52), _19.Load(56))); + _19.Store(4, uint(clamp(int(_19.Load(32)), int(_19.Load(36)), int(_19.Load(40))))); + _19.Store(0, asuint(lerp(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(0, asuint(step(asfloat(_19.Load(16)), asfloat(_19.Load(20))))); + _19.Store(0, asuint(smoothstep(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + _19.Store(0, asuint(mad(asfloat(_19.Load(16)), asfloat(_19.Load(20)), asfloat(_19.Load(24))))); + ResType _371; + _371._m0 = frexp(asfloat(_19.Load(16)), _371._m1); + int itmp = _371._m1; + _19.Store(0, asuint(_371._m0)); + _19.Store(0, asuint(ldexp(asfloat(_19.Load(16)), itmp))); + _19.Store(8, spvPackSnorm4x8(asfloat(_19.Load4(16)))); + _19.Store(8, spvPackUnorm4x8(asfloat(_19.Load4(16)))); + _19.Store(8, spvPackSnorm2x16(asfloat(_19.Load4(16)).xy)); + _19.Store(8, spvPackUnorm2x16(asfloat(_19.Load4(16)).xy)); + _19.Store(8, spvPackHalf2x16(asfloat(_19.Load4(16)).xy)); + v2 = spvUnpackSnorm2x16(_19.Load(48)); + v2 = spvUnpackUnorm2x16(_19.Load(48)); + v2 = spvUnpackHalf2x16(_19.Load(48)); + float4 v4 = spvUnpackSnorm4x8(_19.Load(48)); + v4 = spvUnpackUnorm4x8(_19.Load(48)); + _19.Store4(32, uint4(firstbitlow(int4(_19.Load4(32))))); + _19.Store4(32, uint4(int4(firstbitlow(_19.Load4(48))))); + _19.Store4(32, uint4(firstbithigh(int4(_19.Load4(32))))); + _19.Store4(32, uint4(int4(firstbithigh(_19.Load4(48))))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp b/reference/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..dc972bdda87 --- /dev/null +++ 
b/reference/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,22 @@ +struct Foo +{ + float _abs; +}; + +RWByteAddressBuffer _7 : register(u0); + +void comp_main() +{ + Foo _24; + _24._abs = asfloat(_7.Load(0)); + Foo f; + f._abs = _24._abs; + int _abs = 10; + _7.Store(4, asuint(f._abs)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/comp/intmin-literal.comp b/reference/shaders-hlsl-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..9faa7fba7ba --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,19 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _9 : register(u1); +cbuffer UBO : register(b0) +{ + float _14_b : packoffset(c0); +}; + + +void comp_main() +{ + _9.Store(0, asuint(asfloat(asint(_14_b) ^ int(0x80000000)))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp b/reference/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp new file mode 100644 index 00000000000..ae7ce70c5e9 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp @@ -0,0 +1,28 @@ +static const uint3 gl_WorkGroupSize = uint3(30u, 1u, 1u); + +RWByteAddressBuffer _46 : register(u0, space0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + bool v = gl_GlobalInvocationID.x != 3u; + bool4 v4; + v4.x = bool(WaveActiveBitOr(uint(v))); + v4.y = bool(WaveActiveBitAnd(uint(v))); + v4.z = bool(WaveActiveBitXor(uint(v))); + v4.w = WaveActiveAllEqual(v); + uint4 w = uint4(v4); + _46.Store(gl_GlobalInvocationID.x * 4 + 0, ((w.x + w.y) + w.z) + w.w); +} + +[numthreads(30, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + 
comp_main(); +} diff --git a/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/reference/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp similarity index 74% rename from reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp rename to reference/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp index b87574f1a7a..4c11a4b1368 100644 --- a/reference/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp +++ b/reference/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0, space0); static uint4 gl_SubgroupEqMask; @@ -19,9 +21,15 @@ void comp_main() float3 first = WaveReadLaneFirst(20.0f.xxx); uint4 ballot_value = WaveActiveBallot(true); uint bit_count = countbits(ballot_value.x) + countbits(ballot_value.y) + countbits(ballot_value.z) + countbits(ballot_value.w); + uint inclusive_bit_count = countbits(ballot_value.x & gl_SubgroupLeMask.x) + countbits(ballot_value.y & gl_SubgroupLeMask.y) + countbits(ballot_value.z & gl_SubgroupLeMask.z) + countbits(ballot_value.w & gl_SubgroupLeMask.w); + uint exclusive_bit_count = countbits(ballot_value.x & gl_SubgroupLtMask.x) + countbits(ballot_value.y & gl_SubgroupLtMask.y) + countbits(ballot_value.z & gl_SubgroupLtMask.z) + countbits(ballot_value.w & gl_SubgroupLtMask.w); + uint shuffled = WaveReadLaneAt(10u, 8u); + uint shuffled_xor = WaveReadLaneAt(30u, WaveGetLaneIndex() ^ 8u); + uint shuffled_up = WaveReadLaneAt(20u, WaveGetLaneIndex() - 4u); + uint shuffled_down = WaveReadLaneAt(20u, WaveGetLaneIndex() + 4u); bool has_all = WaveActiveAllTrue(true); bool has_any = WaveActiveAnyTrue(true); - bool has_equal = WaveActiveAllEqualBool(true); + bool has_equal = WaveActiveAllEqual(true); float4 added = WaveActiveSum(20.0f.xxxx); int4 iadded = WaveActiveSum(int4(20, 20, 20, 20)); float4 multiplied = WaveActiveProduct(20.0f.xxxx); @@ -35,6 +43,9 @@ void comp_main() uint4 anded = 
WaveActiveBitAnd(ballot_value); uint4 ored = WaveActiveBitOr(ballot_value); uint4 xored = WaveActiveBitXor(ballot_value); + bool4 anded_b = bool4(WaveActiveBitAnd(uint4(bool4(ballot_value.x == uint4(42u, 42u, 42u, 42u).x, ballot_value.y == uint4(42u, 42u, 42u, 42u).y, ballot_value.z == uint4(42u, 42u, 42u, 42u).z, ballot_value.w == uint4(42u, 42u, 42u, 42u).w)))); + bool4 ored_b = bool4(WaveActiveBitOr(uint4(bool4(ballot_value.x == uint4(42u, 42u, 42u, 42u).x, ballot_value.y == uint4(42u, 42u, 42u, 42u).y, ballot_value.z == uint4(42u, 42u, 42u, 42u).z, ballot_value.w == uint4(42u, 42u, 42u, 42u).w)))); + bool4 xored_b = bool4(WaveActiveBitXor(uint4(bool4(ballot_value.x == uint4(42u, 42u, 42u, 42u).x, ballot_value.y == uint4(42u, 42u, 42u, 42u).y, ballot_value.z == uint4(42u, 42u, 42u, 42u).z, ballot_value.w == uint4(42u, 42u, 42u, 42u).w)))); added = WavePrefixSum(added) + added; iadded = WavePrefixSum(iadded) + iadded; multiplied = WavePrefixProduct(multiplied) * multiplied; diff --git a/reference/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp b/reference/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..94aec455762 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,15 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _14 : register(u0); + +void comp_main() +{ + bool3 c = bool3(asfloat(_14.Load3(16)).x < 1.0f.xxx.x, asfloat(_14.Load3(16)).y < 1.0f.xxx.y, asfloat(_14.Load3(16)).z < 1.0f.xxx.z); + _14.Store3(0, asuint(float3(c.x ? float3(0.0f, 0.0f, 1.0f).x : float3(1.0f, 0.0f, 0.0f).x, c.y ? float3(0.0f, 0.0f, 1.0f).y : float3(1.0f, 0.0f, 0.0f).y, c.z ? 
float3(0.0f, 0.0f, 1.0f).z : float3(1.0f, 0.0f, 0.0f).z))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp b/reference/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..7bd1c761697 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,22 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _14 : register(u0); + +void comp_main() +{ + bool c = asfloat(_14.Load(48)) < 1.0f; + float3x3 _29 = c ? float3x3(1.0f.xxx, 1.0f.xxx, 1.0f.xxx) : float3x3(0.0f.xxx, 0.0f.xxx, 0.0f.xxx); + _14.Store3(0, asuint(_29[0])); + _14.Store3(16, asuint(_29[1])); + _14.Store3(32, asuint(_29[2])); + float3x3 _37 = c ? float3x3(float3(1.0f, 0.0f, 0.0f), float3(0.0f, 1.0f, 0.0f), float3(0.0f, 0.0f, 1.0f)) : float3x3(0.0f.xxx, 0.0f.xxx, 0.0f.xxx); + _14.Store3(0, asuint(_37[0])); + _14.Store3(16, asuint(_37[1])); + _14.Store3(32, asuint(_37[2])); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag b/reference/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag new file mode 100644 index 00000000000..2b8ec8119a7 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag @@ -0,0 +1,56 @@ +cbuffer UBO : register(b0) +{ + float4 _18_a[2] : packoffset(c0); + float4 _18_b : packoffset(c2); + float4 _18_c : packoffset(c3); + row_major float4x4 _18_d : packoffset(c4); + float _18_e : packoffset(c8); + float2 _18_f : packoffset(c8.z); + float _18_g : packoffset(c9); + float2 _18_h : packoffset(c9.z); + float _18_i : packoffset(c10); + float2 _18_j : packoffset(c10.z); + float _18_k : packoffset(c11); + float2 _18_l : packoffset(c11.z); + float _18_m : packoffset(c12); + float _18_n : packoffset(c12.y); + float _18_o : packoffset(c12.z); + float4 _18_p : packoffset(c13); + 
float4 _18_q : packoffset(c14); + float3 _18_r : packoffset(c15); + float4 _18_s : packoffset(c16); + float4 _18_t : packoffset(c17); + float4 _18_u : packoffset(c18); + float _18_v : packoffset(c19); + float _18_w : packoffset(c19.y); + float _18_x : packoffset(c19.z); + float _18_y : packoffset(c19.w); + float _18_z : packoffset(c20); + float _18_aa : packoffset(c20.y); + float _18_ab : packoffset(c20.z); + float _18_ac : packoffset(c20.w); + float _18_ad : packoffset(c21); + float _18_ae : packoffset(c21.y); + float4 _18_ef : packoffset(c22); +}; + + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = _18_a[1]; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag b/reference/shaders-hlsl-no-opt/frag/constant-buffer-array.invalid.sm51.frag similarity index 100% rename from reference/opt/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag rename to reference/shaders-hlsl-no-opt/frag/constant-buffer-array.invalid.sm51.frag diff --git a/reference/shaders-hlsl/frag/fp16.invalid.desktop.frag b/reference/shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag similarity index 93% rename from reference/shaders-hlsl/frag/fp16.invalid.desktop.frag rename to reference/shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag index e10d6724e00..f0ed32f342d 100644 --- a/reference/shaders-hlsl/frag/fp16.invalid.desktop.frag +++ b/reference/shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag @@ -43,13 +43,13 @@ float4 mod(float4 x, float4 y) return x - y * floor(x / y); } -uint SPIRV_Cross_packFloat2x16(min16float2 value) +uint spvPackFloat2x16(min16float2 value) { uint2 Packed = f32tof16(value); return Packed.x | (Packed.y << 16); } -min16float2 SPIRV_Cross_unpackFloat2x16(uint value) +min16float2 spvUnpackFloat2x16(uint 
value) { return min16float2(f16tof32(uint2(value & 0xffff, value >> 16))); } @@ -128,9 +128,9 @@ void test_builtins() bool4 btmp = isnan(v4); btmp = isinf(v4); res = mad(v4, v4, v4); - uint pack0 = SPIRV_Cross_packFloat2x16(v4.xy); - uint pack1 = SPIRV_Cross_packFloat2x16(v4.zw); - res = min16float4(SPIRV_Cross_unpackFloat2x16(pack0), SPIRV_Cross_unpackFloat2x16(pack1)); + uint pack0 = spvPackFloat2x16(v4.xy); + uint pack1 = spvPackFloat2x16(v4.zw); + res = min16float4(spvUnpackFloat2x16(pack0), spvUnpackFloat2x16(pack1)); min16float t0 = length(v4); t0 = distance(v4, v4); t0 = dot(v4, v4); diff --git a/reference/shaders-hlsl-no-opt/frag/frag-coord.frag b/reference/shaders-hlsl-no-opt/frag/frag-coord.frag new file mode 100644 index 00000000000..17cb4c4b741 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/frag-coord.frag @@ -0,0 +1,27 @@ +static float4 gl_FragCoord; +static float3 FragColor; + +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +struct SPIRV_Cross_Output +{ + float3 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = gl_FragCoord.xyz / gl_FragCoord.w.xxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag b/reference/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag new file mode 100644 index 00000000000..ad61b21ea38 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag @@ -0,0 +1,26 @@ +static float FragColor; + +struct SPIRV_Cross_Input +{ +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = float(IsHelperLane()); + discard; + bool _16 = IsHelperLane(); + FragColor = float(_16); +} + +SPIRV_Cross_Output 
main(SPIRV_Cross_Input stage_input) +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag b/reference/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag new file mode 100644 index 00000000000..020831d0a05 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag @@ -0,0 +1,79 @@ +RWByteAddressBuffer _62 : register(u0, space0); + +static float4 gl_FragCoord; +static half4 Output; +static half4 Input; +static int16_t4 OutputI; +static int16_t4 InputI; +static uint16_t4 OutputU; +static uint16_t4 InputU; + +struct SPIRV_Cross_Input +{ + half4 Input : TEXCOORD0; + nointerpolation int16_t4 InputI : TEXCOORD1; + nointerpolation uint16_t4 InputU : TEXCOORD2; + float4 gl_FragCoord : SV_Position; +}; + +struct SPIRV_Cross_Output +{ + half4 Output : SV_Target0; + int16_t4 OutputI : SV_Target1; + uint16_t4 OutputU : SV_Target2; +}; + +void frag_main() +{ + int index = int(gl_FragCoord.x); + Output = Input + half(20.0).xxxx; + OutputI = InputI + int16_t4(int16_t(-40), int16_t(-40), int16_t(-40), int16_t(-40)); + OutputU = InputU + uint16_t4(20u, 20u, 20u, 20u); + Output += _62.Load(index * 2 + 0).xxxx; + OutputI += _62.Load(index * 2 + 8).xxxx; + OutputU += _62.Load(index * 2 + 16).xxxx; + Output += _62.Load(index * 8 + 24); + OutputI += _62.Load(index * 8 + 56); + OutputU += _62.Load(index * 8 + 88); + Output += _62.Load(index * 16 + 128).xyzz; + Output += half3(_62.Load(index * 12 + 186), _62.Load(index * 12 + 190), _62.Load(index * 12 + 194)).xyzz; + half2x3 _128 = half2x3(_62.Load(index * 16 + 120), _62.Load(index * 16 + 128)); + half2x3 m0 = _128; + half2x3 _132 = half2x3(_62.Load(index * 12 + 184), _62.Load(index * 12 + 188), _62.Load(index * 12 + 192), _62.Load(index * 12 + 186), _62.Load(index * 12 + 190), _62.Load(index * 
12 + 194)); + half2x3 m1 = _132; + _62.Store(index * 2 + 0, Output.x); + _62.Store(index * 2 + 8, OutputI.y); + _62.Store(index * 2 + 16, OutputU.z); + _62.Store(index * 8 + 24, Output); + _62.Store(index * 8 + 56, OutputI); + _62.Store(index * 8 + 88, OutputU); + _62.Store(index * 16 + 128, Output.xyz); + _62.Store(index * 12 + 186, Output.x); + _62.Store(index * 12 + 190, Output.xyz.y); + _62.Store(index * 12 + 194, Output.xyz.z); + half2x3 _182 = half2x3(half3(Output.xyz), half3(Output.wzy)); + _62.Store(index * 16 + 120, _182[0]); + _62.Store(index * 16 + 128, _182[1]); + half2x3 _197 = half2x3(half3(Output.xyz), half3(Output.wzy)); + _62.Store(index * 12 + 184, _197[0].x); + _62.Store(index * 12 + 186, _197[1].x); + _62.Store(index * 12 + 188, _197[0].y); + _62.Store(index * 12 + 190, _197[1].y); + _62.Store(index * 12 + 192, _197[0].z); + _62.Store(index * 12 + 194, _197[1].z); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + Input = stage_input.Input; + InputI = stage_input.InputI; + InputU = stage_input.InputU; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.Output = Output; + stage_output.OutputI = OutputI; + stage_output.OutputU = OutputU; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag b/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag new file mode 100644 index 00000000000..ca9a116fe8b --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag @@ -0,0 +1,32 @@ +Texture2D uTex[] : register(t0, space0); +SamplerState Immut : register(s0, space1); + +static float4 FragColor; +static int vIndex; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; + nointerpolation int vIndex : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; 
+}; + +void frag_main() +{ + FragColor = uTex[NonUniformResourceIndex(vIndex)].Sample(Immut, vUV); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vIndex = stage_input.vIndex; + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag new file mode 100644 index 00000000000..aace6f58ba1 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag @@ -0,0 +1,33 @@ +RasterizerOrderedByteAddressBuffer _14 : register(u1, space0); +RasterizerOrderedByteAddressBuffer _35 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _25 = int(gl_FragCoord.x); + _14.Store(_25 * 4 + 0, _14.Load(_25 * 4 + 0) + 1u); +} + +void callee() +{ + int _38 = int(gl_FragCoord.x); + _35.Store(_38 * 4 + 0, _35.Load(_38 * 4 + 0) + 1u); + callee2(); +} + +void frag_main() +{ + callee(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; + frag_main(); +} diff --git a/reference/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag b/reference/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..a0f078a87cc --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,29 @@ +Texture2D uSamp : register(t0); +SamplerState _uSamp_sampler : register(s0); + +static float4 FragColor; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = uSamp.GatherGreen(_uSamp_sampler, 
vUV); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag b/reference/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag new file mode 100644 index 00000000000..4ea3e0dfbe0 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag @@ -0,0 +1,33 @@ +cbuffer UBO : register(b0) +{ + row_major float4x4 _13_m : packoffset(c1); + float4 _13_v : packoffset(c0); +}; + + +static float4 FragColor; +static float4 vColor; + +struct SPIRV_Cross_Input +{ + float4 vColor : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = mul(vColor, _13_m) + _13_v; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vColor = stage_input.vColor; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag b/reference/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..7fe47df036d --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,45 @@ +struct Foo +{ + int a; +}; + +static float4 vColor; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + float4 vColor : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +static int uninit_int = 0; +static int4 uninit_vector = int4(0, 0, 0, 0); +static float4x4 uninit_matrix = float4x4(0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx, 0.0f.xxxx); +static Foo uninit_foo = { 0 }; + +void frag_main() +{ + int uninit_function_int = 0; + if (vColor.x > 10.0f) + { + uninit_function_int = 10; + } + else + { + uninit_function_int = 20; + } + FragColor = vColor; +} + +SPIRV_Cross_Output 
main(SPIRV_Cross_Input stage_input) +{ + vColor = stage_input.vColor; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag b/reference/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag new file mode 100644 index 00000000000..1311c863452 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag @@ -0,0 +1,29 @@ +static float FragColor; + +struct SPIRV_Cross_Input +{ +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + bool _12 = IsHelperLane(); + float _15 = float(_12); + FragColor = _15; + discard; + bool _16 = IsHelperLane(); + float _17 = float(_16); + FragColor = _17; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/vert/base-instance.vert b/reference/shaders-hlsl-no-opt/vert/base-instance.vert new file mode 100644 index 00000000000..de31f2c6c0f --- /dev/null +++ b/reference/shaders-hlsl-no-opt/vert/base-instance.vert @@ -0,0 +1,30 @@ +static float4 gl_Position; +static int gl_BaseInstanceARB; +cbuffer SPIRV_Cross_VertexInfo +{ + int SPIRV_Cross_BaseVertex; + int SPIRV_Cross_BaseInstance; +}; + +struct SPIRV_Cross_Input +{ +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = float(gl_BaseInstanceARB).xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_BaseInstanceARB = SPIRV_Cross_BaseInstance; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/vert/base-vertex.vert b/reference/shaders-hlsl-no-opt/vert/base-vertex.vert new 
file mode 100644 index 00000000000..6b9b62bbbb9 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/vert/base-vertex.vert @@ -0,0 +1,30 @@ +static float4 gl_Position; +static int gl_BaseVertexARB; +cbuffer SPIRV_Cross_VertexInfo +{ + int SPIRV_Cross_BaseVertex; + int SPIRV_Cross_BaseInstance; +}; + +struct SPIRV_Cross_Input +{ +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = float(gl_BaseVertexARB).xxxx; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_BaseVertexARB = SPIRV_Cross_BaseVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert b/reference/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert new file mode 100644 index 00000000000..ea3bdc15644 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert @@ -0,0 +1,50 @@ +struct Bar +{ + float v[2]; + float w; +}; + +struct V +{ + float a; + float b[2]; + Bar c[2]; + Bar d; +}; + +static V _14; + +struct SPIRV_Cross_Output +{ + float V_a : TEXCOORD0; + float V_b[2] : TEXCOORD1; + Bar V_c[2] : TEXCOORD3; + Bar V_d : TEXCOORD9; +}; + +void vert_main() +{ + _14.a = 1.0f; + _14.b[0] = 2.0f; + _14.b[1] = 3.0f; + _14.c[0].v[0] = 4.0f; + _14.c[0].v[1] = 5.0f; + _14.c[0].w = 6.0f; + _14.c[1].v[0] = 7.0f; + _14.c[1].v[1] = 8.0f; + _14.c[1].w = 9.0f; + _14.d.v[0] = 10.0f; + _14.d.v[1] = 11.0f; + _14.d.w = 12.0f; +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.V_a = _14.a; + stage_output.V_b = _14.b; + stage_output.V_c = _14.c; + stage_output.V_d = _14.d; + return stage_output; +} diff --git a/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert b/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert new file mode 100644 index 00000000000..103ff46a3fe --- /dev/null 
+++ b/reference/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert @@ -0,0 +1,8 @@ +void vert_main() +{ +} + +void main() +{ + vert_main(); +} diff --git a/reference/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert b/reference/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert new file mode 100644 index 00000000000..d76b24fca3c --- /dev/null +++ b/reference/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert @@ -0,0 +1,56 @@ +static float4 gl_Position; +static float4x4 m4; +static float4 v; +static float3x3 m3; +static float2x2 m2; + +struct SPIRV_Cross_Input +{ + float4 m4_0 : TEXCOORD0; + float4 m4_1 : TEXCOORD1; + float4 m4_2 : TEXCOORD2; + float4 m4_3 : TEXCOORD3; + float3 m3_0 : TEXCOORD4; + float3 m3_1 : TEXCOORD5; + float3 m3_2 : TEXCOORD6; + float2 m2_0 : TEXCOORD7; + float2 m2_1 : TEXCOORD8; + float4 v : TEXCOORD9; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(v, m4); + float4 _35 = gl_Position; + float3 _37 = _35.xyz + mul(v.xyz, m3); + gl_Position.x = _37.x; + gl_Position.y = _37.y; + gl_Position.z = _37.z; + float4 _56 = gl_Position; + float2 _58 = _56.xy + mul(v.xy, m2); + gl_Position.x = _58.x; + gl_Position.y = _58.y; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + m4[0] = stage_input.m4_0; + m4[1] = stage_input.m4_1; + m4[2] = stage_input.m4_2; + m4[3] = stage_input.m4_3; + v = stage_input.v; + m3[0] = stage_input.m3_0; + m3[1] = stage_input.m3_1; + m3[2] = stage_input.m3_2; + m2[0] = stage_input.m2_0; + m2[1] = stage_input.m2_1; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..da499c3b6da --- /dev/null +++ 
b/reference/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,20 @@ +RWByteAddressBuffer _5 : register(u0); +RWByteAddressBuffer _6 : register(u1); + +void comp_main() +{ + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) < int4(_5.Load4(0)).x, int(_5.Load4(16).y) < int4(_5.Load4(0)).y, int(_5.Load4(16).z) < int4(_5.Load4(0)).z, int(_5.Load4(16).w) < int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) <= int4(_5.Load4(0)).x, int(_5.Load4(16).y) <= int4(_5.Load4(0)).y, int(_5.Load4(16).z) <= int4(_5.Load4(0)).z, int(_5.Load4(16).w) <= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x < uint(int4(_5.Load4(0)).x), _5.Load4(16).y < uint(int4(_5.Load4(0)).y), _5.Load4(16).z < uint(int4(_5.Load4(0)).z), _5.Load4(16).w < uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x <= uint(int4(_5.Load4(0)).x), _5.Load4(16).y <= uint(int4(_5.Load4(0)).y), _5.Load4(16).z <= uint(int4(_5.Load4(0)).z), _5.Load4(16).w <= uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) > int4(_5.Load4(0)).x, int(_5.Load4(16).y) > int4(_5.Load4(0)).y, int(_5.Load4(16).z) > int4(_5.Load4(0)).z, int(_5.Load4(16).w) > int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(int(_5.Load4(16).x) >= int4(_5.Load4(0)).x, int(_5.Load4(16).y) >= int4(_5.Load4(0)).y, int(_5.Load4(16).z) >= int4(_5.Load4(0)).z, int(_5.Load4(16).w) >= int4(_5.Load4(0)).w))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x > uint(int4(_5.Load4(0)).x), _5.Load4(16).y > uint(int4(_5.Load4(0)).y), _5.Load4(16).z > uint(int4(_5.Load4(0)).z), _5.Load4(16).w > uint(int4(_5.Load4(0)).w)))); + _6.Store4(0, uint4(bool4(_5.Load4(16).x >= uint(int4(_5.Load4(0)).x), _5.Load4(16).y >= uint(int4(_5.Load4(0)).y), _5.Load4(16).z >= uint(int4(_5.Load4(0)).z), _5.Load4(16).w >= uint(int4(_5.Load4(0)).w)))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp 
b/reference/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp index a12274c01c6..e184e03c5c1 100644 --- a/reference/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp +++ b/reference/shaders-hlsl/asm/comp/block-name-alias-global.asm.comp @@ -4,22 +4,16 @@ struct A int b; }; -struct A_1 -{ - int a; - int b; -}; - RWByteAddressBuffer C1 : register(u1); cbuffer C2 : register(b2) { - A_1 C2_1_Data[1024] : packoffset(c0); + A C2_1_Data[1024] : packoffset(c0); }; RWByteAddressBuffer C3 : register(u0); cbuffer B : register(b3) { - A_1 C4_Data[1024] : packoffset(c0); + A C4_Data[1024] : packoffset(c0); }; diff --git a/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..88f53a4c182 --- /dev/null +++ b/reference/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,27 @@ +RWByteAddressBuffer _4 : register(u0); + +void comp_main() +{ + _4.Store(0, asuint(min(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(min(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(min(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(max(asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(max(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(max(asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(max(asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(48)), asfloat(_4.Load(96))))); + _4.Store2(8, asuint(clamp(asfloat(_4.Load2(8)), asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + _4.Store3(16, asuint(clamp(asfloat(_4.Load3(16)), asfloat(_4.Load3(64)), asfloat(_4.Load3(112))))); + _4.Store4(32, asuint(clamp(asfloat(_4.Load4(32)), asfloat(_4.Load4(80)), asfloat(_4.Load4(128))))); + for (int i = 0; 
i < 2; i++, _4.Store(0, asuint(clamp(asfloat(_4.Load(0)), asfloat(_4.Load(56)), asfloat(_4.Load(60)))))) + { + _4.Store2(8, asuint(min(asfloat(_4.Load2(56)), asfloat(_4.Load2(104))))); + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp b/reference/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp deleted file mode 100644 index c567fbaf14c..00000000000 --- a/reference/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef SPIRV_CROSS_CONSTANT_ID_0 -#define SPIRV_CROSS_CONSTANT_ID_0 1u -#endif -static const uint _3 = SPIRV_CROSS_CONSTANT_ID_0; -#ifndef SPIRV_CROSS_CONSTANT_ID_2 -#define SPIRV_CROSS_CONSTANT_ID_2 3u -#endif -static const uint _4 = SPIRV_CROSS_CONSTANT_ID_2; -static const uint3 gl_WorkGroupSize = uint3(_3, 2u, _4); - -RWByteAddressBuffer _8 : register(u0); -RWByteAddressBuffer _9 : register(u1); - -static uint3 gl_WorkGroupID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; -}; - -static uint3 _22 = gl_WorkGroupSize; - -void comp_main() -{ - _8.Store(gl_WorkGroupID.x * 4 + 0, asuint(asfloat(_9.Load(gl_WorkGroupID.x * 4 + 0)) + asfloat(_8.Load(gl_WorkGroupID.x * 4 + 0)))); -} - -[numthreads(SPIRV_CROSS_CONSTANT_ID_0, 2, SPIRV_CROSS_CONSTANT_ID_2)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - comp_main(); -} diff --git a/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag b/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag index ed53720d946..2527d10fdc8 100644 --- a/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag +++ b/reference/shaders-hlsl/asm/frag/image-extract-reuse.asm.frag @@ -8,7 +8,7 @@ struct SPIRV_Cross_Output int2 Size : SV_Target0; }; -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { 
uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); @@ -19,7 +19,7 @@ void frag_main() { uint _19_dummy_parameter; uint _20_dummy_parameter; - Size = int2(SPIRV_Cross_textureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(SPIRV_Cross_textureSize(uTexture, uint(1), _20_dummy_parameter)); + Size = int2(spvTextureSize(uTexture, uint(0), _19_dummy_parameter)) + int2(spvTextureSize(uTexture, uint(1), _20_dummy_parameter)); } SPIRV_Cross_Output main() diff --git a/reference/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag b/reference/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag index d20cf995acf..25dc6939e5c 100644 --- a/reference/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag +++ b/reference/shaders-hlsl/asm/frag/inf-nan-constant.asm.frag @@ -7,7 +7,7 @@ struct SPIRV_Cross_Output void frag_main() { - FragColor = float3(asfloat(0x7f800000u), asfloat(0xff800000u), asfloat(0x7fc00000u)); + FragColor = float3(asfloat(0x7f800000u /* inf */), asfloat(0xff800000u /* -inf */), asfloat(0x7fc00000u /* nan */)); } SPIRV_Cross_Output main() diff --git a/reference/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag b/reference/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..88fad10ff19 --- /dev/null +++ b/reference/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag @@ -0,0 +1,34 @@ +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +uint64_t spvPackUint2x32(uint2 value) +{ + return (uint64_t(value.y) << 32) | uint64_t(value.x); +} + +uint2 spvUnpackUint2x32(uint64_t value) +{ + uint2 Unpacked; + Unpacked.x = uint(value & 0xffffffff); + Unpacked.y = uint(value >> 32); + return Unpacked; +} + +void frag_main() +{ + uint64_t _packed = spvPackUint2x32(uint2(18u, 52u)); + uint2 unpacked = spvUnpackUint2x32(_packed); + FragColor = float4(float(unpacked.x), float(unpacked.y), 1.0f, 1.0f); +} + +SPIRV_Cross_Output main() +{ + 
frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag b/reference/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag index 281c397608f..9c71d08c4f2 100644 --- a/reference/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag +++ b/reference/shaders-hlsl/asm/frag/single-function-private-lut.asm.frag @@ -58,6 +58,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.o_color = o_color; diff --git a/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag b/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag index 695d5fe9dfd..74c12945bfc 100644 --- a/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag +++ b/reference/shaders-hlsl/asm/frag/texel-fetch-no-lod.asm.frag @@ -22,6 +22,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/asm/frag/unreachable.asm.frag b/reference/shaders-hlsl/asm/frag/unreachable.asm.frag index c2fa519df8f..5eb70adf81c 100644 --- a/reference/shaders-hlsl/asm/frag/unreachable.asm.frag +++ b/reference/shaders-hlsl/asm/frag/unreachable.asm.frag @@ -1,3 +1,5 @@ +static float4 _21; + static int counter; static float4 FragColor; @@ -11,8 +13,6 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; -float4 _21; - void frag_main() { float4 _24; diff --git a/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert b/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert index 84b91b69bc4..2a332551f37 100644 --- 
a/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert +++ b/reference/shaders-hlsl/asm/vert/spec-constant-op-composite.asm.vert @@ -8,10 +8,7 @@ static const int _20 = (_7 + 2); #endif static const uint _8 = SPIRV_CROSS_CONSTANT_ID_202; static const uint _25 = (_8 % 5u); -#ifndef SPIRV_CROSS_CONSTANT_ID_0 -#define SPIRV_CROSS_CONSTANT_ID_0 int4(20, 30, _20, _20) -#endif -static const int4 _30 = SPIRV_CROSS_CONSTANT_ID_0; +static const int4 _30 = int4(20, 30, _20, _20); static const int2 _32 = int2(_30.y, _30.x); static const int _33 = _30.y; #ifndef SPIRV_CROSS_CONSTANT_ID_200 diff --git a/reference/shaders-hlsl/comp/access-chain-load-composite.comp b/reference/shaders-hlsl/comp/access-chain-load-composite.comp new file mode 100644 index 00000000000..1c4016008bd --- /dev/null +++ b/reference/shaders-hlsl/comp/access-chain-load-composite.comp @@ -0,0 +1,164 @@ +struct Baz +{ + float c; +}; + +struct Bar +{ + float d[2][4]; + Baz baz[2]; +}; + +struct Foo +{ + column_major float2x2 a; + float2 b; + Bar c[5]; +}; + +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer _31 : register(u0); + +void comp_main() +{ + Foo _36; + _36.a = asfloat(uint2x2(_31.Load(0), _31.Load(8), _31.Load(4), _31.Load(12))); + _36.b = asfloat(_31.Load2(16)); + [unroll] + for (int _4ident = 0; _4ident < 5; _4ident++) + { + [unroll] + for (int _5ident = 0; _5ident < 2; _5ident++) + { + [unroll] + for (int _6ident = 0; _6ident < 4; _6ident++) + { + _36.c[_4ident].d[_5ident][_6ident] = asfloat(_31.Load(_6ident * 4 + _5ident * 16 + _4ident * 40 + 24)); + } + } + [unroll] + for (int _7ident = 0; _7ident < 2; _7ident++) + { + _36.c[_4ident].baz[_7ident].c = asfloat(_31.Load(_7ident * 4 + _4ident * 40 + 56)); + } + } + Foo f; + f.a = _36.a; + f.b = _36.b; + f.c[0].d[0][0] = _36.c[0].d[0][0]; + f.c[0].d[0][1] = _36.c[0].d[0][1]; + f.c[0].d[0][2] = _36.c[0].d[0][2]; + f.c[0].d[0][3] = _36.c[0].d[0][3]; + f.c[0].d[1][0] = _36.c[0].d[1][0]; + f.c[0].d[1][1] = 
_36.c[0].d[1][1]; + f.c[0].d[1][2] = _36.c[0].d[1][2]; + f.c[0].d[1][3] = _36.c[0].d[1][3]; + f.c[0].baz[0].c = _36.c[0].baz[0].c; + f.c[0].baz[1].c = _36.c[0].baz[1].c; + f.c[1].d[0][0] = _36.c[1].d[0][0]; + f.c[1].d[0][1] = _36.c[1].d[0][1]; + f.c[1].d[0][2] = _36.c[1].d[0][2]; + f.c[1].d[0][3] = _36.c[1].d[0][3]; + f.c[1].d[1][0] = _36.c[1].d[1][0]; + f.c[1].d[1][1] = _36.c[1].d[1][1]; + f.c[1].d[1][2] = _36.c[1].d[1][2]; + f.c[1].d[1][3] = _36.c[1].d[1][3]; + f.c[1].baz[0].c = _36.c[1].baz[0].c; + f.c[1].baz[1].c = _36.c[1].baz[1].c; + f.c[2].d[0][0] = _36.c[2].d[0][0]; + f.c[2].d[0][1] = _36.c[2].d[0][1]; + f.c[2].d[0][2] = _36.c[2].d[0][2]; + f.c[2].d[0][3] = _36.c[2].d[0][3]; + f.c[2].d[1][0] = _36.c[2].d[1][0]; + f.c[2].d[1][1] = _36.c[2].d[1][1]; + f.c[2].d[1][2] = _36.c[2].d[1][2]; + f.c[2].d[1][3] = _36.c[2].d[1][3]; + f.c[2].baz[0].c = _36.c[2].baz[0].c; + f.c[2].baz[1].c = _36.c[2].baz[1].c; + f.c[3].d[0][0] = _36.c[3].d[0][0]; + f.c[3].d[0][1] = _36.c[3].d[0][1]; + f.c[3].d[0][2] = _36.c[3].d[0][2]; + f.c[3].d[0][3] = _36.c[3].d[0][3]; + f.c[3].d[1][0] = _36.c[3].d[1][0]; + f.c[3].d[1][1] = _36.c[3].d[1][1]; + f.c[3].d[1][2] = _36.c[3].d[1][2]; + f.c[3].d[1][3] = _36.c[3].d[1][3]; + f.c[3].baz[0].c = _36.c[3].baz[0].c; + f.c[3].baz[1].c = _36.c[3].baz[1].c; + f.c[4].d[0][0] = _36.c[4].d[0][0]; + f.c[4].d[0][1] = _36.c[4].d[0][1]; + f.c[4].d[0][2] = _36.c[4].d[0][2]; + f.c[4].d[0][3] = _36.c[4].d[0][3]; + f.c[4].d[1][0] = _36.c[4].d[1][0]; + f.c[4].d[1][1] = _36.c[4].d[1][1]; + f.c[4].d[1][2] = _36.c[4].d[1][2]; + f.c[4].d[1][3] = _36.c[4].d[1][3]; + f.c[4].baz[0].c = _36.c[4].baz[0].c; + f.c[4].baz[1].c = _36.c[4].baz[1].c; + float2 _229 = 1.0f.xx; + f.a = float2x2(f.a[0] + _229, f.a[1] + _229); + f.b += 2.0f.xx; + f.c[3].d[1][1] += 5.0f; + _31.Store(224, asuint(f.a[0].x)); + _31.Store(228, asuint(f.a[1].x)); + _31.Store(232, asuint(f.a[0].y)); + _31.Store(236, asuint(f.a[1].y)); + _31.Store2(240, asuint(f.b)); + _31.Store(248, 
asuint(f.c[0].d[0][0])); + _31.Store(252, asuint(f.c[0].d[0][1])); + _31.Store(256, asuint(f.c[0].d[0][2])); + _31.Store(260, asuint(f.c[0].d[0][3])); + _31.Store(264, asuint(f.c[0].d[1][0])); + _31.Store(268, asuint(f.c[0].d[1][1])); + _31.Store(272, asuint(f.c[0].d[1][2])); + _31.Store(276, asuint(f.c[0].d[1][3])); + _31.Store(280, asuint(f.c[0].baz[0].c)); + _31.Store(284, asuint(f.c[0].baz[1].c)); + _31.Store(288, asuint(f.c[1].d[0][0])); + _31.Store(292, asuint(f.c[1].d[0][1])); + _31.Store(296, asuint(f.c[1].d[0][2])); + _31.Store(300, asuint(f.c[1].d[0][3])); + _31.Store(304, asuint(f.c[1].d[1][0])); + _31.Store(308, asuint(f.c[1].d[1][1])); + _31.Store(312, asuint(f.c[1].d[1][2])); + _31.Store(316, asuint(f.c[1].d[1][3])); + _31.Store(320, asuint(f.c[1].baz[0].c)); + _31.Store(324, asuint(f.c[1].baz[1].c)); + _31.Store(328, asuint(f.c[2].d[0][0])); + _31.Store(332, asuint(f.c[2].d[0][1])); + _31.Store(336, asuint(f.c[2].d[0][2])); + _31.Store(340, asuint(f.c[2].d[0][3])); + _31.Store(344, asuint(f.c[2].d[1][0])); + _31.Store(348, asuint(f.c[2].d[1][1])); + _31.Store(352, asuint(f.c[2].d[1][2])); + _31.Store(356, asuint(f.c[2].d[1][3])); + _31.Store(360, asuint(f.c[2].baz[0].c)); + _31.Store(364, asuint(f.c[2].baz[1].c)); + _31.Store(368, asuint(f.c[3].d[0][0])); + _31.Store(372, asuint(f.c[3].d[0][1])); + _31.Store(376, asuint(f.c[3].d[0][2])); + _31.Store(380, asuint(f.c[3].d[0][3])); + _31.Store(384, asuint(f.c[3].d[1][0])); + _31.Store(388, asuint(f.c[3].d[1][1])); + _31.Store(392, asuint(f.c[3].d[1][2])); + _31.Store(396, asuint(f.c[3].d[1][3])); + _31.Store(400, asuint(f.c[3].baz[0].c)); + _31.Store(404, asuint(f.c[3].baz[1].c)); + _31.Store(408, asuint(f.c[4].d[0][0])); + _31.Store(412, asuint(f.c[4].d[0][1])); + _31.Store(416, asuint(f.c[4].d[0][2])); + _31.Store(420, asuint(f.c[4].d[0][3])); + _31.Store(424, asuint(f.c[4].d[1][0])); + _31.Store(428, asuint(f.c[4].d[1][1])); + _31.Store(432, asuint(f.c[4].d[1][2])); + _31.Store(436, 
asuint(f.c[4].d[1][3])); + _31.Store(440, asuint(f.c[4].baz[0].c)); + _31.Store(444, asuint(f.c[4].baz[1].c)); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/access-chains.comp b/reference/shaders-hlsl/comp/access-chains.comp index 924e9191245..c748200b969 100644 --- a/reference/shaders-hlsl/comp/access-chains.comp +++ b/reference/shaders-hlsl/comp/access-chains.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer wo : register(u1); ByteAddressBuffer ro : register(t0); diff --git a/reference/shaders-hlsl/comp/access-chains.force-uav.comp b/reference/shaders-hlsl/comp/access-chains.force-uav.comp new file mode 100644 index 00000000000..97d046d89a3 --- /dev/null +++ b/reference/shaders-hlsl/comp/access-chains.force-uav.comp @@ -0,0 +1,23 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +RWByteAddressBuffer wo : register(u1); +RWByteAddressBuffer ro : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + wo.Store4(gl_GlobalInvocationID.x * 64 + 272, asuint(asfloat(ro.Load4(gl_GlobalInvocationID.x * 64 + 160)))); + wo.Store4(gl_GlobalInvocationID.x * 16 + 480, asuint(asfloat(ro.Load4(gl_GlobalInvocationID.x * 16 + 480)))); +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/address-buffers.comp b/reference/shaders-hlsl/comp/address-buffers.comp index a252fc8ae36..7f1c7975bc6 100644 --- a/reference/shaders-hlsl/comp/address-buffers.comp +++ b/reference/shaders-hlsl/comp/address-buffers.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer WriteOnly : register(u2); ByteAddressBuffer ReadOnly : register(t0); RWByteAddressBuffer ReadWrite : register(u1); diff --git 
a/reference/shaders-hlsl/comp/atomic.comp b/reference/shaders-hlsl/comp/atomic.comp index 72e15bf77dc..e6ff891e8c2 100644 --- a/reference/shaders-hlsl/comp/atomic.comp +++ b/reference/shaders-hlsl/comp/atomic.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer ssbo : register(u2); RWTexture2D uImage : register(u0); RWTexture2D iImage : register(u1); diff --git a/reference/shaders-hlsl/comp/globallycoherent.comp b/reference/shaders-hlsl/comp/globallycoherent.comp index 69886256f85..236f341e1ab 100644 --- a/reference/shaders-hlsl/comp/globallycoherent.comp +++ b/reference/shaders-hlsl/comp/globallycoherent.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + globallycoherent RWByteAddressBuffer _29 : register(u3); ByteAddressBuffer _33 : register(t2); RWTexture2D uImageIn : register(u0); diff --git a/reference/shaders-hlsl/comp/image.comp b/reference/shaders-hlsl/comp/image.comp index c8504e636c9..89a99409424 100644 --- a/reference/shaders-hlsl/comp/image.comp +++ b/reference/shaders-hlsl/comp/image.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWTexture2D uImageInF : register(u0); RWTexture2D uImageOutF : register(u1); RWTexture2D uImageInI : register(u2); diff --git a/reference/shaders-hlsl/comp/image.nonwritable-uav-texture.comp b/reference/shaders-hlsl/comp/image.nonwritable-uav-texture.comp new file mode 100644 index 00000000000..1e11b08777f --- /dev/null +++ b/reference/shaders-hlsl/comp/image.nonwritable-uav-texture.comp @@ -0,0 +1,73 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + +Texture2D uImageInF : register(t0); +RWTexture2D uImageOutF : register(u1); +Texture2D uImageInI : register(t2); +RWTexture2D uImageOutI : register(u3); +Texture2D uImageInU : register(t4); +RWTexture2D uImageOutU : register(u5); +Buffer uImageInBuffer : register(t6); +RWBuffer uImageOutBuffer : register(u7); +Texture2D uImageInF2 : register(t8); 
+RWTexture2D uImageOutF2 : register(u9); +Texture2D uImageInI2 : register(t10); +RWTexture2D uImageOutI2 : register(u11); +Texture2D uImageInU2 : register(t12); +RWTexture2D uImageOutU2 : register(u13); +Buffer uImageInBuffer2 : register(t14); +RWBuffer uImageOutBuffer2 : register(u15); +Texture2D uImageInF4 : register(t16); +RWTexture2D uImageOutF4 : register(u17); +Texture2D uImageInI4 : register(t18); +RWTexture2D uImageOutI4 : register(u19); +Texture2D uImageInU4 : register(t20); +RWTexture2D uImageOutU4 : register(u21); +Buffer uImageInBuffer4 : register(t22); +RWBuffer uImageOutBuffer4 : register(u23); +RWTexture2D uImageNoFmtF : register(u24); +RWTexture2D uImageNoFmtU : register(u25); +RWTexture2D uImageNoFmtI : register(u26); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + float4 f = uImageInF[int2(gl_GlobalInvocationID.xy)]; + uImageOutF[int2(gl_GlobalInvocationID.xy)] = f.x; + int4 i = uImageInI[int2(gl_GlobalInvocationID.xy)]; + uImageOutI[int2(gl_GlobalInvocationID.xy)] = i.x; + uint4 u = uImageInU[int2(gl_GlobalInvocationID.xy)]; + uImageOutU[int2(gl_GlobalInvocationID.xy)] = u.x; + float4 b = uImageInBuffer[int(gl_GlobalInvocationID.x)]; + uImageOutBuffer[int(gl_GlobalInvocationID.x)] = b.x; + float4 f2 = uImageInF2[int2(gl_GlobalInvocationID.xy)]; + uImageOutF2[int2(gl_GlobalInvocationID.xy)] = f2.xy; + int4 i2 = uImageInI2[int2(gl_GlobalInvocationID.xy)]; + uImageOutI2[int2(gl_GlobalInvocationID.xy)] = i2.xy; + uint4 u2 = uImageInU2[int2(gl_GlobalInvocationID.xy)]; + uImageOutU2[int2(gl_GlobalInvocationID.xy)] = u2.xy; + float4 b2 = uImageInBuffer2[int(gl_GlobalInvocationID.x)]; + uImageOutBuffer2[int(gl_GlobalInvocationID.x)] = b2.xy; + float4 f4 = uImageInF4[int2(gl_GlobalInvocationID.xy)]; + uImageOutF4[int2(gl_GlobalInvocationID.xy)] = f4; + int4 i4 = uImageInI4[int2(gl_GlobalInvocationID.xy)]; + uImageOutI4[int2(gl_GlobalInvocationID.xy)] = 
i4; + uint4 u4 = uImageInU4[int2(gl_GlobalInvocationID.xy)]; + uImageOutU4[int2(gl_GlobalInvocationID.xy)] = u4; + float4 b4 = uImageInBuffer4[int(gl_GlobalInvocationID.x)]; + uImageOutBuffer4[int(gl_GlobalInvocationID.x)] = b4; + uImageNoFmtF[int2(gl_GlobalInvocationID.xy)] = b2; + uImageNoFmtU[int2(gl_GlobalInvocationID.xy)] = u4; + uImageNoFmtI[int2(gl_GlobalInvocationID.xy)] = i4; +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/inverse.comp b/reference/shaders-hlsl/comp/inverse.comp index 3be954a6f61..698f647cecc 100644 --- a/reference/shaders-hlsl/comp/inverse.comp +++ b/reference/shaders-hlsl/comp/inverse.comp @@ -1,9 +1,11 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _15 : register(u0); ByteAddressBuffer _20 : register(t1); // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float2x2 SPIRV_Cross_Inverse(float2x2 m) +float2x2 spvInverse(float2x2 m) { float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -23,29 +25,29 @@ float2x2 SPIRV_Cross_Inverse(float2x2 m) } // Returns the determinant of a 2x2 matrix. -float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float3x3 SPIRV_Cross_Inverse(float3x3 m) +float3x3 spvInverse(float3x3 m) { float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) // Create the transpose of the cofactors, as the classical adjoint of the matrix. 
- adj[0][0] = SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]); - adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]); - adj[0][2] = SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]); + adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]); + adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]); + adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]); - adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]); - adj[1][1] = SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]); - adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]); + adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]); + adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]); + adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]); - adj[2][0] = SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]); - adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]); - adj[2][2] = SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]); + adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]); + adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]); + adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]); // Calculate the determinant as a combination of the cofactors of the first row. float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]); @@ -56,37 +58,37 @@ float3x3 SPIRV_Cross_Inverse(float3x3 m) } // Returns the determinant of a 3x3 matrix. 
-float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { - return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * SPIRV_Cross_Det2x2(a2, a3, b2, b3); + return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. -float4x4 SPIRV_Cross_Inverse(float4x4 m) +float4x4 spvInverse(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) // Create the transpose of the cofactors, as the classical adjoint of the matrix. - adj[0][0] = SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); - adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); - adj[0][2] = SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); - adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); - - adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); - adj[1][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); - adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); - adj[1][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); - - adj[2][0] = SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); - adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], 
m[2][3], m[3][0], m[3][1], m[3][3]); - adj[2][2] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); - adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]); - - adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); - adj[3][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); - adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); - adj[3][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); + adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); + adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); + + adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); + adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); + + adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); + adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], 
m[2][1], m[2][3]); + + adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); + adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); // Calculate the determinant as a combination of the cofactors of the first row. float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]); @@ -99,16 +101,16 @@ float4x4 SPIRV_Cross_Inverse(float4x4 m) void comp_main() { float2x2 _23 = asfloat(uint2x2(_20.Load2(0), _20.Load2(8))); - float2x2 _24 = SPIRV_Cross_Inverse(_23); + float2x2 _24 = spvInverse(_23); _15.Store2(0, asuint(_24[0])); _15.Store2(8, asuint(_24[1])); float3x3 _29 = asfloat(uint3x3(_20.Load3(16), _20.Load3(32), _20.Load3(48))); - float3x3 _30 = SPIRV_Cross_Inverse(_29); + float3x3 _30 = spvInverse(_29); _15.Store3(16, asuint(_30[0])); _15.Store3(32, asuint(_30[1])); _15.Store3(48, asuint(_30[2])); float4x4 _35 = asfloat(uint4x4(_20.Load4(64), _20.Load4(80), _20.Load4(96), _20.Load4(112))); - float4x4 _36 = SPIRV_Cross_Inverse(_35); + float4x4 _36 = spvInverse(_35); _15.Store4(64, asuint(_36[0])); _15.Store4(80, asuint(_36[1])); _15.Store4(96, asuint(_36[2])); diff --git a/reference/shaders-hlsl/comp/num-workgroups-alone.comp b/reference/shaders-hlsl/comp/num-workgroups-alone.comp index dee39e3d579..ff71a0e103c 100644 --- a/reference/shaders-hlsl/comp/num-workgroups-alone.comp +++ b/reference/shaders-hlsl/comp/num-workgroups-alone.comp @@ -1,5 +1,7 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); -cbuffer SPIRV_Cross_NumWorkgroups : register(b0) +cbuffer SPIRV_Cross_NumWorkgroups { uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); }; diff --git 
a/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp b/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp index 1c98e5e56d7..cc326db3329 100644 --- a/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp +++ b/reference/shaders-hlsl/comp/num-workgroups-with-builtins.comp @@ -1,5 +1,7 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _10 : register(u0); -cbuffer SPIRV_Cross_NumWorkgroups : register(b0) +cbuffer SPIRV_Cross_NumWorkgroups { uint3 SPIRV_Cross_NumWorkgroups_1_count : packoffset(c0); }; diff --git a/reference/shaders-hlsl/comp/outer-product.comp b/reference/shaders-hlsl/comp/outer-product.comp index 71613d4f156..e58c02fe0b8 100644 --- a/reference/shaders-hlsl/comp/outer-product.comp +++ b/reference/shaders-hlsl/comp/outer-product.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _21 : register(u0); ByteAddressBuffer _26 : register(t1); diff --git a/reference/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp b/reference/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp new file mode 100644 index 00000000000..1339f45f069 --- /dev/null +++ b/reference/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp @@ -0,0 +1,224 @@ +struct Ray +{ + float3 pos; + float tmin; + float3 dir; + float tmax; +}; + +RWByteAddressBuffer _17 : register(u0); +RWByteAddressBuffer _257 : register(u2); +uniform RaytracingAccelerationStructure rtas : register(t1); + +static RayQuery rayQuery; + +Ray makeRayDesc() +{ + Ray ray; + ray.pos = 0.0f.xxx; + ray.dir = float3(1.0f, 0.0f, 0.0f); + ray.tmin = 0.0f; + ray.tmax = 9999.0f; + return ray; +} + +void doSomething() +{ + _17.Store(0, 0u); + _17.Store(4, 0u); +} + +void comp_main() +{ + Ray ray = makeRayDesc(); + RayDesc _1ident = {ray.pos, ray.tmin, ray.dir, ray.tmax}; + rayQuery.TraceRayInline(rtas, 0u, 255u, _1ident); + float4x3 _mat4x3; + float3x4 _mat3x4; + for (;;) + { + bool _67 = rayQuery.Proceed(); + if (_67) + { + uint _71 = 
rayQuery.CandidateType(); + uint candidateType = _71; + switch (candidateType) + { + case 0u: + { + rayQuery.Abort(); + float4x3 _79 = rayQuery.CandidateObjectToWorld4x3(); + _mat4x3 = _79; + _mat3x4 = transpose(_mat4x3); + rayQuery.CommitNonOpaqueTriangleHit(); + bool _87 = rayQuery.CommittedTriangleFrontFace(); + if (_87) + { + doSomething(); + } + float2 _92 = rayQuery.CommittedTriangleBarycentrics(); + if (_92.x == 0.0f) + { + doSomething(); + } + int _98 = rayQuery.CommittedInstanceID(); + if (_98 > 0) + { + doSomething(); + } + int _103 = rayQuery.CommittedInstanceIndex(); + if (_103 > 0) + { + doSomething(); + } + float3 _108 = rayQuery.CommittedObjectRayDirection(); + if (_108.x > 0.0f) + { + doSomething(); + } + float3 _114 = rayQuery.CommittedObjectRayOrigin(); + if (_114.x > 0.0f) + { + doSomething(); + } + int _120 = rayQuery.CommittedPrimitiveIndex(); + if (_120 > 0) + { + doSomething(); + } + float _125 = rayQuery.CommittedRayT(); + if (_125 > 0.0f) + { + doSomething(); + } + uint _130 = rayQuery.CommittedInstanceContributionToHitGroupIndex(); + if (_130 > 0u) + { + doSomething(); + } + break; + } + case 1u: + { + float4x3 _136 = rayQuery.CandidateObjectToWorld4x3(); + _mat4x3 = _136; + _mat3x4 = transpose(_mat4x3); + bool _139 = rayQuery.CandidateProceduralPrimitiveNonOpaque(); + if (_139) + { + doSomething(); + } + float t = 0.5f; + rayQuery.CommitProceduralPrimitiveHit(145); + rayQuery.Abort(); + break; + } + } + continue; + } + else + { + break; + } + } + if (_mat3x4[0].x == _mat4x3[0].x) + { + doSomething(); + } + uint _157 = rayQuery.CommittedStatus(); + uint committedStatus = _157; + switch (committedStatus) + { + case 0u: + { + float4x3 _163 = rayQuery.CandidateWorldToObject4x3(); + _mat4x3 = _163; + _mat3x4 = transpose(_mat4x3); + break; + } + case 1u: + { + float4x3 _167 = rayQuery.CommittedWorldToObject4x3(); + _mat4x3 = _167; + _mat3x4 = transpose(_mat4x3); + bool _170 = rayQuery.CommittedTriangleFrontFace(); + if (_170) + { + 
doSomething(); + } + float2 _174 = rayQuery.CommittedTriangleBarycentrics(); + if (_174.y == 0.0f) + { + doSomething(); + } + break; + } + case 2u: + { + int _182 = rayQuery.CommittedGeometryIndex(); + if (_182 > 0) + { + doSomething(); + } + int _187 = rayQuery.CommittedInstanceIndex(); + if (_187 > 0) + { + doSomething(); + } + int _192 = rayQuery.CommittedInstanceID(); + if (_192 > 0) + { + doSomething(); + } + float3 _197 = rayQuery.CommittedObjectRayDirection(); + if (_197.z > 0.0f) + { + doSomething(); + } + float3 _204 = rayQuery.CommittedObjectRayOrigin(); + if (_204.x > 0.0f) + { + doSomething(); + } + int _210 = rayQuery.CommittedPrimitiveIndex(); + if (_210 > 0) + { + doSomething(); + } + float _215 = rayQuery.CommittedRayT(); + if (_215 > 0.0f) + { + doSomething(); + } + break; + } + } + if (_mat3x4[0].x == _mat4x3[0].x) + { + doSomething(); + } + uint _230 = rayQuery.RayFlags(); + if (_230 > 256u) + { + doSomething(); + } + float _236 = rayQuery.RayTMin(); + if (_236 > 0.0f) + { + doSomething(); + } + float3 _242 = rayQuery.WorldRayOrigin(); + float3 o = _242; + float3 _244 = rayQuery.WorldRayDirection(); + float3 d = _244; + if (o.x == d.z) + { + doSomething(); + } +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/rmw-matrix.comp b/reference/shaders-hlsl/comp/rmw-matrix.comp index ed666693588..30ac03f84f4 100644 --- a/reference/shaders-hlsl/comp/rmw-matrix.comp +++ b/reference/shaders-hlsl/comp/rmw-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u0); void comp_main() diff --git a/reference/shaders-hlsl/comp/rwbuffer-matrix.comp b/reference/shaders-hlsl/comp/rwbuffer-matrix.comp index e79829283e6..197c9a95138 100644 --- a/reference/shaders-hlsl/comp/rwbuffer-matrix.comp +++ b/reference/shaders-hlsl/comp/rwbuffer-matrix.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _28 : 
register(u0); cbuffer UBO : register(b1) { diff --git a/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp index 47f2fe41076..db2bbe96989 100644 --- a/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _9 : register(u0); void comp_main() diff --git a/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp b/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp index c4537db0391..4e7c5e6167e 100644 --- a/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp +++ b/reference/shaders-hlsl/comp/spec-constant-op-member-array.comp @@ -28,6 +28,7 @@ static const int d = (c + 50); #define SPIRV_CROSS_CONSTANT_ID_3 400 #endif static const int e = SPIRV_CROSS_CONSTANT_ID_3; +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); RWByteAddressBuffer _22 : register(u0); diff --git a/reference/shaders-hlsl/comp/ssbo-array-length.comp b/reference/shaders-hlsl/comp/ssbo-array-length.comp index 2e3df626ae7..82657cacfcb 100644 --- a/reference/shaders-hlsl/comp/ssbo-array-length.comp +++ b/reference/shaders-hlsl/comp/ssbo-array-length.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer _11 : register(u1); void comp_main() diff --git a/reference/shaders-hlsl/comp/ssbo-array.comp b/reference/shaders-hlsl/comp/ssbo-array.comp index 90927421c68..dab20325b0b 100644 --- a/reference/shaders-hlsl/comp/ssbo-array.comp +++ b/reference/shaders-hlsl/comp/ssbo-array.comp @@ -1,3 +1,5 @@ +static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); + RWByteAddressBuffer ssbo0 : register(u0); void comp_main() diff --git a/reference/shaders-hlsl/flatten/array.flatten.vert b/reference/shaders-hlsl/flatten/array.flatten.vert new file mode 
100644 index 00000000000..948a198e6ad --- /dev/null +++ b/reference/shaders-hlsl/flatten/array.flatten.vert @@ -0,0 +1,30 @@ +uniform float4 UBO[56]; + +static float4 gl_Position; +static float4 aVertex; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + float4 a4 = UBO[23]; + float4 offset = (UBO[50] + UBO[45]) + UBO[54].x.xxxx; + gl_Position = (mul(aVertex, float4x4(UBO[40], UBO[41], UBO[42], UBO[43])) + UBO[55]) + offset; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/basic.flatten.vert b/reference/shaders-hlsl/flatten/basic.flatten.vert new file mode 100644 index 00000000000..778acd48037 --- /dev/null +++ b/reference/shaders-hlsl/flatten/basic.flatten.vert @@ -0,0 +1,35 @@ +uniform float4 UBO[4]; + +static float4 gl_Position; +static float4 aVertex; +static float3 vNormal; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float3 vNormal : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vNormal = aNormal; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vNormal = vNormal; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/copy.flatten.vert b/reference/shaders-hlsl/flatten/copy.flatten.vert new file mode 100644 index 00000000000..f85c890b11d --- /dev/null +++ b/reference/shaders-hlsl/flatten/copy.flatten.vert @@ -0,0 +1,53 @@ +struct Light +{ + 
float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + Light light; + for (int i = 0; i < 4; i++) + { + Light _51 = {UBO[i * 2 + 4].xyz, UBO[i * 2 + 4].w, UBO[i * 2 + 5]}; + Light _52 = _51; + light.Position = _52.Position; + light.Radius = _52.Radius; + light.Color = _52.Color; + float3 L = aVertex.xyz - light.Position; + vColor += ((UBO[i * 2 + 5] * clamp(1.0f - (length(L) / light.Radius), 0.0f, 1.0f)) * dot(aNormal, normalize(L))); + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/dynamic.flatten.vert b/reference/shaders-hlsl/flatten/dynamic.flatten.vert new file mode 100644 index 00000000000..787eefcdbea --- /dev/null +++ b/reference/shaders-hlsl/flatten/dynamic.flatten.vert @@ -0,0 +1,47 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + for (int i = 0; i < 4; i++) + { + float3 L = aVertex.xyz - 
UBO[i * 2 + 4].xyz; + vColor += ((UBO[i * 2 + 5] * clamp(1.0f - (length(L) / UBO[i * 2 + 4].w), 0.0f, 1.0f)) * dot(aNormal, normalize(L))); + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/matrix-conversion.flatten.frag b/reference/shaders-hlsl/flatten/matrix-conversion.flatten.frag new file mode 100644 index 00000000000..59ec525f41a --- /dev/null +++ b/reference/shaders-hlsl/flatten/matrix-conversion.flatten.frag @@ -0,0 +1,29 @@ +uniform float4 UBO[4]; + +static float3 FragColor; +static float3 vNormal; + +struct SPIRV_Cross_Input +{ + nointerpolation float3 vNormal : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float3 FragColor : SV_Target0; +}; + +void frag_main() +{ + float4x4 _19 = float4x4(UBO[0], UBO[1], UBO[2], UBO[3]); + FragColor = mul(vNormal, float3x3(_19[0].xyz, _19[1].xyz, _19[2].xyz)); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vNormal = stage_input.vNormal; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/matrixindex.flatten.vert b/reference/shaders-hlsl/flatten/matrixindex.flatten.vert new file mode 100644 index 00000000000..b69a72dc11a --- /dev/null +++ b/reference/shaders-hlsl/flatten/matrixindex.flatten.vert @@ -0,0 +1,41 @@ +uniform float4 UBO[14]; + +static float4 gl_Position; +static float4 oA; +static float4 oB; +static float4 oC; +static float4 oD; +static float4 oE; + +struct SPIRV_Cross_Output +{ + float4 oA : TEXCOORD0; + float4 oB : TEXCOORD1; + float4 oC : TEXCOORD2; + float4 oD : TEXCOORD3; + float4 oE : TEXCOORD4; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = 0.0f.xxxx; + oA = UBO[1]; + oB = 
float4(UBO[4].y, UBO[5].y, UBO[6].y, UBO[7].y); + oC = UBO[9]; + oD = float4(UBO[10].x, UBO[11].x, UBO[12].x, UBO[13].x); + oE = float4(UBO[1].z, UBO[6].y, UBO[9].z, UBO[12].y); +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.oA = oA; + stage_output.oB = oB; + stage_output.oC = oC; + stage_output.oD = oD; + stage_output.oE = oE; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/multiindex.flatten.vert b/reference/shaders-hlsl/flatten/multiindex.flatten.vert new file mode 100644 index 00000000000..f21f05ec446 --- /dev/null +++ b/reference/shaders-hlsl/flatten/multiindex.flatten.vert @@ -0,0 +1,28 @@ +uniform float4 UBO[15]; + +static float4 gl_Position; +static int2 aIndex; + +struct SPIRV_Cross_Input +{ + int2 aIndex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = UBO[aIndex.x * 5 + aIndex.y * 1 + 0]; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aIndex = stage_input.aIndex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/push-constant.flatten.vert b/reference/shaders-hlsl/flatten/push-constant.flatten.vert new file mode 100644 index 00000000000..5bfb4dc0651 --- /dev/null +++ b/reference/shaders-hlsl/flatten/push-constant.flatten.vert @@ -0,0 +1,35 @@ +uniform float4 PushMe[6]; + +static float4 gl_Position; +static float4 Pos; +static float2 vRot; +static float2 Rot; + +struct SPIRV_Cross_Input +{ + float2 Rot : TEXCOORD0; + float4 Pos : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float2 vRot : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(Pos, float4x4(PushMe[0], PushMe[1], PushMe[2], PushMe[3])); + vRot = mul(Rot, float2x2(PushMe[4].xy, PushMe[4].zw)) + PushMe[5].z.xx; +} + 
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + Pos = stage_input.Pos; + Rot = stage_input.Rot; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vRot = vRot; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/rowmajor.flatten.vert b/reference/shaders-hlsl/flatten/rowmajor.flatten.vert new file mode 100644 index 00000000000..801def3b436 --- /dev/null +++ b/reference/shaders-hlsl/flatten/rowmajor.flatten.vert @@ -0,0 +1,29 @@ +uniform float4 UBO[12]; + +static float4 gl_Position; +static float4 aVertex; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + float2 v = mul(transpose(float4x2(UBO[8].xy, UBO[9].xy, UBO[10].xy, UBO[11].xy)), aVertex); + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])) + mul(aVertex, transpose(float4x4(UBO[4], UBO[5], UBO[6], UBO[7]))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/struct.flatten.vert b/reference/shaders-hlsl/flatten/struct.flatten.vert new file mode 100644 index 00000000000..9b97bf59e29 --- /dev/null +++ b/reference/shaders-hlsl/flatten/struct.flatten.vert @@ -0,0 +1,44 @@ +struct Light +{ + float3 Position; + float Radius; + float4 Color; +}; + +uniform float4 UBO[6]; + +static float4 gl_Position; +static float4 aVertex; +static float4 vColor; +static float3 aNormal; + +struct SPIRV_Cross_Input +{ + float4 aVertex : TEXCOORD0; + float3 aNormal : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 vColor : TEXCOORD0; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = mul(aVertex, float4x4(UBO[0], UBO[1], UBO[2], UBO[3])); + vColor = 0.0f.xxxx; + float3 L = aVertex.xyz 
- UBO[4].xyz; + vColor += ((UBO[5] * clamp(1.0f - (length(L) / UBO[4].w), 0.0f, 1.0f)) * dot(aNormal, normalize(L))); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + aVertex = stage_input.aVertex; + aNormal = stage_input.aNormal; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/struct.rowmajor.flatten.vert b/reference/shaders-hlsl/flatten/struct.rowmajor.flatten.vert new file mode 100644 index 00000000000..39b0a808023 --- /dev/null +++ b/reference/shaders-hlsl/flatten/struct.rowmajor.flatten.vert @@ -0,0 +1,48 @@ +struct Foo +{ + column_major float3x4 MVP0; + column_major float3x4 MVP1; +}; + +uniform float4 UBO[8]; + +static float4 v0; +static float4 v1; +static float3 V0; +static float3 V1; + +struct SPIRV_Cross_Input +{ + float4 v0 : TEXCOORD0; + float4 v1 : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float3 V0 : TEXCOORD0; + float3 V1 : TEXCOORD1; +}; + +void vert_main() +{ + Foo _19 = {transpose(float4x3(UBO[0].xyz, UBO[1].xyz, UBO[2].xyz, UBO[3].xyz)), transpose(float4x3(UBO[4].xyz, UBO[5].xyz, UBO[6].xyz, UBO[7].xyz))}; + Foo _20 = _19; + Foo f; + f.MVP0 = _20.MVP0; + f.MVP1 = _20.MVP1; + float3 a = mul(f.MVP0, v0); + float3 b = mul(f.MVP1, v1); + V0 = a; + V1 = b; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + v0 = stage_input.v0; + v1 = stage_input.v1; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.V0 = V0; + stage_output.V1 = V1; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/swizzle.flatten.vert b/reference/shaders-hlsl/flatten/swizzle.flatten.vert new file mode 100644 index 00000000000..1091a17e995 --- /dev/null +++ b/reference/shaders-hlsl/flatten/swizzle.flatten.vert @@ -0,0 +1,45 @@ +uniform float4 UBO[8]; + +static float4 gl_Position; +static float4 oA; +static float4 oB; +static float4 oC; +static float4 oD; +static float4 
oE; +static float4 oF; + +struct SPIRV_Cross_Output +{ + float4 oA : TEXCOORD0; + float4 oB : TEXCOORD1; + float4 oC : TEXCOORD2; + float4 oD : TEXCOORD3; + float4 oE : TEXCOORD4; + float4 oF : TEXCOORD5; + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = 0.0f.xxxx; + oA = UBO[0]; + oB = float4(UBO[1].xy, UBO[1].zw); + oC = float4(UBO[2].x, UBO[3].xyz); + oD = float4(UBO[4].xyz, UBO[4].w); + oE = float4(UBO[5].x, UBO[5].y, UBO[5].z, UBO[5].w); + oF = float4(UBO[6].x, UBO[6].zw, UBO[7].x); +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.oA = oA; + stage_output.oB = oB; + stage_output.oC = oC; + stage_output.oD = oD; + stage_output.oE = oE; + stage_output.oF = oF; + return stage_output; +} diff --git a/reference/shaders-hlsl/flatten/types.flatten.frag b/reference/shaders-hlsl/flatten/types.flatten.frag new file mode 100644 index 00000000000..feb0b36096a --- /dev/null +++ b/reference/shaders-hlsl/flatten/types.flatten.frag @@ -0,0 +1,23 @@ +uniform int4 UBO1[2]; +uniform uint4 UBO2[2]; +uniform float4 UBO0[2]; + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = ((((float4(UBO1[0]) + float4(UBO1[1])) + float4(UBO2[0])) + float4(UBO2[1])) + UBO0[0]) + UBO0[1]; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/builtins.frag b/reference/shaders-hlsl/frag/builtins.frag index 922eca7c2d2..8432c42f80d 100644 --- a/reference/shaders-hlsl/frag/builtins.frag +++ b/reference/shaders-hlsl/frag/builtins.frag @@ -24,6 +24,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vColor = stage_input.vColor; frag_main(); SPIRV_Cross_Output 
stage_output; diff --git a/reference/shaders-hlsl/frag/combined-texture-sampler-parameter.frag b/reference/shaders-hlsl/frag/combined-texture-sampler-parameter.frag index 7fcff423b32..66330805f13 100644 --- a/reference/shaders-hlsl/frag/combined-texture-sampler-parameter.frag +++ b/reference/shaders-hlsl/frag/combined-texture-sampler-parameter.frag @@ -22,7 +22,7 @@ float4 samp3(Texture2D s, SamplerState _s_sampler) float samp4(Texture2D s, SamplerComparisonState _s_sampler) { - return s.SampleCmp(_s_sampler, 1.0f.xxx.xy, 1.0f.xxx.z); + return s.SampleCmp(_s_sampler, 1.0f.xxx.xy, 1.0f); } float samp(Texture2D s0, SamplerState _s0_sampler, Texture2D s1, SamplerComparisonState _s1_sampler) diff --git a/reference/shaders-hlsl/frag/combined-texture-sampler-shadow.frag b/reference/shaders-hlsl/frag/combined-texture-sampler-shadow.frag index af5b0b55795..8d48008d382 100644 --- a/reference/shaders-hlsl/frag/combined-texture-sampler-shadow.frag +++ b/reference/shaders-hlsl/frag/combined-texture-sampler-shadow.frag @@ -11,7 +11,7 @@ struct SPIRV_Cross_Output float samp2(Texture2D t, SamplerComparisonState s) { - return t.SampleCmp(s, 1.0f.xxx.xy, 1.0f.xxx.z); + return t.SampleCmp(s, 1.0f.xxx.xy, 1.0f); } float samp3(Texture2D t, SamplerState s) diff --git a/reference/shaders-hlsl/frag/complex-expression-in-access-chain.frag b/reference/shaders-hlsl/frag/complex-expression-in-access-chain.frag index d5ccb9b9800..b2f995484d7 100644 --- a/reference/shaders-hlsl/frag/complex-expression-in-access-chain.frag +++ b/reference/shaders-hlsl/frag/complex-expression-in-access-chain.frag @@ -31,6 +31,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vIn = stage_input.vIn; vIn2 = stage_input.vIn2; frag_main(); diff --git a/reference/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag b/reference/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag deleted file mode 
100644 index d330706c7bb..00000000000 --- a/reference/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag +++ /dev/null @@ -1,44 +0,0 @@ -struct CBO_1 -{ - float4 a; - float4 b; - float4 c; - float4 d; -}; - -ConstantBuffer cbo[2][4] : register(b4, space0); -cbuffer PushMe -{ - float4 push_a : packoffset(c0); - float4 push_b : packoffset(c1); - float4 push_c : packoffset(c2); - float4 push_d : packoffset(c3); -}; - - -static float4 FragColor; - -struct SPIRV_Cross_Output -{ - float4 FragColor : SV_Target0; -}; - -void frag_main() -{ - FragColor = cbo[1][2].a; - FragColor += cbo[1][2].b; - FragColor += cbo[1][2].c; - FragColor += cbo[1][2].d; - FragColor += push_a; - FragColor += push_b; - FragColor += push_c; - FragColor += push_d; -} - -SPIRV_Cross_Output main() -{ - frag_main(); - SPIRV_Cross_Output stage_output; - stage_output.FragColor = FragColor; - return stage_output; -} diff --git a/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag b/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag index 322102ce869..6a31ce04888 100644 --- a/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag +++ b/reference/shaders-hlsl/frag/control-dependent-in-branch.desktop.frag @@ -28,7 +28,8 @@ void frag_main() float4 d7 = ddy_fine(vInput); float4 d8 = fwidth(vInput); float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw); - float2 lod = float2(_56_tmp, _56_tmp); + float2 _56 = _56_tmp.xx; + float2 lod = _56; if (vInput.y > 10.0f) { FragColor += t; diff --git a/reference/shaders-hlsl/frag/demote-to-helper.frag b/reference/shaders-hlsl/frag/demote-to-helper.frag new file mode 100644 index 00000000000..743a4228baf --- /dev/null +++ b/reference/shaders-hlsl/frag/demote-to-helper.frag @@ -0,0 +1,9 @@ +void frag_main() +{ + discard; +} + +void main() +{ + frag_main(); +} diff --git a/reference/shaders-hlsl/frag/fp16-packing.frag b/reference/shaders-hlsl/frag/fp16-packing.frag index d87828225fd..54b91e2aa51 
100644 --- a/reference/shaders-hlsl/frag/fp16-packing.frag +++ b/reference/shaders-hlsl/frag/fp16-packing.frag @@ -15,21 +15,21 @@ struct SPIRV_Cross_Output uint FP16Out : SV_Target1; }; -uint SPIRV_Cross_packHalf2x16(float2 value) +uint spvPackHalf2x16(float2 value) { uint2 Packed = f32tof16(value); return Packed.x | (Packed.y << 16); } -float2 SPIRV_Cross_unpackHalf2x16(uint value) +float2 spvUnpackHalf2x16(uint value) { return f16tof32(uint2(value & 0xffff, value >> 16)); } void frag_main() { - FP32Out = SPIRV_Cross_unpackHalf2x16(FP16); - FP16Out = SPIRV_Cross_packHalf2x16(FP32); + FP32Out = spvUnpackHalf2x16(FP16); + FP16Out = spvPackHalf2x16(FP32); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/shaders-hlsl/frag/image-query-selective.frag b/reference/shaders-hlsl/frag/image-query-selective.frag index 25c12da669f..69fe83fd289 100644 --- a/reference/shaders-hlsl/frag/image-query-selective.frag +++ b/reference/shaders-hlsl/frag/image-query-selective.frag @@ -20,49 +20,49 @@ SamplerState _uSamplerMSArray_sampler : register(s8); Texture2D uSampler2D : register(t1); SamplerState _uSampler2D_sampler : register(s1); -uint SPIRV_Cross_textureSize(Texture1D Tex, uint Level, out uint Param) +uint spvTextureSize(Texture1D Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(Level, ret.x, Param); return ret; } -uint SPIRV_Cross_textureSize(Texture1D Tex, uint Level, out uint Param) +uint spvTextureSize(Texture1D Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(Level, ret.x, Param); return ret; } -uint SPIRV_Cross_textureSize(Texture1D Tex, uint Level, out uint Param) +uint spvTextureSize(Texture1D Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(Level, ret.x, Param); return ret; } -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); return ret; } -uint3 
SPIRV_Cross_textureSize(Texture2DArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture2DArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture3D Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture3D Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint SPIRV_Cross_textureSize(Buffer Tex, uint Level, out uint Param) +uint spvTextureSize(Buffer Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(ret.x); @@ -70,28 +70,28 @@ uint SPIRV_Cross_textureSize(Buffer Tex, uint Level, out uint Param) return ret; } -uint2 SPIRV_Cross_textureSize(TextureCube Tex, uint Level, out uint Param) +uint2 spvTextureSize(TextureCube Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(TextureCubeArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(TextureCubeArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint2 SPIRV_Cross_textureSize(Texture2DMS Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2DMS Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture2DMSArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture2DMSArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(ret.x, ret.y, ret.z, Param); @@ -101,42 +101,42 @@ uint3 SPIRV_Cross_textureSize(Texture2DMSArray Tex, uint Level, out uint void frag_main() { uint _17_dummy_parameter; - int a = int(SPIRV_Cross_textureSize(uSampler1DUint, uint(0), _17_dummy_parameter)); + int a = int(spvTextureSize(uSampler1DUint, uint(0), _17_dummy_parameter)); uint _24_dummy_parameter; - a = int(SPIRV_Cross_textureSize(uSampler1DInt, uint(0), _24_dummy_parameter)); 
+ a = int(spvTextureSize(uSampler1DInt, uint(0), _24_dummy_parameter)); uint _32_dummy_parameter; - a = int(SPIRV_Cross_textureSize(uSampler1DFloat, uint(0), _32_dummy_parameter)); + a = int(spvTextureSize(uSampler1DFloat, uint(0), _32_dummy_parameter)); uint _42_dummy_parameter; - int3 c = int3(SPIRV_Cross_textureSize(uSampler2DArray, uint(0), _42_dummy_parameter)); + int3 c = int3(spvTextureSize(uSampler2DArray, uint(0), _42_dummy_parameter)); uint _50_dummy_parameter; - int3 d = int3(SPIRV_Cross_textureSize(uSampler3D, uint(0), _50_dummy_parameter)); + int3 d = int3(spvTextureSize(uSampler3D, uint(0), _50_dummy_parameter)); uint _60_dummy_parameter; - int2 e = int2(SPIRV_Cross_textureSize(uSamplerCube, uint(0), _60_dummy_parameter)); + int2 e = int2(spvTextureSize(uSamplerCube, uint(0), _60_dummy_parameter)); uint _68_dummy_parameter; - int3 f = int3(SPIRV_Cross_textureSize(uSamplerCubeArray, uint(0), _68_dummy_parameter)); + int3 f = int3(spvTextureSize(uSamplerCubeArray, uint(0), _68_dummy_parameter)); uint _76_dummy_parameter; - int g = int(SPIRV_Cross_textureSize(uSamplerBuffer, 0u, _76_dummy_parameter)); + int g = int(spvTextureSize(uSamplerBuffer, 0u, _76_dummy_parameter)); uint _84_dummy_parameter; - int2 h = int2(SPIRV_Cross_textureSize(uSamplerMS, 0u, _84_dummy_parameter)); + int2 h = int2(spvTextureSize(uSamplerMS, 0u, _84_dummy_parameter)); uint _92_dummy_parameter; - int3 i = int3(SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _92_dummy_parameter)); + int3 i = int3(spvTextureSize(uSamplerMSArray, 0u, _92_dummy_parameter)); int _100; - SPIRV_Cross_textureSize(uSampler2D, 0u, _100); + spvTextureSize(uSampler2D, 0u, _100); int l1 = int(_100); int _104; - SPIRV_Cross_textureSize(uSampler2DArray, 0u, _104); + spvTextureSize(uSampler2DArray, 0u, _104); int l2 = int(_104); int _108; - SPIRV_Cross_textureSize(uSampler3D, 0u, _108); + spvTextureSize(uSampler3D, 0u, _108); int l3 = int(_108); int _112; - SPIRV_Cross_textureSize(uSamplerCube, 0u, _112); + 
spvTextureSize(uSamplerCube, 0u, _112); int l4 = int(_112); int _116; - SPIRV_Cross_textureSize(uSamplerMS, 0u, _116); + spvTextureSize(uSamplerMS, 0u, _116); int s0 = int(_116); int _120; - SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _120); + spvTextureSize(uSamplerMSArray, 0u, _120); int s1 = int(_120); } diff --git a/reference/shaders-hlsl/frag/image-query-uav.frag b/reference/shaders-hlsl/frag/image-query-uav.frag new file mode 100644 index 00000000000..6626ed2f843 --- /dev/null +++ b/reference/shaders-hlsl/frag/image-query-uav.frag @@ -0,0 +1,64 @@ +RWTexture1D uImage1D : register(u0); +RWTexture2D uImage2D : register(u1); +RWTexture2DArray uImage2DArray : register(u2); +RWTexture3D uImage3D : register(u3); +RWBuffer uImageBuffer : register(u6); + +uint3 spvImageSize(RWTexture2DArray Tex, out uint Param) +{ + uint3 ret; + Tex.GetDimensions(ret.x, ret.y, ret.z); + Param = 0u; + return ret; +} + +uint2 spvImageSize(RWTexture2D Tex, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y); + Param = 0u; + return ret; +} + +uint spvImageSize(RWTexture1D Tex, out uint Param) +{ + uint ret; + Tex.GetDimensions(ret.x); + Param = 0u; + return ret; +} + +uint3 spvImageSize(RWTexture3D Tex, out uint Param) +{ + uint3 ret; + Tex.GetDimensions(ret.x, ret.y, ret.z); + Param = 0u; + return ret; +} + +uint spvImageSize(RWBuffer Tex, out uint Param) +{ + uint ret; + Tex.GetDimensions(ret.x); + Param = 0u; + return ret; +} + +void frag_main() +{ + uint _14_dummy_parameter; + int a = int(spvImageSize(uImage1D, _14_dummy_parameter)); + uint _22_dummy_parameter; + int2 b = int2(spvImageSize(uImage2D, _22_dummy_parameter)); + uint _30_dummy_parameter; + int3 c = int3(spvImageSize(uImage2DArray, _30_dummy_parameter)); + uint _36_dummy_parameter; + int3 d = int3(spvImageSize(uImage3D, _36_dummy_parameter)); + uint _42_dummy_parameter; + int e = int(spvImageSize(uImageBuffer, _42_dummy_parameter)); +} + +void main() +{ + frag_main(); +} diff --git 
a/reference/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag b/reference/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag new file mode 100644 index 00000000000..1e77c2c911f --- /dev/null +++ b/reference/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag @@ -0,0 +1,63 @@ +RWTexture1D uImage1D : register(u0); +RWTexture2D uImage2D : register(u1); +Texture2DArray uImage2DArray : register(t2); +RWTexture3D uImage3D : register(u3); +RWBuffer uImageBuffer : register(u6); + +uint3 spvTextureSize(Texture2DArray Tex, uint Level, out uint Param) +{ + uint3 ret; + Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); + return ret; +} + +uint2 spvImageSize(RWTexture2D Tex, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y); + Param = 0u; + return ret; +} + +uint spvImageSize(RWTexture1D Tex, out uint Param) +{ + uint ret; + Tex.GetDimensions(ret.x); + Param = 0u; + return ret; +} + +uint3 spvImageSize(RWTexture3D Tex, out uint Param) +{ + uint3 ret; + Tex.GetDimensions(ret.x, ret.y, ret.z); + Param = 0u; + return ret; +} + +uint spvImageSize(RWBuffer Tex, out uint Param) +{ + uint ret; + Tex.GetDimensions(ret.x); + Param = 0u; + return ret; +} + +void frag_main() +{ + uint _14_dummy_parameter; + int a = int(spvImageSize(uImage1D, _14_dummy_parameter)); + uint _22_dummy_parameter; + int2 b = int2(spvImageSize(uImage2D, _22_dummy_parameter)); + uint _30_dummy_parameter; + int3 c = int3(spvTextureSize(uImage2DArray, 0u, _30_dummy_parameter)); + uint _36_dummy_parameter; + int3 d = int3(spvImageSize(uImage3D, _36_dummy_parameter)); + uint _42_dummy_parameter; + int e = int(spvImageSize(uImageBuffer, _42_dummy_parameter)); +} + +void main() +{ + frag_main(); +} diff --git a/reference/shaders-hlsl/frag/image-query.frag b/reference/shaders-hlsl/frag/image-query.frag index 71cefc10301..0e4b26bacac 100644 --- a/reference/shaders-hlsl/frag/image-query.frag +++ b/reference/shaders-hlsl/frag/image-query.frag @@ -16,35 +16,35 @@ 
SamplerState _uSamplerMS_sampler : register(s7); Texture2DMSArray uSamplerMSArray : register(t8); SamplerState _uSamplerMSArray_sampler : register(s8); -uint SPIRV_Cross_textureSize(Texture1D Tex, uint Level, out uint Param) +uint spvTextureSize(Texture1D Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(Level, ret.x, Param); return ret; } -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture2DArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture2DArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture3D Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture3D Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint SPIRV_Cross_textureSize(Buffer Tex, uint Level, out uint Param) +uint spvTextureSize(Buffer Tex, uint Level, out uint Param) { uint ret; Tex.GetDimensions(ret.x); @@ -52,28 +52,28 @@ uint SPIRV_Cross_textureSize(Buffer Tex, uint Level, out uint Param) return ret; } -uint2 SPIRV_Cross_textureSize(TextureCube Tex, uint Level, out uint Param) +uint2 spvTextureSize(TextureCube Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(TextureCubeArray Tex, uint Level, out uint Param) +uint3 spvTextureSize(TextureCubeArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param); return ret; } -uint2 SPIRV_Cross_textureSize(Texture2DMS Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2DMS Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(ret.x, ret.y, Param); return ret; } -uint3 SPIRV_Cross_textureSize(Texture2DMSArray 
Tex, uint Level, out uint Param) +uint3 spvTextureSize(Texture2DMSArray Tex, uint Level, out uint Param) { uint3 ret; Tex.GetDimensions(ret.x, ret.y, ret.z, Param); @@ -83,46 +83,46 @@ uint3 SPIRV_Cross_textureSize(Texture2DMSArray Tex, uint Level, out uint void frag_main() { uint _17_dummy_parameter; - int a = int(SPIRV_Cross_textureSize(uSampler1D, uint(0), _17_dummy_parameter)); + int a = int(spvTextureSize(uSampler1D, uint(0), _17_dummy_parameter)); uint _27_dummy_parameter; - int2 b = int2(SPIRV_Cross_textureSize(uSampler2D, uint(0), _27_dummy_parameter)); + int2 b = int2(spvTextureSize(uSampler2D, uint(0), _27_dummy_parameter)); uint _37_dummy_parameter; - int3 c = int3(SPIRV_Cross_textureSize(uSampler2DArray, uint(0), _37_dummy_parameter)); + int3 c = int3(spvTextureSize(uSampler2DArray, uint(0), _37_dummy_parameter)); uint _45_dummy_parameter; - int3 d = int3(SPIRV_Cross_textureSize(uSampler3D, uint(0), _45_dummy_parameter)); + int3 d = int3(spvTextureSize(uSampler3D, uint(0), _45_dummy_parameter)); uint _53_dummy_parameter; - int2 e = int2(SPIRV_Cross_textureSize(uSamplerCube, uint(0), _53_dummy_parameter)); + int2 e = int2(spvTextureSize(uSamplerCube, uint(0), _53_dummy_parameter)); uint _61_dummy_parameter; - int3 f = int3(SPIRV_Cross_textureSize(uSamplerCubeArray, uint(0), _61_dummy_parameter)); + int3 f = int3(spvTextureSize(uSamplerCubeArray, uint(0), _61_dummy_parameter)); uint _69_dummy_parameter; - int g = int(SPIRV_Cross_textureSize(uSamplerBuffer, 0u, _69_dummy_parameter)); + int g = int(spvTextureSize(uSamplerBuffer, 0u, _69_dummy_parameter)); uint _77_dummy_parameter; - int2 h = int2(SPIRV_Cross_textureSize(uSamplerMS, 0u, _77_dummy_parameter)); + int2 h = int2(spvTextureSize(uSamplerMS, 0u, _77_dummy_parameter)); uint _85_dummy_parameter; - int3 i = int3(SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _85_dummy_parameter)); + int3 i = int3(spvTextureSize(uSamplerMSArray, 0u, _85_dummy_parameter)); int _89; - SPIRV_Cross_textureSize(uSampler1D, 
0u, _89); + spvTextureSize(uSampler1D, 0u, _89); int l0 = int(_89); int _93; - SPIRV_Cross_textureSize(uSampler2D, 0u, _93); + spvTextureSize(uSampler2D, 0u, _93); int l1 = int(_93); int _97; - SPIRV_Cross_textureSize(uSampler2DArray, 0u, _97); + spvTextureSize(uSampler2DArray, 0u, _97); int l2 = int(_97); int _101; - SPIRV_Cross_textureSize(uSampler3D, 0u, _101); + spvTextureSize(uSampler3D, 0u, _101); int l3 = int(_101); int _105; - SPIRV_Cross_textureSize(uSamplerCube, 0u, _105); + spvTextureSize(uSamplerCube, 0u, _105); int l4 = int(_105); int _109; - SPIRV_Cross_textureSize(uSamplerCubeArray, 0u, _109); + spvTextureSize(uSamplerCubeArray, 0u, _109); int l5 = int(_109); int _113; - SPIRV_Cross_textureSize(uSamplerMS, 0u, _113); + spvTextureSize(uSamplerMS, 0u, _113); int s0 = int(_113); int _117; - SPIRV_Cross_textureSize(uSamplerMSArray, 0u, _117); + spvTextureSize(uSamplerMSArray, 0u, _117); int s1 = int(_117); } diff --git a/reference/shaders-hlsl/frag/input-attachment-ms.frag b/reference/shaders-hlsl/frag/input-attachment-ms.frag index 130b799651d..954fa1a94b8 100644 --- a/reference/shaders-hlsl/frag/input-attachment-ms.frag +++ b/reference/shaders-hlsl/frag/input-attachment-ms.frag @@ -18,7 +18,8 @@ struct SPIRV_Cross_Output float4 load_subpasses(Texture2DMS uInput) { - return uInput.Load(int2(gl_FragCoord.xy), gl_SampleID); + float4 _24 = uInput.Load(int2(gl_FragCoord.xy), gl_SampleID); + return _24; } void frag_main() @@ -29,6 +30,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; gl_SampleID = stage_input.gl_SampleID; frag_main(); SPIRV_Cross_Output stage_output; diff --git a/reference/shaders-hlsl/frag/input-attachment.frag b/reference/shaders-hlsl/frag/input-attachment.frag index 0b815ae08aa..b0e297c55fa 100644 --- a/reference/shaders-hlsl/frag/input-attachment.frag +++ b/reference/shaders-hlsl/frag/input-attachment.frag @@ -27,6 +27,7 @@ void 
frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/frag/io-block.frag b/reference/shaders-hlsl/frag/io-block.frag index 52c1f518bf2..812a44d8138 100644 --- a/reference/shaders-hlsl/frag/io-block.frag +++ b/reference/shaders-hlsl/frag/io-block.frag @@ -1,13 +1,18 @@ -static float4 FragColor; - struct VertexOut { - float4 a : TEXCOORD1; - float4 b : TEXCOORD2; + float4 a; + float4 b; }; +static float4 FragColor; static VertexOut _12; +struct SPIRV_Cross_Input +{ + float4 VertexOut_a : TEXCOORD1; + float4 VertexOut_b : TEXCOORD2; +}; + struct SPIRV_Cross_Output { float4 FragColor : SV_Target0; @@ -18,9 +23,10 @@ void frag_main() FragColor = _12.a + _12.b; } -SPIRV_Cross_Output main(in VertexOut stage_input_12) +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { - _12 = stage_input_12; + _12.a = stage_input.VertexOut_a; + _12.b = stage_input.VertexOut_b; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag b/reference/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag new file mode 100644 index 00000000000..2af0e513b44 --- /dev/null +++ b/reference/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag @@ -0,0 +1,32 @@ +uniform sampler2D uSampler; + +static float4 FragColor; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + float2 vUV : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : COLOR0; +}; + +void frag_main() +{ + float3 _23 = float3(vUV, 5.0f); + FragColor = tex2Dproj(uSampler, float4(_23.xy, 0.0, _23.z)); + FragColor += tex2Dbias(uSampler, float4(vUV, 0.0, 3.0f)); + FragColor += tex2Dlod(uSampler, float4(vUV, 0.0, 2.0f)); + FragColor += tex2Dgrad(uSampler, vUV, 4.0f.xx, 5.0f.xx); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input 
stage_input) +{ + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = float4(FragColor); + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag b/reference/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag index 8f5e022eba3..cd5a6eee949 100644 --- a/reference/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag +++ b/reference/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag @@ -3,12 +3,15 @@ struct UBO_1_1 float4 v[64]; }; -ConstantBuffer ubos[] : register(b0, space3); -ByteAddressBuffer ssbos[] : register(t0, space4); +ConstantBuffer ubos[] : register(b2, space9); +RWByteAddressBuffer ssbos[] : register(u3, space10); Texture2D uSamplers[] : register(t0, space0); -SamplerState uSamps[] : register(s0, space2); -Texture2D uCombinedSamplers[] : register(t0, space1); -SamplerState _uCombinedSamplers_sampler[] : register(s0, space1); +SamplerState uSamps[] : register(s1, space3); +Texture2D uCombinedSamplers[] : register(t4, space2); +SamplerState _uCombinedSamplers_sampler[] : register(s4, space2); +Texture2DMS uSamplersMS[] : register(t0, space1); +RWTexture2D uImages[] : register(u5, space7); +RWTexture2D uImagesU32[] : register(u5, space8); static int vIndex; static float4 FragColor; @@ -25,14 +28,86 @@ struct SPIRV_Cross_Output float4 FragColor : SV_Target0; }; +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(Level, ret.x, ret.y, Param); + return ret; +} + +uint2 spvTextureSize(Texture2DMS Tex, uint Level, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y, Param); + return ret; +} + +uint2 spvImageSize(RWTexture2D Tex, out uint Param) +{ + uint2 ret; + Tex.GetDimensions(ret.x, ret.y); + Param = 0u; + return ret; +} + void frag_main() { int i = vIndex; FragColor = uSamplers[NonUniformResourceIndex(i + 
10)].Sample(uSamps[NonUniformResourceIndex(i + 40)], vUV); - int _47 = i + 10; - FragColor = uCombinedSamplers[NonUniformResourceIndex(_47)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_47)], vUV); - FragColor += ubos[NonUniformResourceIndex(i + 20)].v[i + 40]; - FragColor += asfloat(ssbos[NonUniformResourceIndex(i + 50)].Load4((i + 60) * 16 + 0)); + int _49 = i + 10; + FragColor = uCombinedSamplers[NonUniformResourceIndex(_49)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_49)], vUV); + int _65 = i + 20; + int _69 = i + 40; + FragColor += ubos[NonUniformResourceIndex(_65)].v[_69]; + int _83 = i + 50; + int _88 = i + 60; + FragColor += asfloat(ssbos[NonUniformResourceIndex(_83)].Load4(_88 * 16 + 16)); + int _96 = i + 60; + int _100 = i + 70; + ssbos[NonUniformResourceIndex(_96)].Store4(_100 * 16 + 16, asuint(20.0f.xxxx)); + int _106 = i + 10; + FragColor = uSamplers[NonUniformResourceIndex(_106)].Load(int3(int2(vUV), 0)); + int _116 = i + 100; + uint _122; + ssbos[_116].InterlockedAdd(0, 100u, _122); + float _136_tmp = uSamplers[NonUniformResourceIndex(i + 10)].CalculateLevelOfDetail(uSamps[NonUniformResourceIndex(i + 40)], vUV); + float2 _136 = _136_tmp.xx; + float2 queried = _136; + int _139 = i + 10; + float _143_tmp = uCombinedSamplers[NonUniformResourceIndex(_139)].CalculateLevelOfDetail(_uCombinedSamplers_sampler[NonUniformResourceIndex(_139)], vUV); + float2 _143 = _143_tmp.xx; + queried += _143; + float4 _147 = FragColor; + float2 _149 = _147.xy + queried; + FragColor.x = _149.x; + FragColor.y = _149.y; + int _157 = i + 20; + int _160; + spvTextureSize(uSamplers[NonUniformResourceIndex(_157)], 0u, _160); + FragColor.x += float(int(_160)); + int _172 = i + 20; + int _176; + spvTextureSize(uSamplersMS[NonUniformResourceIndex(_172)], 0u, _176); + FragColor.y += float(int(_176)); + int _184 = i + 20; + uint _187_dummy_parameter; + float4 _189 = FragColor; + float2 _191 = _189.xy + 
float2(int2(spvTextureSize(uSamplers[NonUniformResourceIndex(_184)], uint(0), _187_dummy_parameter))); + FragColor.x = _191.x; + FragColor.y = _191.y; + int _202 = i + 50; + FragColor += uImages[NonUniformResourceIndex(_202)][int2(vUV)].xxxx; + int _213 = i + 20; + uint _216_dummy_parameter; + float4 _218 = FragColor; + float2 _220 = _218.xy + float2(int2(spvImageSize(uImages[NonUniformResourceIndex(_213)], _216_dummy_parameter))); + FragColor.x = _220.x; + FragColor.y = _220.y; + int _227 = i + 60; + uImages[NonUniformResourceIndex(_227)][int2(vUV)] = 50.0f.x; + int _240 = i + 70; + uint _248; + InterlockedAdd(uImagesU32[NonUniformResourceIndex(_240)][int2(vUV)], 40u, _248); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 00000000000..8923f96a75e --- /dev/null +++ b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,24 @@ +RWByteAddressBuffer _9 : register(u6, space0); +globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0); +RasterizerOrderedByteAddressBuffer _52 : register(u4, space0); +RWTexture2D img4 : register(u5, space0); +RasterizerOrderedTexture2D img : register(u0, space0); +RasterizerOrderedTexture2D img3 : register(u2, space0); +RasterizerOrderedTexture2D img2 : register(u1, space0); + +void frag_main() +{ + _9.Store(0, uint(0)); + img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f); + img[int2(0, 0)] = img3[int2(0, 0)]; + uint _39; + InterlockedAdd(img2[int2(0, 0)], 1u, _39); + _42.Store(0, uint(int(_42.Load(0)) + 42)); + uint _55; + _42.InterlockedAnd(4, _52.Load(0), _55); +} + +void main() +{ + frag_main(); +} diff --git a/reference/shaders-hlsl/frag/query-lod.desktop.frag b/reference/shaders-hlsl/frag/query-lod.desktop.frag index fd95798bf42..a9d4bd83d9d 100644 --- 
a/reference/shaders-hlsl/frag/query-lod.desktop.frag +++ b/reference/shaders-hlsl/frag/query-lod.desktop.frag @@ -17,7 +17,8 @@ struct SPIRV_Cross_Output void frag_main() { float _19_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vTexCoord); - FragColor = float2(_19_tmp, _19_tmp).xyxy; + float2 _19 = _19_tmp.xx; + FragColor = _19.xyxy; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag b/reference/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag new file mode 100644 index 00000000000..bbe3e4a7d32 --- /dev/null +++ b/reference/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag @@ -0,0 +1,21 @@ +globallycoherent RWByteAddressBuffer _12 : register(u0); + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = asfloat(_12.Load4(0)); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/readonly-coherent-ssbo.frag b/reference/shaders-hlsl/frag/readonly-coherent-ssbo.frag new file mode 100644 index 00000000000..02252f9cbc5 --- /dev/null +++ b/reference/shaders-hlsl/frag/readonly-coherent-ssbo.frag @@ -0,0 +1,21 @@ +ByteAddressBuffer _12 : register(t0); + +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = asfloat(_12.Load4(0)); +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag b/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag index b6df0019afc..5c583c66fa9 100644 --- a/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag +++ b/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -31,12 +31,14 @@ void 
frag_main() float l0 = uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)); float l1 = uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1)); float l2 = uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w); - float4 _80 = vDirRef; - _80.z = vDirRef.w; - float p0 = uSampler2D.SampleCmp(_uSampler2D_sampler, _80.xy / _80.z, vDirRef.z / _80.z, int2(1, 1)); - float4 _87 = vDirRef; - _87.z = vDirRef.w; - float p1 = uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, _87.xy / _87.z, vDirRef.z / _87.z, int2(1, 1)); + float4 _75 = vDirRef; + float4 _80 = _75; + _80.z = _75.w; + float p0 = uSampler2D.SampleCmp(_uSampler2D_sampler, _80.xy / _80.z, _75.z / _80.z, int2(1, 1)); + float4 _84 = vDirRef; + float4 _87 = _84; + _87.z = _84.w; + float p1 = uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, _87.xy / _87.z, _84.z / _87.z, int2(1, 1)); FragColor = (((((((s0 + s1) + s2) + s3) + l0) + l1) + l2) + p0) + p1; } diff --git a/reference/shaders-hlsl/frag/sample-mask-in-and-out.frag b/reference/shaders-hlsl/frag/sample-mask-in-and-out.frag new file mode 100644 index 00000000000..185a09821ea --- /dev/null +++ b/reference/shaders-hlsl/frag/sample-mask-in-and-out.frag @@ -0,0 +1,30 @@ +static int gl_SampleMaskIn; +static int gl_SampleMask; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + uint gl_SampleMaskIn : SV_Coverage; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; + uint gl_SampleMask : SV_Coverage; +}; + +void frag_main() +{ + FragColor = 1.0f.xxxx; + gl_SampleMask = gl_SampleMaskIn; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_SampleMaskIn = stage_input.gl_SampleMaskIn; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_SampleMask = gl_SampleMask; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/sample-mask-in.frag 
b/reference/shaders-hlsl/frag/sample-mask-in.frag new file mode 100644 index 00000000000..8f6cfaf9e53 --- /dev/null +++ b/reference/shaders-hlsl/frag/sample-mask-in.frag @@ -0,0 +1,32 @@ +static int gl_SampleID; +static int gl_SampleMaskIn; +static float4 FragColor; + +struct SPIRV_Cross_Input +{ + uint gl_SampleID : SV_SampleIndex; + uint gl_SampleMaskIn : SV_Coverage; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + if ((gl_SampleMaskIn & (1 << gl_SampleID)) != 0) + { + FragColor = 1.0f.xxxx; + } +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + gl_SampleID = stage_input.gl_SampleID; + gl_SampleMaskIn = stage_input.gl_SampleMaskIn; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/sample-mask-out.frag b/reference/shaders-hlsl/frag/sample-mask-out.frag new file mode 100644 index 00000000000..a966c032183 --- /dev/null +++ b/reference/shaders-hlsl/frag/sample-mask-out.frag @@ -0,0 +1,23 @@ +static int gl_SampleMask; +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; + uint gl_SampleMask : SV_Coverage; +}; + +void frag_main() +{ + FragColor = 1.0f.xxxx; + gl_SampleMask = 0; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_SampleMask = gl_SampleMask; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/sampler-array.frag b/reference/shaders-hlsl/frag/sampler-array.frag index e941357d299..fd08d4230d2 100644 --- a/reference/shaders-hlsl/frag/sampler-array.frag +++ b/reference/shaders-hlsl/frag/sampler-array.frag @@ -38,6 +38,7 @@ void frag_main() void main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; vTex = stage_input.vTex; vIndex = stage_input.vIndex; frag_main(); diff --git 
a/reference/shaders-hlsl/frag/scalar-refract-reflect.frag b/reference/shaders-hlsl/frag/scalar-refract-reflect.frag index 0fb694c543f..6c2d0be4f71 100644 --- a/reference/shaders-hlsl/frag/scalar-refract-reflect.frag +++ b/reference/shaders-hlsl/frag/scalar-refract-reflect.frag @@ -11,12 +11,12 @@ struct SPIRV_Cross_Output float FragColor : SV_Target0; }; -float SPIRV_Cross_Reflect(float i, float n) +float spvReflect(float i, float n) { return i - 2.0 * dot(n, i) * n; } -float SPIRV_Cross_Refract(float i, float n, float eta) +float spvRefract(float i, float n, float eta) { float NoI = n * i; float NoI2 = NoI * NoI; @@ -33,8 +33,8 @@ float SPIRV_Cross_Refract(float i, float n, float eta) void frag_main() { - FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z); - FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y); + FragColor = spvRefract(vRefract.x, vRefract.y, vRefract.z); + FragColor += spvReflect(vRefract.x, vRefract.y); FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; FragColor += reflect(vRefract.xy, vRefract.zy).y; } diff --git a/reference/shaders-hlsl/frag/switch-unreachable-break.frag b/reference/shaders-hlsl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..be36b82266e --- /dev/null +++ b/reference/shaders-hlsl/frag/switch-unreachable-break.frag @@ -0,0 +1,55 @@ +cbuffer UBO : register(b0) +{ + int _13_cond : packoffset(c0); + int _13_cond2 : packoffset(c0.y); +}; + + +static float4 FragColor; +static float4 vInput; + +struct SPIRV_Cross_Input +{ + float4 vInput : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + bool frog = false; + switch (_13_cond) + { + case 1: + { + if (_13_cond2 < 50) + { + break; + } + else + { + discard; + } + break; // unreachable workaround + } + default: + { + frog = true; + break; + } + } + bool4 _45 = frog.xxxx; + FragColor = float4(_45.x ? 10.0f.xxxx.x : 20.0f.xxxx.x, _45.y ? 
10.0f.xxxx.y : 20.0f.xxxx.y, _45.z ? 10.0f.xxxx.z : 20.0f.xxxx.z, _45.w ? 10.0f.xxxx.w : 20.0f.xxxx.w); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vInput = stage_input.vInput; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/tex-sampling-ms.frag b/reference/shaders-hlsl/frag/tex-sampling-ms.frag index 1435315383b..854ad5016d6 100644 --- a/reference/shaders-hlsl/frag/tex-sampling-ms.frag +++ b/reference/shaders-hlsl/frag/tex-sampling-ms.frag @@ -25,6 +25,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/frag/tex-sampling.sm30.frag b/reference/shaders-hlsl/frag/tex-sampling.sm30.frag new file mode 100644 index 00000000000..10c6c065312 --- /dev/null +++ b/reference/shaders-hlsl/frag/tex-sampling.sm30.frag @@ -0,0 +1,83 @@ +uniform sampler1D tex1d; +uniform sampler2D tex2d; +uniform sampler3D tex3d; +uniform samplerCUBE texCube; +uniform sampler1D tex1dShadow; +uniform sampler2D tex2dShadow; + +static float texCoord1d; +static float2 texCoord2d; +static float3 texCoord3d; +static float4 FragColor; +static float4 texCoord4d; + +struct SPIRV_Cross_Input +{ + float texCoord1d : TEXCOORD0; + float2 texCoord2d : TEXCOORD1; + float3 texCoord3d : TEXCOORD2; + float4 texCoord4d : TEXCOORD3; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : COLOR0; +}; + +void frag_main() +{ + float4 texcolor = tex1D(tex1d, texCoord1d); + texcolor += tex1Dlod(tex1d, float4(texCoord1d, 0.0, 0.0, 2.0f)); + texcolor += tex1Dgrad(tex1d, texCoord1d, 1.0f, 2.0f); + float2 _34 = float2(texCoord1d, 2.0f); + texcolor += tex1Dproj(tex1d, float4(_34.x, 0.0, 0.0, _34.y)); + texcolor += tex1Dbias(tex1d, float4(texCoord1d, 0.0, 0.0, 1.0f)); + 
texcolor += tex2D(tex2d, texCoord2d); + texcolor += tex2Dlod(tex2d, float4(texCoord2d, 0.0, 2.0f)); + texcolor += tex2Dgrad(tex2d, texCoord2d, float2(1.0f, 2.0f), float2(3.0f, 4.0f)); + float3 _73 = float3(texCoord2d, 2.0f); + texcolor += tex2Dproj(tex2d, float4(_73.xy, 0.0, _73.z)); + texcolor += tex2Dbias(tex2d, float4(texCoord2d, 0.0, 1.0f)); + texcolor += tex3D(tex3d, texCoord3d); + texcolor += tex3Dlod(tex3d, float4(texCoord3d, 2.0f)); + texcolor += tex3Dgrad(tex3d, texCoord3d, float3(1.0f, 2.0f, 3.0f), float3(4.0f, 5.0f, 6.0f)); + float4 _112 = float4(texCoord3d, 2.0f); + texcolor += tex3Dproj(tex3d, float4(_112.xyz, _112.w)); + texcolor += tex3Dbias(tex3d, float4(texCoord3d, 1.0f)); + texcolor += texCUBE(texCube, texCoord3d); + texcolor += texCUBElod(texCube, float4(texCoord3d, 2.0f)); + texcolor += texCUBEbias(texCube, float4(texCoord3d, 1.0f)); + float3 _147 = float3(texCoord1d, 0.0f, 0.0f); + texcolor.w += tex1Dproj(tex1dShadow, float4(_147.x, 0.0, _147.z, 1.0)).x; + float3 _159 = float3(texCoord1d, 0.0f, 0.0f); + texcolor.w += tex1Dlod(tex1dShadow, float4(_159.x, 0.0, _159.z, 2.0f)).x; + float4 _168 = float4(texCoord1d, 0.0f, 0.0f, 2.0f); + float4 _171 = _168; + _171.y = _168.w; + texcolor.w += tex1Dproj(tex1dShadow, float4(_171.x, 0.0, _168.z, _171.y)).x; + float3 _179 = float3(texCoord1d, 0.0f, 0.0f); + texcolor.w += tex1Dbias(tex1dShadow, float4(_179.x, 0.0, _179.z, 1.0f)).x; + float3 _194 = float3(texCoord2d, 0.0f); + texcolor.w += tex2Dproj(tex2dShadow, float4(_194.xy, _194.z, 1.0)).x; + float3 _205 = float3(texCoord2d, 0.0f); + texcolor.w += tex2Dlod(tex2dShadow, float4(_205.xy, _205.z, 2.0f)).x; + float4 _216 = float4(texCoord2d, 0.0f, 2.0f); + float4 _219 = _216; + _219.z = _216.w; + texcolor.w += tex2Dproj(tex2dShadow, float4(_219.xy, _216.z, _219.z)).x; + float3 _229 = float3(texCoord2d, 0.0f); + texcolor.w += tex2Dbias(tex2dShadow, float4(_229.xy, _229.z, 1.0f)).x; + FragColor = texcolor; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input 
stage_input) +{ + texCoord1d = stage_input.texCoord1d; + texCoord2d = stage_input.texCoord2d; + texCoord3d = stage_input.texCoord3d; + texCoord4d = stage_input.texCoord4d; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = float4(FragColor); + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/texel-fetch-offset.frag b/reference/shaders-hlsl/frag/texel-fetch-offset.frag index f2a02e16295..c7ae589dd2b 100644 --- a/reference/shaders-hlsl/frag/texel-fetch-offset.frag +++ b/reference/shaders-hlsl/frag/texel-fetch-offset.frag @@ -23,6 +23,7 @@ void frag_main() SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { gl_FragCoord = stage_input.gl_FragCoord; + gl_FragCoord.w = 1.0 / gl_FragCoord.w; frag_main(); SPIRV_Cross_Output stage_output; stage_output.FragColor = FragColor; diff --git a/reference/shaders-hlsl/frag/texture-proj-shadow.frag b/reference/shaders-hlsl/frag/texture-proj-shadow.frag index 07e06008a0b..f98a5126574 100644 --- a/reference/shaders-hlsl/frag/texture-proj-shadow.frag +++ b/reference/shaders-hlsl/frag/texture-proj-shadow.frag @@ -28,12 +28,14 @@ struct SPIRV_Cross_Output void frag_main() { - float4 _20 = vClip4; - _20.y = vClip4.w; - FragColor = uShadow1D.SampleCmp(_uShadow1D_sampler, _20.x / _20.y, vClip4.z / _20.y); - float4 _30 = vClip4; - _30.z = vClip4.w; - FragColor = uShadow2D.SampleCmp(_uShadow2D_sampler, _30.xy / _30.z, vClip4.z / _30.z); + float4 _17 = vClip4; + float4 _20 = _17; + _20.y = _17.w; + FragColor = uShadow1D.SampleCmp(_uShadow1D_sampler, _20.x / _20.y, _17.z / _20.y); + float4 _27 = vClip4; + float4 _30 = _27; + _30.z = _27.w; + FragColor = uShadow2D.SampleCmp(_uShadow2D_sampler, _30.xy / _30.z, _27.z / _30.z); FragColor = uSampler1D.Sample(_uSampler1D_sampler, vClip2.x / vClip2.y).x; FragColor = uSampler2D.Sample(_uSampler2D_sampler, vClip3.xy / vClip3.z).x; FragColor = uSampler3D.Sample(_uSampler3D_sampler, vClip4.xyz / vClip4.w).x; diff --git 
a/reference/shaders-hlsl/frag/texture-size-combined-image-sampler.frag b/reference/shaders-hlsl/frag/texture-size-combined-image-sampler.frag index d5c373746d8..dd2eb251fc2 100644 --- a/reference/shaders-hlsl/frag/texture-size-combined-image-sampler.frag +++ b/reference/shaders-hlsl/frag/texture-size-combined-image-sampler.frag @@ -8,7 +8,7 @@ struct SPIRV_Cross_Output int2 FooOut : SV_Target0; }; -uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) +uint2 spvTextureSize(Texture2D Tex, uint Level, out uint Param) { uint2 ret; Tex.GetDimensions(Level, ret.x, ret.y, Param); @@ -18,7 +18,7 @@ uint2 SPIRV_Cross_textureSize(Texture2D Tex, uint Level, out uint Param) void frag_main() { uint _23_dummy_parameter; - FooOut = int2(SPIRV_Cross_textureSize(uTex, uint(0), _23_dummy_parameter)); + FooOut = int2(spvTextureSize(uTex, uint(0), _23_dummy_parameter)); } SPIRV_Cross_Output main() diff --git a/reference/shaders-hlsl/frag/unorm-snorm-packing.frag b/reference/shaders-hlsl/frag/unorm-snorm-packing.frag index 57b5950636e..95786b93b68 100644 --- a/reference/shaders-hlsl/frag/unorm-snorm-packing.frag +++ b/reference/shaders-hlsl/frag/unorm-snorm-packing.frag @@ -27,50 +27,50 @@ struct SPIRV_Cross_Output uint SNORM16Out : SV_Target4; }; -uint SPIRV_Cross_packUnorm4x8(float4 value) +uint spvPackUnorm4x8(float4 value) { uint4 Packed = uint4(round(saturate(value) * 255.0)); return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); } -float4 SPIRV_Cross_unpackUnorm4x8(uint value) +float4 spvUnpackUnorm4x8(uint value) { uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); return float4(Packed) / 255.0; } -uint SPIRV_Cross_packSnorm4x8(float4 value) +uint spvPackSnorm4x8(float4 value) { int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff; return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24)); } -float4 SPIRV_Cross_unpackSnorm4x8(uint value) +float4 spvUnpackSnorm4x8(uint 
value) { int SignedValue = int(value); int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24; return clamp(float4(Packed) / 127.0, -1.0, 1.0); } -uint SPIRV_Cross_packUnorm2x16(float2 value) +uint spvPackUnorm2x16(float2 value) { uint2 Packed = uint2(round(saturate(value) * 65535.0)); return Packed.x | (Packed.y << 16); } -float2 SPIRV_Cross_unpackUnorm2x16(uint value) +float2 spvUnpackUnorm2x16(uint value) { uint2 Packed = uint2(value & 0xffff, value >> 16); return float2(Packed) / 65535.0; } -uint SPIRV_Cross_packSnorm2x16(float2 value) +uint spvPackSnorm2x16(float2 value) { int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff; return uint(Packed.x | (Packed.y << 16)); } -float2 SPIRV_Cross_unpackSnorm2x16(uint value) +float2 spvUnpackSnorm2x16(uint value) { int SignedValue = int(value); int2 Packed = int2(SignedValue << 16, SignedValue) >> 16; @@ -79,16 +79,18 @@ float2 SPIRV_Cross_unpackSnorm2x16(uint value) void frag_main() { - FP32Out = SPIRV_Cross_unpackUnorm4x8(UNORM8); - FP32Out = SPIRV_Cross_unpackSnorm4x8(SNORM8); - float2 _21 = SPIRV_Cross_unpackUnorm2x16(UNORM16); - FP32Out = float4(_21.x, _21.y, FP32Out.z, FP32Out.w); - float2 _26 = SPIRV_Cross_unpackSnorm2x16(SNORM16); - FP32Out = float4(_26.x, _26.y, FP32Out.z, FP32Out.w); - UNORM8Out = SPIRV_Cross_packUnorm4x8(FP32); - SNORM8Out = SPIRV_Cross_packSnorm4x8(FP32); - UNORM16Out = SPIRV_Cross_packUnorm2x16(FP32.xy); - SNORM16Out = SPIRV_Cross_packSnorm2x16(FP32.zw); + FP32Out = spvUnpackUnorm4x8(UNORM8); + FP32Out = spvUnpackSnorm4x8(SNORM8); + float2 _21 = spvUnpackUnorm2x16(UNORM16); + FP32Out.x = _21.x; + FP32Out.y = _21.y; + float2 _31 = spvUnpackSnorm2x16(SNORM16); + FP32Out.x = _31.x; + FP32Out.y = _31.y; + UNORM8Out = spvPackUnorm4x8(FP32); + SNORM8Out = spvPackSnorm4x8(FP32); + UNORM16Out = spvPackUnorm2x16(FP32.xy); + SNORM16Out = spvPackSnorm2x16(FP32.zw); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git 
a/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..dad35928c7d --- /dev/null +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,97 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void main3(inout uint2 gl_PrimitiveLineIndicesEXT[22], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22]) +{ + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uint2(0u, 1u) + gl_LocalInvocationIndex.xx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = 
(gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; +} + +void main2(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(float3(gl_GlobalInvocationID), 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + main3(gl_PrimitiveLineIndicesEXT, gl_MeshPrimitivesEXT); + } +} + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + main2(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT); +} + +[outputtopology("line")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint2 gl_PrimitiveLineIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = 
stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveLineIndicesEXT); +} diff --git a/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..e636453da4d --- /dev/null +++ b/reference/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,87 @@ +struct BlockOut +{ + float4 a; + float4 b; +}; + +struct BlockOutPrim +{ + float4 a; + float4 b; +}; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +static const uint3 gl_WorkGroupSize = uint3(2u, 3u, 4u); + +static uint3 gl_WorkGroupID; +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +struct gl_MeshPerVertexEXT +{ + float4 vOut : TEXCOORD0; + BlockOut outputs : TEXCOORD2; + float4 gl_Position : SV_Position; + float gl_ClipDistance[1] : SV_ClipDistance; + float2 gl_CullDistance : SV_CullDistance; +}; + +struct gl_MeshPerPrimitiveEXT +{ + float4 vPrim : TEXCOORD1; + BlockOutPrim prim_outputs : TEXCOORD4; + uint gl_PrimitiveID : SV_PrimitiveID; + uint gl_Layer : SV_RenderTargetArrayIndex; + uint gl_ViewportIndex : SV_ViewportArrayIndex; + uint gl_PrimitiveShadingRateEXT : SV_ShadingRate; + bool gl_CullPrimitiveEXT : SV_CullPrimitive; +}; + +groupshared float shared_float[16]; + +void mesh_main(inout gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], inout gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], TaskPayload _payload, inout uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + SetMeshOutputCounts(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = float4(float3(gl_GlobalInvocationID), 1.0f); + 
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0f; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].vOut = float4(float3(gl_GlobalInvocationID), 2.0f); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.a = 5.0f.xxxx; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].outputs.b = 6.0f.xxxx; + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationIndex < 22u) + { + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].vPrim = float4(float3(gl_WorkGroupID), 3.0f); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.a = _payload.a.xxxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].prim_outputs.b = _payload.b.xxxx; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uint3(0u, 1u, 2u) + gl_LocalInvocationIndex.xxx; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + +[outputtopology("triangle")] +[numthreads(2, 3, 4)] +void main(SPIRV_Cross_Input stage_input, out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[24], out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[22], in payload TaskPayload _payload, out indices uint3 gl_PrimitiveTriangleIndicesEXT[22]) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + mesh_main(gl_MeshVerticesEXT, gl_MeshPrimitivesEXT, _payload, gl_PrimitiveTriangleIndicesEXT); +} diff 
--git a/reference/shaders-hlsl/vert/invariant.vert b/reference/shaders-hlsl/vert/invariant.vert new file mode 100644 index 00000000000..54739626865 --- /dev/null +++ b/reference/shaders-hlsl/vert/invariant.vert @@ -0,0 +1,40 @@ +static float4 gl_Position; +static float4 vInput0; +static float4 vInput1; +static float4 vInput2; +static float4 vColor; + +struct SPIRV_Cross_Input +{ + float4 vInput0 : TEXCOORD0; + float4 vInput1 : TEXCOORD1; + float4 vInput2 : TEXCOORD2; +}; + +struct SPIRV_Cross_Output +{ + precise float4 vColor : TEXCOORD0; + precise float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + float4 _20 = vInput1 * vInput2; + float4 _21 = vInput0 + _20; + gl_Position = _21; + float4 _27 = vInput0 - vInput1; + float4 _29 = _27 * vInput2; + vColor = _29; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vInput0 = stage_input.vInput0; + vInput1 = stage_input.vInput1; + vInput2 = stage_input.vInput2; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + stage_output.vColor = vColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/vert/locations.vert b/reference/shaders-hlsl/vert/locations.vert index b06b204bdd1..da9fa45ede6 100644 --- a/reference/shaders-hlsl/vert/locations.vert +++ b/reference/shaders-hlsl/vert/locations.vert @@ -5,6 +5,12 @@ struct Foo float3 c; }; +struct VertexOut +{ + float3 color; + float3 foo; +}; + static float4 gl_Position; static float4 Input2; static float4 Input4; @@ -14,13 +20,6 @@ static float vLocation1; static float vLocation2[2]; static Foo vLocation4; static float vLocation9; - -struct VertexOut -{ - float3 color : TEXCOORD7; - float3 foo : TEXCOORD8; -}; - static VertexOut vout; struct SPIRV_Cross_Input @@ -36,6 +35,8 @@ struct SPIRV_Cross_Output float vLocation1 : TEXCOORD1; float vLocation2[2] : TEXCOORD2; Foo vLocation4 : TEXCOORD4; + float3 VertexOut_color : TEXCOORD7; + float3 VertexOut_foo : TEXCOORD8; float vLocation9 : TEXCOORD9; float4 
gl_Position : SV_Position; }; @@ -57,13 +58,12 @@ void vert_main() vout.foo = 4.0f.xxx; } -SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input, out VertexOut stage_outputvout) +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) { Input2 = stage_input.Input2; Input4 = stage_input.Input4; Input0 = stage_input.Input0; vert_main(); - stage_outputvout = vout; SPIRV_Cross_Output stage_output; stage_output.gl_Position = gl_Position; stage_output.vLocation0 = vLocation0; @@ -71,5 +71,7 @@ SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input, out VertexOut stage_outpu stage_output.vLocation2 = vLocation2; stage_output.vLocation4 = vLocation4; stage_output.vLocation9 = vLocation9; + stage_output.VertexOut_color = vout.color; + stage_output.VertexOut_foo = vout.foo; return stage_output; } diff --git a/reference/shaders-hlsl/vert/no-contraction.vert b/reference/shaders-hlsl/vert/no-contraction.vert new file mode 100644 index 00000000000..ad37dc23f19 --- /dev/null +++ b/reference/shaders-hlsl/vert/no-contraction.vert @@ -0,0 +1,45 @@ +static float4 gl_Position; +static float4 vA; +static float4 vB; +static float4 vC; + +struct SPIRV_Cross_Input +{ + float4 vA : TEXCOORD0; + float4 vB : TEXCOORD1; + float4 vC : TEXCOORD2; +}; + +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + precise float4 _15 = vA * vB; + float4 mul = _15; + precise float4 _19 = vA + vB; + float4 add = _19; + precise float4 _23 = vA - vB; + float4 sub = _23; + precise float4 _27 = vA * vB; + precise float4 _30 = _27 + vC; + float4 mad = _30; + precise float4 _34 = mul + add; + precise float4 _36 = _34 + sub; + precise float4 _38 = _36 + mad; + float4 summed = _38; + gl_Position = summed; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vA = stage_input.vA; + vB = stage_input.vB; + vC = stage_input.vC; + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git 
a/reference/shaders-hlsl/vert/qualifiers.vert b/reference/shaders-hlsl/vert/qualifiers.vert index 13ee2a8c1c0..bbf7dc61e45 100644 --- a/reference/shaders-hlsl/vert/qualifiers.vert +++ b/reference/shaders-hlsl/vert/qualifiers.vert @@ -1,17 +1,16 @@ +struct Block +{ + float vFlat; + float vCentroid; + float vSample; + float vNoperspective; +}; + static float4 gl_Position; static float vFlat; static float vCentroid; static float vSample; static float vNoperspective; - -struct Block -{ - nointerpolation float vFlat : TEXCOORD4; - centroid float vCentroid : TEXCOORD5; - sample float vSample : TEXCOORD6; - noperspective float vNoperspective : TEXCOORD7; -}; - static Block vout; struct SPIRV_Cross_Output @@ -20,6 +19,10 @@ struct SPIRV_Cross_Output centroid float vCentroid : TEXCOORD1; sample float vSample : TEXCOORD2; noperspective float vNoperspective : TEXCOORD3; + nointerpolation float Block_vFlat : TEXCOORD4; + centroid float Block_vCentroid : TEXCOORD5; + sample float Block_vSample : TEXCOORD6; + noperspective float Block_vNoperspective : TEXCOORD7; float4 gl_Position : SV_Position; }; @@ -36,15 +39,18 @@ void vert_main() vout.vNoperspective = 3.0f; } -SPIRV_Cross_Output main(out Block stage_outputvout) +SPIRV_Cross_Output main() { vert_main(); - stage_outputvout = vout; SPIRV_Cross_Output stage_output; stage_output.gl_Position = gl_Position; stage_output.vFlat = vFlat; stage_output.vCentroid = vCentroid; stage_output.vSample = vSample; stage_output.vNoperspective = vNoperspective; + stage_output.Block_vFlat = vout.vFlat; + stage_output.Block_vCentroid = vout.vCentroid; + stage_output.Block_vSample = vout.vSample; + stage_output.Block_vNoperspective = vout.vNoperspective; return stage_output; } diff --git a/reference/shaders-hlsl/vert/return-array.vert b/reference/shaders-hlsl/vert/return-array.vert index 83e3a281232..3e021257bd9 100644 --- a/reference/shaders-hlsl/vert/return-array.vert +++ b/reference/shaders-hlsl/vert/return-array.vert @@ -15,17 +15,17 @@ struct 
SPIRV_Cross_Output float4 gl_Position : SV_Position; }; -void test(out float4 SPIRV_Cross_return_value[2]) +void test(out float4 spvReturnValue[2]) { - SPIRV_Cross_return_value = _20; + spvReturnValue = _20; } -void test2(out float4 SPIRV_Cross_return_value[2]) +void test2(out float4 spvReturnValue[2]) { float4 foobar[2]; foobar[0] = vInput0; foobar[1] = vInput1; - SPIRV_Cross_return_value = foobar; + spvReturnValue = foobar; } void vert_main() diff --git a/reference/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/reference/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..d24b9666fab --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct T +{ + float a; +}; + +struct T_1 +{ + float b; +}; + +struct SSBO1 +{ + T_1 foo[1]; +}; + +struct T_2 +{ + float c; + char _m0_final_padding[12]; +}; + +struct SSBO2 +{ + T_2 bar[1]; +}; + +kernel void main0(device SSBO1& _7 [[buffer(0)]], device SSBO2& _10 [[buffer(1)]]) +{ + T v = T{ 40.0 }; + _7.foo[10].b = v.a; + _10.bar[30].c = v.a; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000000..1015d2a5eef --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,23 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + uint a; + uint b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _5 [[buffer(0)]]) +{ + uint _20 = atomic_load_explicit((device atomic_uint*)&_5.b, memory_order_relaxed); + uint c = _20; + atomic_store_explicit((device atomic_uint*)&_5.a, c, memory_order_relaxed); +} + diff --git 
a/reference/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp new file mode 100644 index 00000000000..3fdf46bbc5b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + uint a; + int b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _4 [[buffer(0)]]) +{ + uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&_4.a, 1u, memory_order_relaxed); + uint _27 = uint(atomic_fetch_min_explicit((device atomic_int*)&_4.a, int(1u), memory_order_relaxed)); + uint _28 = atomic_fetch_min_explicit((device atomic_uint*)&_4.a, 4294967295u, memory_order_relaxed); + uint _29 = uint(atomic_fetch_max_explicit((device atomic_int*)&_4.a, int(4294967295u), memory_order_relaxed)); + int _30 = atomic_fetch_max_explicit((device atomic_int*)&_4.b, -3, memory_order_relaxed); + int _31 = int(atomic_fetch_min_explicit((device atomic_uint*)&_4.b, uint(-3), memory_order_relaxed)); + int _32 = atomic_fetch_min_explicit((device atomic_int*)&_4.b, 4, memory_order_relaxed); + int _33 = int(atomic_fetch_max_explicit((device atomic_uint*)&_4.b, uint(4), memory_order_relaxed)); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp index 8b6694288e6..4624ef0b7c1 100644 --- a/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -14,7 +14,7 @@ struct SSBO kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _24 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_5.count, 1u, memory_order_relaxed); + uint _24 = 
atomic_fetch_add_explicit((device atomic_uint*)&_5.count, 1u, memory_order_relaxed); if (_24 < 1024u) { _5.data[_24] = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp b/reference/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp new file mode 100644 index 00000000000..0d63f5fa75b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp @@ -0,0 +1,21 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + half2 a; + float b; + float c; + half2 d; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _4 [[buffer(0)]]) +{ + _4.b = as_type(_4.a); + _4.d = as_type(_4.c); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/reference/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..8e198a94df8 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + int4 ints; + uint4 uints; +}; + +kernel void main0(device SSBO& _3 [[buffer(0)]]) +{ + int4 _19 = _3.ints; + uint4 _20 = _3.uints; + _3.ints = popcount(_19); + _3.uints = uint4(popcount(_19)); + _3.ints = int4(popcount(_20)); + _3.uints = popcount(_20); + _3.ints = reverse_bits(_19); + _3.uints = reverse_bits(_20); + _3.ints = extract_bits(_19, uint(1), 11u); + _3.uints = uint4(extract_bits(int4(_20), 11u, uint(1))); + _3.ints = int4(extract_bits(uint4(_19), uint(1), 11u)); + _3.uints = extract_bits(_20, 11u, uint(1)); + _3.ints = insert_bits(_19, _19.wzyx, uint(1), 11u); + _3.uints = insert_bits(_20, _20.wzyx, 11u, uint(1)); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/bitscan.asm.comp b/reference/shaders-msl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..1be65ec7cd4 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,53 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Implementation of the GLSL findLSB() function +template +inline T spvFindLSB(T x) +{ + return select(ctz(x), T(-1), x == T(0)); +} + +// Implementation of the signed GLSL findMSB() function +template +inline T spvFindSMSB(T x) +{ + T v = select(x, T(-1) - x, x < T(0)); + return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); +} + +// Implementation of the unsigned GLSL findMSB() function +template +inline T spvFindUMSB(T x) +{ + return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0)); +} + +struct SSBO +{ + uint4 u; + int4 i; +}; + +kernel void main0(device SSBO& _4 [[buffer(0)]]) +{ + uint4 _19 = _4.u; + int4 _20 = _4.i; + _4.u = spvFindLSB(_19); + _4.i = int4(spvFindLSB(_19)); + _4.u = uint4(spvFindLSB(_20)); + _4.i = spvFindLSB(_20); + _4.u = spvFindUMSB(_19); + _4.i = int4(spvFindUMSB(_19)); + _4.u = spvFindUMSB(uint4(_20)); + _4.i = int4(spvFindUMSB(uint4(_20))); + _4.u = uint4(spvFindSMSB(int4(_19))); + _4.i = spvFindSMSB(int4(_19)); + _4.u = uint4(spvFindSMSB(_20)); + _4.i = spvFindSMSB(_20); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp b/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp new file mode 100644 index 00000000000..734a66870b9 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp @@ -0,0 +1,77 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_CommonConstants +{ + uint g_count; + packed_uint3 g_padding4; +}; + +struct MyStruct +{ + float4 m_coefficients[4]; +}; + +struct type_RWStructuredBuffer_MyStruct +{ + MyStruct _m0[1]; +}; + +constant spvUnsafeArray _27 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); + +kernel void main0(constant type_CommonConstants& CommonConstants [[buffer(0)]], device type_RWStructuredBuffer_MyStruct& g_data [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + do + { + if (gl_GlobalInvocationID.x >= CommonConstants.g_count) + { + break; + } + g_data._m0[gl_GlobalInvocationID.x] = MyStruct{ { float4(0.0), float4(0.0), float4(0.0), float4(0.0) } }; + break; + } while(false); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp b/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp new file mode 100644 index 00000000000..66550535350 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp @@ -0,0 +1,77 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _10 +{ + float _m0[4]; + float _m1[4]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +struct SSBO +{ + uint a; + int b; +}; + +constant spvUnsafeArray _31 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0 }); + +kernel void main0() +{ + spvUnsafeArray<_10, 2> _34 = spvUnsafeArray<_10, 2>({ _10{ { 1.0, 2.0, 3.0, 4.0 }, { 1.0, 2.0, 3.0, 4.0 } }, _10{ { 1.0, 2.0, 3.0, 4.0 }, { 1.0, 2.0, 3.0, 4.0 } } }); + + spvUnsafeArray foo; + foo[0] = 1.0; + foo = _31; + foo[1] = 2.0; + foo[2] = 3.0; + foo[3] = 4.0; + spvUnsafeArray foo2; + foo2 = foo; + _10 _37 = _10{ { foo[0], foo[1], foo[2], foo[3] }, { foo2[0], foo2[1], foo2[2], foo2[3] } }; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp b/reference/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp new file mode 100644 index 00000000000..74464092ef0 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct SomeBuffer; + +struct SomeBuffer +{ + float4 v; + ulong a; + uint2 b; +}; + +struct Registers +{ + ulong address; + uint2 address2; +}; + +kernel void main0(constant Registers& registers [[buffer(0)]]) +{ + device SomeBuffer* _44 = 
reinterpret_cast(registers.address); + device SomeBuffer* _45 = reinterpret_cast(registers.address); + device SomeBuffer* _46 = reinterpret_cast(as_type(registers.address2)); + _44->v = float4(1.0, 2.0, 3.0, 4.0); + _45->v = float4(1.0, 2.0, 3.0, 4.0); + _46->v = float4(1.0, 2.0, 3.0, 4.0); + _44->a = reinterpret_cast(_44); + _45->a = reinterpret_cast(_45); + _46->b = as_type(reinterpret_cast(_46)); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp b/reference/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp new file mode 100644 index 00000000000..2fe09814bf4 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]]) +{ + threadgroup uint2 _18[2]; + ssbo._m0[0u] = Block{ { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] }, { ssbo._m0[0u]._m1[0], ssbo._m0[0u]._m1[1] } }; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/reference/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..a5b6fc32ce2 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,61 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct SSBO +{ + int values[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 4u, 1u); + +constant spvUnsafeArray indexable = spvUnsafeArray({ 0, 1, 2, 3 }); +constant spvUnsafeArray indexable_1 = spvUnsafeArray({ 4, 5, 6, 7 }); + +kernel void main0(device SSBO& _6 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + _6.values[gl_GlobalInvocationID.x] = indexable[gl_LocalInvocationID.x] + indexable_1[gl_LocalInvocationID.y]; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp new file mode 100644 index 00000000000..09a31d68a85 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp @@ -0,0 +1,60 @@ +#include +#include + +using namespace metal; + +struct _11 +{ + float2x2 _m0; +}; + +struct _12 +{ + float2x4 _m0; +}; + +struct B2 +{ + float4 elem2; +}; + +struct C +{ + float4 c; + B2 b2; + B2 b2_array[4]; + _12 _m3; +}; + +struct B1 +{ + float4 elem1; +}; + +struct A +{ + float4 a; + B1 b1; + B1 b1_array[4]; + _11 _m3; +}; + +struct _8 +{ + A a_block; + C c_block; +}; + +kernel void main0(device _8& _3 [[buffer(0)]]) +{ + A _31; + _31.a = _3.c_block.c; + _31.b1.elem1 = 
_3.c_block.b2.elem2; + _31.b1_array[0].elem1 = _3.c_block.b2_array[0].elem2; + _31.b1_array[1].elem1 = _3.c_block.b2_array[1].elem2; + _31.b1_array[2].elem1 = _3.c_block.b2_array[2].elem2; + _31.b1_array[3].elem1 = _3.c_block.b2_array[3].elem2; + _31._m3._m0 = transpose(float2x2(_3.c_block._m3._m0[0].xy, _3.c_block._m3._m0[1].xy)); + _3.a_block = _31; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp new file mode 100644 index 00000000000..54087ddc511 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp @@ -0,0 +1,54 @@ +#include +#include + +using namespace metal; + +struct _8 +{ + char _m0_pad[4]; + uint _m0; +}; + +struct _9 +{ + char _m0_pad[8]; + uint _m0; +}; + +struct _4 +{ + uint _m0; + uint4 _m1[2]; + uint _m2; + char _m3_pad[12]; + _8 _m3; + float4 _m4; + float3 _m5; + float2 _m6; +}; + +struct _5 +{ + uint _m0; + uint _m1[2]; + uint _m2; + _9 _m3; + float4 _m4; + float3 _m5; + float2 _m6; +}; + +kernel void main0(device _5& _2 [[buffer(0)]], device _4& _3 [[buffer(1)]]) +{ + _4 _23; + _23._m0 = _2._m0; + (thread uint&)_23._m1[0] = _2._m1[0]; + (thread uint&)_23._m1[1] = _2._m1[1]; + _23._m2 = _2._m2; + _23._m3._m0 = _2._m3._m0; + _23._m4 = _2._m4; + _23._m5 = _2._m5; + _23._m6 = _2._m6; + _3 = _23; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 00000000000..2225981524a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,47 @@ +#include +#include + +using namespace metal; + +struct B2 +{ + float4 elem2; +}; + +struct C +{ + float4 c; + B2 b2; + B2 b2_array[4]; +}; + +struct B1 +{ + float4 elem1; +}; + +struct A +{ + float4 a; + B1 b1; + B1 b1_array[4]; +}; + 
+struct _8 +{ + A a_block; + C c_block; +}; + +kernel void main0(device _8& _3 [[buffer(0)]]) +{ + A _27; + _27.a = _3.c_block.c; + _27.b1.elem1 = _3.c_block.b2.elem2; + _27.b1_array[0].elem1 = _3.c_block.b2_array[0].elem2; + _27.b1_array[1].elem1 = _3.c_block.b2_array[1].elem2; + _27.b1_array[2].elem1 = _3.c_block.b2_array[2].elem2; + _27.b1_array[3].elem1 = _3.c_block.b2_array[3].elem2; + _3.a_block = _27; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp b/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp new file mode 100644 index 00000000000..b024b5539d7 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp @@ -0,0 +1,174 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void 
spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]]) +{ + threadgroup uint2 _18[2]; + spvUnsafeArray _27; + spvArrayCopyFromDeviceToStack1(_27.elements, ssbo._m0[0u]._m1); + 
spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27.elements); + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27.elements); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp b/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp new file mode 100644 index 00000000000..a029a283d46 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp @@ -0,0 +1,135 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } 
+} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]]) +{ + threadgroup uint2 _18[2]; + uint2 _27[2]; + spvArrayCopyFromDeviceToStack1(_27, ssbo._m0[0u]._m1); + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27); + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _27); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp b/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp new file mode 100644 index 00000000000..3ebc0d91284 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp @@ -0,0 +1,179 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; 
i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]], constant SSBO& ubo [[buffer(1)]]) +{ + threadgroup uint2 _18[2]; + spvArrayCopyFromDeviceToDevice1(ssbo._m0[0u]._m0, ssbo._m0[0u]._m1); + spvArrayCopyFromConstantToDevice1(ssbo._m0[0u]._m0, ubo._m0[0u]._m1); + spvUnsafeArray _24; + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _24.elements); + spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18); + spvArrayCopyFromDeviceToThreadGroup1(_18, ssbo._m0[0u]._m1); + spvArrayCopyFromDeviceToStack1(_24.elements, ssbo._m0[0u]._m1); + spvArrayCopyFromConstantToThreadGroup1(_18, ubo._m0[0u]._m1); + spvArrayCopyFromConstantToStack1(_24.elements, ubo._m0[0u]._m1); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp b/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp new file mode 100644 index 00000000000..6f63d36e6da --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp @@ -0,0 +1,140 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace 
metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T 
(&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +struct SSBO +{ + Block _m0[3]; +}; + +kernel void main0(device SSBO& ssbo [[buffer(0)]], constant SSBO& ubo [[buffer(1)]]) +{ + threadgroup uint2 _18[2]; + spvArrayCopyFromDeviceToDevice1(ssbo._m0[0u]._m0, ssbo._m0[0u]._m1); + spvArrayCopyFromConstantToDevice1(ssbo._m0[0u]._m0, ubo._m0[0u]._m1); + uint2 _24[2]; + spvArrayCopyFromStackToDevice1(ssbo._m0[0u]._m0, _24); + spvArrayCopyFromThreadGroupToDevice1(ssbo._m0[0u]._m0, _18); + spvArrayCopyFromDeviceToThreadGroup1(_18, ssbo._m0[0u]._m1); + spvArrayCopyFromDeviceToStack1(_24, ssbo._m0[0u]._m1); + spvArrayCopyFromConstantToThreadGroup1(_18, ubo._m0[0u]._m1); + spvArrayCopyFromConstantToStack1(_24, ubo._m0[0u]._m1); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..e265f1bd976 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct UBO +{ + float v; +}; + +struct SSBO +{ + float v; +}; + +kernel void main0() +{ + threadgroup float w; + float v; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/glsl-signed-operations.asm.comp b/reference/shaders-msl-no-opt/asm/comp/glsl-signed-operations.asm.comp index e6682c513e1..ab7a3363270 100644 --- a/reference/shaders-msl-no-opt/asm/comp/glsl-signed-operations.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/glsl-signed-operations.asm.comp @@ -5,15 +5,9 @@ using namespace metal; -struct SSBO -{ - int4 ints; - uint4 uints; -}; - // Implementation of the signed GLSL findMSB() function template -T findSMSB(T x) +inline T 
spvFindSMSB(T x) { T v = select(x, T(-1) - x, x < T(0)); return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); @@ -21,18 +15,24 @@ T findSMSB(T x) // Implementation of the unsigned GLSL findMSB() function template -T findUMSB(T x) +inline T spvFindUMSB(T x) { return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0)); } // Implementation of the GLSL sign() function for integer types template::value>::type> -T sign(T x) +inline T sign(T x) { return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); } +struct SSBO +{ + int4 ints; + uint4 uints; +}; + kernel void main0(device SSBO& _4 [[buffer(0)]]) { int4 _19 = _4.ints; @@ -45,10 +45,10 @@ kernel void main0(device SSBO& _4 [[buffer(0)]]) _4.uints = uint4(sign(_19)); _4.ints = sign(int4(_20)); _4.uints = uint4(sign(int4(_20))); - _4.ints = findSMSB(int4(_20)); - _4.uints = uint4(findSMSB(int4(_20))); - _4.ints = int4(findUMSB(uint4(_19))); - _4.uints = findUMSB(uint4(_19)); + _4.ints = spvFindSMSB(int4(_20)); + _4.uints = uint4(spvFindSMSB(int4(_20))); + _4.ints = int4(spvFindUMSB(uint4(_19))); + _4.uints = spvFindUMSB(uint4(_19)); _4.ints = min(_19, _19); _4.uints = uint4(min(_19, int4(_20))); _4.ints = min(int4(_20), int4(_20)); diff --git a/reference/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp b/reference/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 00000000000..0063faceaa0 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct _8 +{ + float _m0; + float _m1; +}; + +struct _15 +{ + float _m0; + int _m1; +}; + +struct _3 +{ + float _m0; + int _m1; +}; + +kernel void main0(device _3& _4 [[buffer(0)]]) +{ + _8 _23; + _23._m0 = modf(20.0, _23._m1); + _15 _24; + _24._m0 = frexp(40.0, _24._m1); + _4._m0 = _23._m0; + _4._m0 = _23._m1; + _4._m0 = _24._m0; + _4._m1 = _24._m1; +} + diff --git 
a/reference/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp b/reference/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 00000000000..365f89f74f5 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 values[1]; +}; + +constant uint _10_tmp [[function_constant(1)]]; +constant uint _10 = is_function_constant_defined(_10_tmp) ? _10_tmp : 11u; +constant uint _11_tmp [[function_constant(2)]]; +constant uint _11 = is_function_constant_defined(_11_tmp) ? _11_tmp : 12u; +constant uint _4_tmp [[function_constant(3)]]; +constant uint _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 13u; +constant uint _5_tmp [[function_constant(4)]]; +constant uint _5 = is_function_constant_defined(_5_tmp) ? _5_tmp : 14u; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, _10, _11); + +kernel void main0(device SSBO& _8 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _8.values[gl_GlobalInvocationID.x] += float4(2.0); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp b/reference/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp new file mode 100644 index 00000000000..2dcff36923c --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 values[1]; +}; + +constant int _10_tmp [[function_constant(1)]]; +constant int _10 = is_function_constant_defined(_10_tmp) ? _10_tmp : 11; +constant int _11_tmp [[function_constant(2)]]; +constant int _11 = is_function_constant_defined(_11_tmp) ? _11_tmp : 12; +constant int _4_tmp [[function_constant(3)]]; +constant int _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 13; +constant int _5_tmp [[function_constant(4)]]; +constant int _5 = is_function_constant_defined(_5_tmp) ? 
_5_tmp : 14; +constant uint _29 = (uint(_4) + 3u); +constant uint3 _30 = uint3(_29, _5, 2u); + +kernel void main0(device SSBO& _8 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _8.values[gl_GlobalInvocationID.x] = ((((_8.values[gl_GlobalInvocationID.x] + float4(2.0)) + float3(_30).xyzz) * float(_4)) * float(_5)) * float(int(2u)); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp b/reference/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp new file mode 100644 index 00000000000..3c00707f2e2 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct _17 +{ + float2 _m0; + float2 _m1; +}; + +struct _4 +{ + uint2 _m0[324]; +}; + +struct _7 +{ + float2 _m0[648]; +}; + +struct _10 +{ + float2 _m0[648]; +}; + +kernel void main0(const device _4& _5 [[buffer(0)]], device _7& _8 [[buffer(1)]], device _10& _11 [[buffer(2)]]) +{ + for (uint _39 = 0u; _39 < 648u; _39 += 2u) + { + uint2 _40 = _5._m0[_39 / 2u]; + float2 _41 = as_type(_40); + float2 _76; + float2 _61 = modf(_41, _76); + _8._m0[_39] = _76; + _8._m0[_39 + 1u] = _61; + _17 _64; + _64._m0 = modf(_41, _64._m1); + _17 _42 = _64; + _11._m0[_39] = _42._m1; + _11._m0[_39 + 1u] = _42._m0; + } +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp new file mode 100644 index 00000000000..2a8b59f0b47 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp @@ -0,0 +1,51 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + int _m0[1][4]; +}; + +struct _9 +{ + int _m0[1][17]; +}; + +struct _11 +{ + int _m0; +}; + +kernel void main0(device _7& _2 [[buffer(0)]], device _9& _3 [[buffer(1)]], constant _11& _4 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID 
[[threadgroup_position_in_grid]]) +{ + if (int3(gl_WorkGroupID).x >= _4._m0) + { + return; + } + int _49; + if (int3(gl_LocalInvocationID).x == 1) + { + _3._m0[int3(gl_WorkGroupID).x][16] = &_2._m0[int3(gl_WorkGroupID).x] - &_2._m0[0]; + _49 = 0; + } + else + { + _49 = 0; + } + for (;;) + { + int _50 = _49 + 1; + _3._m0[int3(gl_WorkGroupID).x][(int3(gl_LocalInvocationID).x * 4) + _49] = &_2._m0[int3(gl_WorkGroupID).x][int3(gl_LocalInvocationID).x] - &_2._m0[int3(gl_WorkGroupID).x][_49]; + if (_50 == 4) + { + break; + } + else + { + _49 = _50; + } + } +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp new file mode 100644 index 00000000000..69e76f3f38b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp @@ -0,0 +1,45 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + int _m0; + int _m1[1]; +}; + +struct _9 +{ + int2 _m0[1]; +}; + +kernel void main0(device _7& _2 [[buffer(0)]], device _9& _3 [[buffer(1)]]) +{ + int _28 = _2._m0; + device int* _4 = &_2._m1[0]; + device int* _5 = &_2._m1[0 + _28]; + int _34; + if (!(_28 <= 0)) + { + _34 = 0; + for (;;) + { + device int* _36 = _4; + device int* _37 = _5; + int _35 = _34 + 1; + _4 = &_36[1]; + _5 = &_37[-1]; + _3._m0[_34] = int2(_36 - _37, _37 - _36); + if (_34 >= _28) + { + break; + } + else + { + _34 = _35; + } + } + } +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp new file mode 100644 index 00000000000..52916413e55 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + uint _m0[1]; +}; + +kernel void main0(device _7& _2 [[buffer(0)]], device _7& _3 [[buffer(1)]], 
device _7& _4 [[buffer(2)]], device _7& _5 [[buffer(3)]]) +{ + uint _18 = 0u; + uint _28 = _18 + 1u; + _5._m0[_18] = uint(&_2 == &_3); + uint _32 = _28 + 1u; + _5._m0[_28] = uint(&_2._m0 == &_3._m0); + uint _36 = _32 + 1u; + _5._m0[_32] = uint(&_2._m0[0u] == &_3._m0[0u]); + uint _40 = _36 + 1u; + _5._m0[_36] = uint(&_2 == &_4); + uint _44 = _40 + 1u; + _5._m0[_40] = uint(&_2._m0 == &_4._m0); + uint _48 = _44 + 1u; + _5._m0[_44] = uint(&_2._m0[0u] == &_4._m0[0u]); + uint _52 = _48 + 1u; + _5._m0[_48] = uint(&_3 == &_4); + uint _56 = _52 + 1u; + _5._m0[_52] = uint(&_3._m0 == &_4._m0); + _5._m0[_56] = uint(&_3._m0[0u] == &_4._m0[0u]); + _5._m0[_56 + 1u] = uint(&_2 == &_2); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp new file mode 100644 index 00000000000..16d29c1d6c7 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp @@ -0,0 +1,37 @@ +#include +#include + +using namespace metal; + +struct _6 +{ + float4x4 _m0; + float4x4 _m1; + float _m2; + float _m3; +}; + +struct _7 +{ + uint _m0[1]; +}; + +kernel void main0(device _6& _2 [[buffer(0)]], device _6& _3 [[buffer(1)]], device _7& _4 [[buffer(2)]]) +{ + uint _26 = 0u; + uint _39 = _26 + 1u; + _4._m0[_26] = (&_2._m2 == &_2._m3) ? 0u : 1u; + bool _40 = &_2._m2 == &_3._m2; + uint _43 = _39 + 1u; + _4._m0[_39] = _40 ? 0u : 1u; + bool _46 = (_40 ? &_2._m2 : &_2._m3) == (_40 ? &_3._m2 : &_3._m3); + uint _49 = _43 + 1u; + _4._m0[_43] = _46 ? 0u : 1u; + uint _54 = _49 + 1u; + _4._m0[_49] = ((_46 ? &_2._m2 : &_2._m3) == &((device float*)&_2._m0[0u])[0u]) ? 0u : 1u; + uint _56 = (&_2._m0 == &_2._m1) ? 
0u : 1u; + uint _58 = _54 + 1u; + _4._m0[_54] = _56; + _4._m0[_58] = _56; +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp b/reference/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp new file mode 100644 index 00000000000..d9af203553e --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct _7 +{ + uint _m0[1]; +}; + +kernel void main0(device _7& _2 [[buffer(0)]], device _7& _3 [[buffer(1)]], device _7& _4 [[buffer(2)]], device _7& _5 [[buffer(3)]]) +{ + uint _18 = 0u; + uint _28 = _18 + 1u; + _5._m0[_18] = uint(&_2 != &_3); + uint _32 = _28 + 1u; + _5._m0[_28] = uint(&_2._m0 != &_3._m0); + uint _36 = _32 + 1u; + _5._m0[_32] = uint(&_2._m0[0u] != &_3._m0[0u]); + uint _40 = _36 + 1u; + _5._m0[_36] = uint(&_2 != &_4); + uint _44 = _40 + 1u; + _5._m0[_40] = uint(&_2._m0 != &_4._m0); + uint _48 = _44 + 1u; + _5._m0[_44] = uint(&_2._m0[0u] != &_4._m0[0u]); + uint _52 = _48 + 1u; + _5._m0[_48] = uint(&_3 != &_4); + uint _56 = _52 + 1u; + _5._m0[_52] = uint(&_3._m0 != &_4._m0); + _5._m0[_56] = uint(&_3._m0[0u] != &_4._m0[0u]); + _5._m0[_56 + 1u] = uint(&_2 != &_2); +} + diff --git a/reference/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/reference/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 00000000000..dda85050991 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + int values[1]; +}; + +constant int A_tmp [[function_constant(0)]]; +constant int A = is_function_constant_defined(A_tmp) ? A_tmp : 0; +constant int A_1_tmp [[function_constant(1)]]; +constant int A_1 = is_function_constant_defined(A_1_tmp) ? 
A_1_tmp : 1; +constant int A_2_tmp [[function_constant(2)]]; +constant int A_2 = is_function_constant_defined(A_2_tmp) ? A_2_tmp : 2; +constant int A_3_tmp [[function_constant(3)]]; +constant int A_3 = is_function_constant_defined(A_3_tmp) ? A_3_tmp : 3; +constant int A_4_tmp [[function_constant(4)]]; +constant int A_4 = is_function_constant_defined(A_4_tmp) ? A_4_tmp : 4; +constant int A_5_tmp [[function_constant(5)]]; +constant int A_5 = is_function_constant_defined(A_5_tmp) ? A_5_tmp : 5; +constant int A_6 = (A - A_1); +constant int A_7 = (A_6 - A_2); +constant int A_8 = (A_7 - A_3); +constant int A_9 = (A_8 - A_4); +constant int A_10 = (A_9 - A_5); +constant int A_11 = (A_10 + A_5); +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _5.values[gl_GlobalInvocationID.x] = A_11; +} + diff --git a/reference/opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp similarity index 100% rename from reference/opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp rename to reference/shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp index 473298c2741..5b1ed8ae243 100644 --- a/reference/opt/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp @@ -16,7 +16,7 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_3, 2u, _4); kernel void main0(device _6& _8 [[buffer(0)]], device _6& _9 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { - _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x]; uint3 _23 = gl_WorkGroupSize; + _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x]; } diff --git a/reference/shaders-msl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp 
b/reference/shaders-msl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp index ec40c6afc13..6e9768540b7 100644 --- a/reference/shaders-msl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp @@ -15,6 +15,7 @@ struct SSBORead float b; }; +static inline __attribute__((always_inline)) void copy_out(device float& A, device const float& B) { A = B; diff --git a/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp b/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp index 37731c720ec..7c9718d1c95 100644 --- a/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp +++ b/reference/shaders-msl-no-opt/asm/comp/variable-pointers.asm.comp @@ -22,16 +22,19 @@ struct baz int e[128]; }; +static inline __attribute__((always_inline)) device int* select_buffer(device foo& buf, device baz& buf2, constant bar& cb) { return (cb.d != 0) ? &buf.a[0u] : &buf2.e[0u]; } +static inline __attribute__((always_inline)) device int* select_buffer_null(device foo& buf, constant bar& cb) { return (cb.d != 0) ? &buf.a[0u] : nullptr; } +static inline __attribute__((always_inline)) threadgroup int* select_tgsm(constant bar& cb, threadgroup int (&tgsm)[128]) { return (cb.d != 0) ? 
&tgsm[0u] : nullptr; diff --git a/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag new file mode 100644 index 00000000000..a7d3550a7aa --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +struct UBOs +{ + float4 v; +}; + +struct spvDescriptorSetBuffer0 +{ + constant UBOs* ubos [[id(0)]][2]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = spvDescriptorSet0.ubos[0]->v + spvDescriptorSet0.ubos[1]->v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag b/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag new file mode 100644 index 00000000000..b6fe72b8d1a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct UBOs +{ + float4 v; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant UBOs* ubos_0 [[buffer(0)]], constant UBOs* ubos_1 [[buffer(1)]]) +{ + constant UBOs* ubos[] = + { + ubos_0, + ubos_1, + }; + + main0_out out = {}; + out.FragColor = ubos[0]->v + ubos[1]->v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/reference/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..e4397f828fa --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,85 @@ +#include +#include + +using namespace metal; + +struct anon_aa +{ + int foo; +}; + +struct anon_ab +{ + 
int foo; +}; + +struct anon_a +{ + anon_aa _aa; + anon_ab ab; +}; + +struct anon_ba +{ + int foo; +}; + +struct anon_bb +{ + int foo; +}; + +struct anon_b +{ + anon_ba _ba; + anon_bb bb; +}; + +struct VertexData +{ + anon_a _a; + anon_b b; +}; + +struct anon_ca +{ + int foo; +}; + +struct anon_c +{ + anon_ca _ca; +}; + +struct anon_da +{ + int foo; +}; + +struct anon_d +{ + anon_da da; +}; + +struct UBO +{ + anon_c _c; + anon_d d; +}; + +struct anon_e +{ + int a; +}; + +struct SSBO +{ + anon_e _m0; + anon_e _e; + anon_e f; +}; + +fragment void main0() +{ +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag b/reference/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag new file mode 100644 index 00000000000..2da91dac7de --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct type_Globals +{ + float4 _BorderWidths[4]; +}; + +struct main0_out +{ + float4 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Globals& _Globals [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float2 _31 = float2(_Globals._BorderWidths[0].x, _Globals._BorderWidths[1].x); + float2 _39; + if (gl_FragCoord.x > 0.0) + { + float2 _38 = _31; + _38.x = _Globals._BorderWidths[2].x; + _39 = _38; + } + else + { + _39 = _31; + } + out.out_var_SV_Target = float4(_39, 0.0, 1.0); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/reference/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..a4bb56283a1 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using 
namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant float4 _32 = {}; + +constant spvUnsafeArray _34 = spvUnsafeArray({ float4(0.0), float4(0.0) }); + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vInput [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _37 = in.vInput; + float4 _38 = _37; + _38.x = 1.0; + _38.y = 2.0; + _38.z = 3.0; + _38.w = 4.0; + out.FragColor = _38; + float4 _6 = _37; + _6.x = 1.0; + _6.y = 2.0; + _6.z = 3.0; + _6.w = 4.0; + out.FragColor = _6; + float4 _42 = _37; + _42.x = 1.0; + _42.y = 2.0; + _42.z = 3.0; + _42.w = 4.0; + out.FragColor = _42; + float4 _44 = _37; + _44.x = 1.0; + float4 _45 = _44; + _45.y = 2.0; + float4 _46 = _45; + _46.z = 3.0; + float4 _47 = _46; + _47.w = 4.0; + out.FragColor = _47 + _44; + out.FragColor = _47 + _45; + float4 _49; + _49.x = 1.0; + _49.y = 2.0; + _49.z = 3.0; + _49.w = 4.0; + out.FragColor = _49; + float4 _53 = float4(0.0); + _53.x = 1.0; + out.FragColor = _53; + spvUnsafeArray _54 = _34; + _54[1].z = 1.0; + _54[0].w = 2.0; + out.FragColor = _54[0]; + out.FragColor = _54[1]; + float4x4 _58 = float4x4(float4(0.0), float4(0.0), float4(0.0), float4(0.0)); + _58[1].z = 1.0; + _58[2].w = 2.0; + out.FragColor = _58[0]; + 
out.FragColor = _58[1]; + out.FragColor = _58[2]; + out.FragColor = _58[3]; + float4 PHI; + PHI = _46; + float4 _65 = PHI; + _65.w = 4.0; + out.FragColor = _65; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag new file mode 100644 index 00000000000..eb78db53672 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _3 +{ + spvUnsafeArray _m0; + float _m1[2]; + spvUnsafeArray _m2; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ 1.0, 2.0 }); +constant spvUnsafeArray _16 = spvUnsafeArray({ 3.0, 4.0 }); +constant spvUnsafeArray _17 = spvUnsafeArray({ 5.0, 6.0 }); + +struct main0_out +{ + float m_2 [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + _3 _23 = _3{ spvUnsafeArray({ 1.0, 2.0 }), { 3.0, 4.0 }, spvUnsafeArray({ 5.0, 6.0 }) }; + out.m_2 = 1.0; + return out; +} + diff --git 
a/reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag b/reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag new file mode 100644 index 00000000000..faa528bc327 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag @@ -0,0 +1,170 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup 
const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct _3 +{ + float _m0[4]; +}; + +fragment void main0() +{ + spvUnsafeArray _20; + _20[0u] = 0.0; + _20[1u] = 0.0; + _20[2u] = 0.0; + _20[3u] = 0.0; + _3 _19; + spvArrayCopyFromStackToStack1(_19._m0, _20.elements); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/reference/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..fdf4a92b993 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,35 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct EmptyStructTest +{ +}; +struct EmptyStruct2Test +{ + 
EmptyStructTest _m0; +}; + +static inline __attribute__((always_inline)) +float GetValue(thread const EmptyStruct2Test& self) +{ + return 0.0; +} + +static inline __attribute__((always_inline)) +float GetValue_1(EmptyStruct2Test self) +{ + return 0.0; +} + +fragment void main0() +{ + EmptyStruct2Test emptyStruct; + float value = GetValue(emptyStruct); + value = GetValue_1(EmptyStruct2Test{ EmptyStructTest{ } }); + value = GetValue_1(EmptyStruct2Test{ { } }); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag b/reference/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag new file mode 100644 index 00000000000..9a5e195b488 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct _5ma_in_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment _5ma_in_out _5ma_in() +{ + _5ma_in_out out = {}; + out.FragColor = float4(1.0); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/image-gather.asm.frag b/reference/shaders-msl-no-opt/asm/frag/image-gather.asm.frag new file mode 100644 index 00000000000..47253429a84 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/image-gather.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d g_texture [[texture(0)]], sampler g_sampler [[sampler(0)]], sampler g_comp [[sampler(1)]]) +{ + main0_out out = {}; + out.out_var_SV_Target0 = g_texture.gather(g_sampler, in.in_var_TEXCOORD0, int2(0), component::x) * g_texture.gather(g_sampler, in.in_var_TEXCOORD0, int2(0), component::y); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag 
b/reference/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag new file mode 100644 index 00000000000..daeccaedc6b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag @@ -0,0 +1,105 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Foo +{ + float a; + float b; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float ALIAS_0_a [[user(locn1)]]; + float ALIAS_0_b [[user(locn2)]]; + float ALIAS_1_a [[user(locn3)]]; + float ALIAS_1_b [[user(locn4)]]; + float ALIAS_2_a [[user(locn5)]]; + float ALIAS_2_b [[user(locn6)]]; + float ALIAS_3_a [[user(locn7)]]; + float ALIAS_3_b [[user(locn8)]]; + float ALIAS_1_0_a [[user(locn10)]]; + float ALIAS_1_0_b [[user(locn11)]]; + float ALIAS_1_1_a [[user(locn12)]]; + float ALIAS_1_1_b [[user(locn13)]]; + float ALIAS_1_2_a [[user(locn14)]]; + float ALIAS_1_2_b [[user(locn15)]]; + float ALIAS_1_3_a [[user(locn16)]]; + float ALIAS_1_3_b [[user(locn17)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray ALIAS = {}; + spvUnsafeArray ALIAS_1 = {}; + 
ALIAS[0].a = in.ALIAS_0_a; + ALIAS[0].b = in.ALIAS_0_b; + ALIAS[1].a = in.ALIAS_1_a; + ALIAS[1].b = in.ALIAS_1_b; + ALIAS[2].a = in.ALIAS_2_a; + ALIAS[2].b = in.ALIAS_2_b; + ALIAS[3].a = in.ALIAS_3_a; + ALIAS[3].b = in.ALIAS_3_b; + ALIAS_1[0].a = in.ALIAS_1_0_a; + ALIAS_1[0].b = in.ALIAS_1_0_b; + ALIAS_1[1].a = in.ALIAS_1_1_a; + ALIAS_1[1].b = in.ALIAS_1_1_b; + ALIAS_1[2].a = in.ALIAS_1_2_a; + ALIAS_1[2].b = in.ALIAS_1_2_b; + ALIAS_1[3].a = in.ALIAS_1_3_a; + ALIAS_1[3].b = in.ALIAS_1_3_b; + out.FragColor.x = ALIAS[0].a; + out.FragColor.y = ALIAS[1].b; + out.FragColor.z = ALIAS[2].a; + out.FragColor.w = ALIAS_1[3].b; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag deleted file mode 100644 index 1af9edc351c..00000000000 --- a/reference/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag +++ /dev/null @@ -1,236 +0,0 @@ -#include -#include - -using namespace metal; - -struct VertexOutput -{ - float4 HPosition; - float4 Uv_EdgeDistance1; - float4 UvStuds_EdgeDistance2; - float4 Color; - float4 LightPosition_Fog; - float4 View_Depth; - float4 Normal_SpecPower; - float3 Tangent; - float4 PosLightSpace_Reflectance; - float studIndex; -}; - -struct Surface -{ - float3 albedo; - float3 normal; - float specular; - float gloss; - float reflectance; - float opacity; -}; - -struct SurfaceInput -{ - float4 Color; - float2 Uv; - float2 UvStuds; -}; - -struct Globals -{ - float4x4 ViewProjection; - float4 ViewRight; - float4 ViewUp; - float4 ViewDir; - float3 CameraPosition; - float3 AmbientColor; - float3 Lamp0Color; - float3 Lamp0Dir; - float3 Lamp1Color; - float4 FogParams; - float3 FogColor; - float4 LightBorder; - float4 LightConfig0; - float4 LightConfig1; - float4 LightConfig2; - float4 LightConfig3; - float4 RefractionBias_FadeDistance_GlowFactor; - float4 OutlineBrightness_ShadowInfo; - float4 ShadowMatrix0; - float4 
ShadowMatrix1; - float4 ShadowMatrix2; -}; - -struct CB0 -{ - Globals CB0; -}; - -struct Params -{ - float4 LqmatFarTilingFactor; -}; - -struct CB2 -{ - Params CB2; -}; - -constant VertexOutput _121 = {}; -constant SurfaceInput _122 = {}; -constant float2 _123 = {}; -constant float4 _124 = {}; -constant Surface _125 = {}; -constant float4 _192 = {}; -constant float4 _219 = {}; -constant float4 _297 = {}; - -struct main0_out -{ - float4 _entryPointOutput [[color(0)]]; -}; - -struct main0_in -{ - float4 IN_Uv_EdgeDistance1 [[user(locn0)]]; - float4 IN_UvStuds_EdgeDistance2 [[user(locn1)]]; - float4 IN_Color [[user(locn2)]]; - float4 IN_LightPosition_Fog [[user(locn3)]]; - float4 IN_View_Depth [[user(locn4)]]; - float4 IN_Normal_SpecPower [[user(locn5)]]; - float3 IN_Tangent [[user(locn6)]]; - float4 IN_PosLightSpace_Reflectance [[user(locn7)]]; - float IN_studIndex [[user(locn8)]]; -}; - -fragment main0_out main0(main0_in in [[stage_in]], constant CB0& _19 [[buffer(0)]], texture3d LightMapTexture [[texture(0)]], texture2d ShadowMapTexture [[texture(1)]], texturecube EnvironmentMapTexture [[texture(2)]], texture2d DiffuseMapTexture [[texture(3)]], texture2d NormalMapTexture [[texture(4)]], texture2d NormalDetailMapTexture [[texture(5)]], texture2d StudsMapTexture [[texture(6)]], texture2d SpecularMapTexture [[texture(7)]], sampler LightMapSampler [[sampler(0)]], sampler ShadowMapSampler [[sampler(1)]], sampler EnvironmentMapSampler [[sampler(2)]], sampler DiffuseMapSampler [[sampler(3)]], sampler NormalMapSampler [[sampler(4)]], sampler NormalDetailMapSampler [[sampler(5)]], sampler StudsMapSampler [[sampler(6)]], sampler SpecularMapSampler [[sampler(7)]], float4 gl_FragCoord [[position]]) -{ - main0_out out = {}; - VertexOutput _128 = _121; - _128.HPosition = gl_FragCoord; - VertexOutput _130 = _128; - _130.Uv_EdgeDistance1 = in.IN_Uv_EdgeDistance1; - VertexOutput _132 = _130; - _132.UvStuds_EdgeDistance2 = in.IN_UvStuds_EdgeDistance2; - VertexOutput _134 = _132; - 
_134.Color = in.IN_Color; - VertexOutput _136 = _134; - _136.LightPosition_Fog = in.IN_LightPosition_Fog; - VertexOutput _138 = _136; - _138.View_Depth = in.IN_View_Depth; - VertexOutput _140 = _138; - _140.Normal_SpecPower = in.IN_Normal_SpecPower; - VertexOutput _142 = _140; - _142.Tangent = in.IN_Tangent; - VertexOutput _144 = _142; - _144.PosLightSpace_Reflectance = in.IN_PosLightSpace_Reflectance; - VertexOutput _146 = _144; - _146.studIndex = in.IN_studIndex; - SurfaceInput _147 = _122; - _147.Color = in.IN_Color; - SurfaceInput _149 = _147; - _149.Uv = in.IN_Uv_EdgeDistance1.xy; - SurfaceInput _151 = _149; - _151.UvStuds = in.IN_UvStuds_EdgeDistance2.xy; - SurfaceInput _156 = _151; - _156.UvStuds.y = (fract(_151.UvStuds.y) + in.IN_studIndex) * 0.25; - float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y; - float _165 = fast::clamp(1.0 - _163, 0.0, 1.0); - float2 _166 = in.IN_Uv_EdgeDistance1.xy * 1.0; - bool _173; - float4 _193; - do - { - _173 = 0.0 == 0.0; - if (_173) - { - _193 = DiffuseMapTexture.sample(DiffuseMapSampler, _166); - break; - } - else - { - float _180 = 1.0 / (1.0 - 0.0); - _193 = mix(DiffuseMapTexture.sample(DiffuseMapSampler, (_166 * 0.25)), DiffuseMapTexture.sample(DiffuseMapSampler, _166), float4(fast::clamp((fast::clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0))); - break; - } - _193 = _192; - break; - } while (false); - float4 _194 = _193 * 1.0; - float4 _220; - do - { - if (_173) - { - _220 = NormalMapTexture.sample(NormalMapSampler, _166); - break; - } - else - { - float _207 = 1.0 / (1.0 - 0.0); - _220 = mix(NormalMapTexture.sample(NormalMapSampler, (_166 * 0.25)), NormalMapTexture.sample(NormalMapSampler, _166), float4(fast::clamp((_165 * _207) - (0.0 * _207), 0.0, 1.0))); - break; - } - _220 = _219; - break; - } while (false); - float2 _223 = float2(1.0); - float2 _224 = (_220.wy * 2.0) - _223; - float3 _232 = float3(_224, 
sqrt(fast::clamp(1.0 + dot(-_224, _224), 0.0, 1.0))); - float2 _240 = (NormalDetailMapTexture.sample(NormalDetailMapSampler, (_166 * 0.0)).wy * 2.0) - _223; - float2 _252 = _232.xy + (float3(_240, sqrt(fast::clamp(1.0 + dot(-_240, _240), 0.0, 1.0))).xy * 0.0); - float3 _253 = float3(_252.x, _252.y, _232.z); - float2 _255 = _253.xy * _165; - float3 _256 = float3(_255.x, _255.y, _253.z); - float3 _271 = ((in.IN_Color.xyz * _194.xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (StudsMapTexture.sample(StudsMapSampler, _156.UvStuds).x * 2.0); - float4 _298; - do - { - if (0.75 == 0.0) - { - _298 = SpecularMapTexture.sample(SpecularMapSampler, _166); - break; - } - else - { - float _285 = 1.0 / (1.0 - 0.75); - _298 = mix(SpecularMapTexture.sample(SpecularMapSampler, (_166 * 0.25)), SpecularMapTexture.sample(SpecularMapSampler, _166), float4(fast::clamp((_165 * _285) - (0.75 * _285), 0.0, 1.0))); - break; - } - _298 = _297; - break; - } while (false); - float2 _303 = mix(float2(0.800000011920928955078125, 120.0), (_298.xy * float2(2.0, 256.0)) + float2(0.0, 0.00999999977648258209228515625), float2(_165)); - Surface _304 = _125; - _304.albedo = _271; - Surface _305 = _304; - _305.normal = _256; - float _306 = _303.x; - Surface _307 = _305; - _307.specular = _306; - float _308 = _303.y; - Surface _309 = _307; - _309.gloss = _308; - float _312 = (_298.xy.y * _165) * 0.0; - Surface _313 = _309; - _313.reflectance = _312; - float4 _318 = float4(_271, _146.Color.w); - float3 _329 = normalize(((in.IN_Tangent * _313.normal.x) + (cross(in.IN_Normal_SpecPower.xyz, in.IN_Tangent) * _313.normal.y)) + (in.IN_Normal_SpecPower.xyz * _313.normal.z)); - float3 _332 = -_19.CB0.Lamp0Dir; - float _333 = dot(_329, _332); - float _357 = fast::clamp(dot(step(_19.CB0.LightConfig3.xyz, abs(in.IN_LightPosition_Fog.xyz - _19.CB0.LightConfig2.xyz)), float3(1.0)), 0.0, 1.0); - float4 _368 = mix(LightMapTexture.sample(LightMapSampler, (in.IN_LightPosition_Fog.xyz.yzx - 
(in.IN_LightPosition_Fog.xyz.yzx * _357))), _19.CB0.LightBorder, float4(_357)); - float2 _376 = ShadowMapTexture.sample(ShadowMapSampler, in.IN_PosLightSpace_Reflectance.xyz.xy).xy; - float _392 = (1.0 - (((step(_376.x, in.IN_PosLightSpace_Reflectance.xyz.z) * fast::clamp(9.0 - (20.0 * abs(in.IN_PosLightSpace_Reflectance.xyz.z - 0.5)), 0.0, 1.0)) * _376.y) * _19.CB0.OutlineBrightness_ShadowInfo.w)) * _368.w; - float3 _403 = mix(_318.xyz, EnvironmentMapTexture.sample(EnvironmentMapSampler, reflect(-in.IN_View_Depth.xyz, _329)).xyz, float3(_312)); - float4 _404 = float4(_403.x, _403.y, _403.z, _318.w); - float3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * fast::clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * fast::max(-_333, 0.0))) * _392)) + _368.xyz) * _404.xyz) + (_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(fast::clamp(dot(_329, normalize(_332 + normalize(in.IN_View_Depth.xyz))), 0.0, 1.0), _308))); - float4 _425 = float4(_422.x, _422.y, _422.z, _124.w); - _425.w = _404.w; - float2 _435 = fast::min(in.IN_Uv_EdgeDistance1.wz, in.IN_UvStuds_EdgeDistance2.wz); - float _439 = fast::min(_435.x, _435.y) / _163; - float3 _445 = _425.xyz * fast::clamp((fast::clamp((_163 * _19.CB0.OutlineBrightness_ShadowInfo.x) + _19.CB0.OutlineBrightness_ShadowInfo.y, 0.0, 1.0) * (1.5 - _439)) + _439, 0.0, 1.0); - float4 _446 = float4(_445.x, _445.y, _445.z, _425.w); - float3 _453 = mix(_19.CB0.FogColor, _446.xyz, float3(fast::clamp(_146.LightPosition_Fog.w, 0.0, 1.0))); - out._entryPointOutput = float4(_453.x, _453.y, _453.z, _446.w); - return out; -} - diff --git a/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag b/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag new file mode 100644 index 00000000000..0643acfa72d --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag @@ -0,0 +1,25 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + 
+#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d uInput, thread float4& gl_FragCoord) +{ + return uInput.read(uint2(gl_FragCoord.xy)); +} + +fragment main0_out main0(texture2d uSubpass0 [[texture(0)]], texture2d uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy)) + load_subpasses(uSubpass1, gl_FragCoord); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag b/reference/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag new file mode 100644 index 00000000000..910c8fa734b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + float3 col; + int2 _18; + float _23; + float _21 = modf(0.1500000059604644775390625, _23); + col.x = _23; + int _24; + float _22 = frexp(0.1500000059604644775390625, _24); + _18.y = _24; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/reference/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..3f552ebbd04 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float gl_FragDepth [[depth(any)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.gl_FragDepth = 0.5; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag b/reference/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..cffd0bd1afb --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,42 @@ +#include +#include + +using namespace metal; + +constant int uninit_int = {}; +constant int4 uninit_vector = {}; +constant float4x4 uninit_matrix = {}; + +struct Foo +{ + int a; +}; + +constant Foo uninit_foo = {}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + int _39 = {}; + if (in.vColor.x > 10.0) + { + _39 = 10; + } + else + { + _39 = 20; + } + out.FragColor = in.vColor; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag new file mode 100644 index 00000000000..8ceb9f43e72 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +static inline __attribute__((always_inline)) +void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _31 = int(gl_FragCoord.x); + v_7.values1[_31]++; +} + +static inline __attribute__((always_inline)) +void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _39 = int(gl_FragCoord.x); + v_9.values0[_39]++; + callee2(gl_FragCoord, v_7); +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag new file mode 100644 index 00000000000..a3823163914 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag @@ -0,0 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct _12 +{ + uint _m0[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +static inline __attribute__((always_inline)) +void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _44 = int(gl_FragCoord.x); + v_7.values1[_44]++; +} + +static inline __attribute__((always_inline)) +void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _52 = int(gl_FragCoord.x); + v_9.values0[_52]++; + callee2(gl_FragCoord, v_7); + if (true) + { + } +} + +static inline __attribute__((always_inline)) +void _35(thread float4& gl_FragCoord, device _12& v_13) +{ + v_13._m0[int(gl_FragCoord.x)] = 4u; +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device _12& v_13 [[buffer(1)]], device SSBO0& v_9 [[buffer(2), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); + _35(gl_FragCoord, v_13); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag new file mode 100644 index 00000000000..beb21241f4e --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +static inline __attribute__((always_inline)) +void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _37 = int(gl_FragCoord.x); + v_7.values1[_37]++; +} + +static inline __attribute__((always_inline)) +void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) 
+{ + int _45 = int(gl_FragCoord.x); + v_9.values0[_45]++; + callee2(gl_FragCoord, v_7); +} + +static inline __attribute__((always_inline)) +void _29() +{ +} + +static inline __attribute__((always_inline)) +void _31() +{ +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); + _29(); + _31(); +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag b/reference/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag new file mode 100644 index 00000000000..5bc3c47ef97 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + float b = 10.0; + b = 20.0; + out.FragColor = b + b; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag b/reference/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..5c8ec371e30 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,33 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float _mat3(thread const float& a) +{ + return a + 1.0; +} + +static inline __attribute__((always_inline)) +float _RESERVED_IDENTIFIER_FIXUP_gl_Foo(thread const int& a) +{ + return float(a) + 1.0; +} + +fragment main0_out main0() +{ + main0_out out = {}; + float param = 2.0; + int param_1 = 4; + out.FragColor = _mat3(param) + _RESERVED_IDENTIFIER_FIXUP_gl_Foo(param_1); + return out; +} + diff --git 
a/reference/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag b/reference/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..e5b8fc5bf1a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _15 +{ + float _m0; +}; + +constant spvUnsafeArray _29 = spvUnsafeArray({ 0.0, 1.0 }); +constant spvUnsafeArray _30 = spvUnsafeArray({ 1.0, 0.0 }); + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = false ? float4(1.0, 1.0, 0.0, 1.0) : float4(0.0, 0.0, 0.0, 1.0); + out.FragColor = float4(false); + out.FragColor = select(float4(0.0, 0.0, 0.0, 1.0), float4(1.0, 1.0, 0.0, 1.0), bool4(false, true, false, true)); + out.FragColor = float4(bool4(false, true, false, true)); + _15 _32 = false ? (_15{ 0.0 }) : (_15{ 1.0 }); + spvUnsafeArray _33; + _33 = true ? 
_29 : _30; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag b/reference/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag new file mode 100644 index 00000000000..2f5cd66284f --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + uint FragColor [[color(0)]]; +}; + +struct main0_in +{ + int index [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + uint _17 = uint(in.index); + out.FragColor = uint(simd_min(in.index)); + out.FragColor = uint(simd_max(int(_17))); + out.FragColor = simd_min(uint(in.index)); + out.FragColor = simd_max(_17); + out.FragColor = uint(quad_min(in.index)); + out.FragColor = uint(quad_max(int(_17))); + out.FragColor = quad_min(uint(in.index)); + out.FragColor = quad_max(_17); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/reference/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from reference/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to reference/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag b/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag index fce6757b45e..6c4cc7248be 100644 --- a/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag +++ b/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - 
one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -109,8 +110,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... 
params) { if (sw) { @@ -149,41 +150,41 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = spvTextureSwizzle(texCube.sample(texCubeSamp, float3(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), depth2dSwzl); - c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSamp, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), depthCubeSwzl); - c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySamp, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), depth2dArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, 1.0), depth2dSwzl); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSamp, float4(0.0, 0.0, 0.0, 1.0).xyz, 1.0), depthCubeSwzl); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySamp, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), 1.0), depth2dArraySwzl); c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), depthCubeArraySwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl); float4 _152 = float4(0.0, 0.0, 1.0, 1.0); - _152.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _152.xy / _152.z, float4(0.0, 0.0, 1.0, 1.0).z / _152.z), 
depth2dSwzl); + _152.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _152.xy / _152.z, 1.0 / _152.z), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSamp, 0.0), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float2(0.0), level(0.0)), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float3(0.0), level(0.0)), tex3dSwzl); c = spvTextureSwizzle(texCube.sample(texCubeSamp, float3(0.0), level(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySamp, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySamp, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), depth2dSwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, float3(0.0, 0.0, 1.0).xy, 1.0, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSamp, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSamp, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSamp, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl); float4 _202 = float4(0.0, 0.0, 1.0, 1.0); - _202.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _202.xy / _202.z, float4(0.0, 0.0, 1.0, 1.0).z / _202.z, level(0.0)), depth2dSwzl); + _202.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSamp, _202.xy / _202.z, 1.0 / _202.z, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl); c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl); c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = 
texBuffer.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(tex2dSamp, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl); - c = spvGatherSwizzle, float3>(texCubeSamp, texCube, float3(0.0), component::y, texCubeSwzl); - c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySamp, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl); - c = spvGatherSwizzle, float3, uint>(texCubeArraySamp, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl); - c = spvGatherCompareSwizzle, float2, float>(depth2dSamp, depth2d, float2(0.0), 1.0, depth2dSwzl); - c = spvGatherCompareSwizzle, float3, float>(depthCubeSamp, depthCube, float3(0.0), 1.0, depthCubeSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(depth2dArraySamp, depth2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, depth2dArraySwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(depthCubeArraySamp, depthCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, depthCubeArraySwzl); + c = spvGatherSwizzle(tex2d, tex2dSamp, tex2dSwzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(texCube, texCubeSamp, texCubeSwzl, component::y, float3(0.0)); + c = spvGatherSwizzle(tex2dArray, tex2dArraySamp, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(texCubeArray, texCubeArraySamp, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(depth2d, depth2dSamp, depth2dSwzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(depthCube, depthCubeSamp, depthCubeSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(depth2dArray, depth2dArraySamp, depth2dArraySwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(depthCubeArray, depthCubeArraySamp, depthCubeArraySwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); } diff --git 
a/reference/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag b/reference/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag new file mode 100644 index 00000000000..5ba57b3f626 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 _GLF_color [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + float4 _13 = modf(float4(1.0, 0.0, 0.0, 1.0), out._GLF_color); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc new file mode 100644 index 00000000000..3872124d5cd --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc @@ -0,0 +1,82 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + + threadgroup C c[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + c[gl_InvocationID] = _18[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.p_v = float4(0.0); + c[gl_InvocationID].v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..e576472f379 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc @@ -0,0 +1,85 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup C spvStoragec[8][4]; + threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8]; + c[gl_GlobalInvocationID.x % 4] = _18[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.p_v = float4(0.0); + uint gl_InvocationID = 
gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + c[gl_InvocationID].v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc new file mode 100644 index 00000000000..5c6ad2a8bee --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc @@ -0,0 +1,81 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct main0_out +{ + float4 c_v; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device 
MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + + threadgroup P p; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].c_v = _18[gl_InvocationID].v; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + p = P{ float4(0.0) }; + gl_out[gl_InvocationID].c_v = float4(1.0); + p.v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..12295e778e5 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc @@ -0,0 +1,84 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct main0_out +{ + float4 c_v; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].c_v = _18[gl_GlobalInvocationID.x % 4].v; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup P spvStoragep[8]; + threadgroup P (&p) = spvStoragep[(gl_GlobalInvocationID.x / 4) % 8]; + p = P{ float4(0.0) }; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].c_v = float4(1.0); + p.v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git 
a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc new file mode 100644 index 00000000000..d5ff9d0bd21 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc @@ -0,0 +1,100 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _51 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _52 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 c_v; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut 
[[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + spvUnsafeArray _33 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].c_v = _18[gl_InvocationID].v; + gl_out[gl_InvocationID].gl_Position = _33[gl_InvocationID].gl_Position; + gl_out[gl_InvocationID].gl_ClipDistance = _33[gl_InvocationID].gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _33[gl_InvocationID].gl_CullDistance; + gl_out_masked[gl_InvocationID] = _33[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.p_v = float4(0.0); + gl_out[gl_InvocationID].c_v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out_masked[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..32fb6598937 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc @@ -0,0 +1,103 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _51 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _52 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 c_v; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + spvUnsafeArray _33 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = 
&spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].c_v = _18[gl_GlobalInvocationID.x % 4].v; + gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _33[gl_GlobalInvocationID.x % 4].gl_Position; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _33[gl_GlobalInvocationID.x % 4].gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _33[gl_GlobalInvocationID.x % 4].gl_CullDistance; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + gl_out_masked[gl_GlobalInvocationID.x % 4] = _33[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.p_v = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].c_v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out_masked[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc new file mode 100644 index 00000000000..ce16f379750 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc @@ -0,0 +1,100 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _51 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _52 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 c_v; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + spvUnsafeArray _33 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + 
threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].c_v = _18[gl_InvocationID].v; + gl_out[gl_InvocationID].gl_PointSize = _33[gl_InvocationID].gl_PointSize; + gl_out[gl_InvocationID].gl_ClipDistance = _33[gl_InvocationID].gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _33[gl_InvocationID].gl_CullDistance; + gl_out_masked[gl_InvocationID] = _33[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.p_v = float4(0.0); + gl_out[gl_InvocationID].c_v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..671aa25a021 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc @@ -0,0 +1,103 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct C +{ + float4 v; +}; + +struct P +{ + float4 v; +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _51 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _52 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 c_v; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 p_v; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _18 = spvUnsafeArray({ C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) }, C{ float4(0.0) } }); + spvUnsafeArray _33 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = 
&spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].c_v = _18[gl_GlobalInvocationID.x % 4].v; + gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _33[gl_GlobalInvocationID.x % 4].gl_PointSize; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _33[gl_GlobalInvocationID.x % 4].gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _33[gl_GlobalInvocationID.x % 4].gl_CullDistance; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + gl_out_masked[gl_GlobalInvocationID.x % 4] = _33[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.p_v = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].c_v = float4(1.0); + patchOut.p_v = float4(2.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc new file mode 100644 index 00000000000..25fe13bf674 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc @@ -0,0 +1,90 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, 
spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup float4 foo[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + foo[gl_InvocationID] = _15[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_InvocationID].gl_PointSize = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_InvocationID].gl_ClipDistance = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.foo_patch = float4(0.0); + foo[gl_InvocationID] = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..750ef96d17b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, 
_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStoragefoo[8][4]; + threadgroup float4 (&foo)[4] = spvStoragefoo[(gl_GlobalInvocationID.x / 4) % 8]; + foo[gl_GlobalInvocationID.x % 4] = _15[gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.foo_patch = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + foo[gl_InvocationID] = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc new file mode 100644 index 00000000000..e8f1146b42d --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc @@ -0,0 +1,89 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray 
+{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 
0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup float4 foo_patch; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _15[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_InvocationID].gl_PointSize = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_InvocationID].gl_ClipDistance = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _29[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + foo_patch = float4(0.0); + gl_out[gl_InvocationID].foo = float4(1.0); + foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc new file mode 100644 index 00000000000..a7c1e5d617b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + spvUnsafeArray _RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ +}; +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _29 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, 
_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].foo = _15[gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _29[gl_GlobalInvocationID.x % 4]._RESERVED_IDENTIFIER_FIXUP_gl_CullDistance; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup float4 spvStoragefoo_patch[8]; + threadgroup float4 (&foo_patch) = spvStoragefoo_patch[(gl_GlobalInvocationID.x / 4) % 8]; + foo_patch = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].foo = float4(1.0); + foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc new file mode 100644 index 00000000000..344751b04ce --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc @@ -0,0 +1,90 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template 
+struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _29 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup 
gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _15[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = _29[gl_InvocationID].gl_Position; + gl_out[gl_InvocationID].gl_ClipDistance = _29[gl_InvocationID].gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _29[gl_InvocationID].gl_CullDistance; + gl_out_masked[gl_InvocationID] = _29[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.foo_patch = float4(0.0); + gl_out[gl_InvocationID].foo = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out_masked[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..92731ec03a4 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _29 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + 
gl_out[gl_GlobalInvocationID.x % 4].foo = _15[gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].gl_Position = _29[gl_GlobalInvocationID.x % 4].gl_Position; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _29[gl_GlobalInvocationID.x % 4].gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _29[gl_GlobalInvocationID.x % 4].gl_CullDistance; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + gl_out_masked[gl_GlobalInvocationID.x % 4] = _29[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.foo_patch = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].foo = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out_masked[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc new file mode 100644 index 00000000000..2f11636a0cb --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc @@ -0,0 +1,90 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _29 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* 
gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _15[gl_InvocationID]; + gl_out[gl_InvocationID].gl_PointSize = _29[gl_InvocationID].gl_PointSize; + gl_out[gl_InvocationID].gl_ClipDistance = _29[gl_InvocationID].gl_ClipDistance; + gl_out[gl_InvocationID].gl_CullDistance = _29[gl_InvocationID].gl_CullDistance; + gl_out_masked[gl_InvocationID] = _29[gl_InvocationID]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + patchOut.foo_patch = float4(0.0); + gl_out[gl_InvocationID].foo = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..7283eddb1f4 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +constant spvUnsafeArray _15 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); +constant spvUnsafeArray _45 = spvUnsafeArray({ 0.0 }); +constant spvUnsafeArray _46 = spvUnsafeArray({ 0.0 }); + +struct main0_out +{ + float4 foo; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_patchOut +{ + float4 foo_patch; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _29 = spvUnsafeArray({ gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) }, gl_PerVertex{ float4(0.0), 0.0, spvUnsafeArray({ 0.0 }), spvUnsafeArray({ 0.0 }) } }); + + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + 
gl_out[gl_GlobalInvocationID.x % 4].foo = _15[gl_GlobalInvocationID.x % 4]; + gl_out[gl_GlobalInvocationID.x % 4].gl_PointSize = _29[gl_GlobalInvocationID.x % 4].gl_PointSize; + gl_out[gl_GlobalInvocationID.x % 4].gl_ClipDistance = _29[gl_GlobalInvocationID.x % 4].gl_ClipDistance; + gl_out[gl_GlobalInvocationID.x % 4].gl_CullDistance = _29[gl_GlobalInvocationID.x % 4].gl_CullDistance; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + gl_out_masked[gl_GlobalInvocationID.x % 4] = _29[gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + patchOut.foo_patch = float4(0.0); + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].foo = float4(1.0); + patchOut.foo_patch = float4(2.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp b/reference/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp new file mode 100644 index 00000000000..d2c368b8ede --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp @@ -0,0 +1,16 @@ +#include +#include + +using namespace metal; + +struct SSBORow +{ + float v; + float4x4 row_major0; +}; + +kernel void main0(device SSBORow& _4 [[buffer(0)]]) +{ + _4.v = _4.row_major0[2][1]; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp new file mode 100644 index 00000000000..0ae12f0858a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; 
+ +struct SSBOScalar +{ + float3 a; + float3x3 b; + float3x3 c; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float3x3 _20 = transpose(_4.b); + _4.b = _4.c; + _4.a = _20 * _4.a; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp new file mode 100644 index 00000000000..86bdd45279b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct SSBOScalar +{ + packed_float3 a; + packed_float3 b; + packed_float3 c; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float3 _17 = float3(_4.b); + float3 _19 = float3(_4.c); + _4.c = _17; + _4.a = _17 * _19; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp new file mode 100644 index 00000000000..669420436b1 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct SSBOScalar +{ + float4 a[16]; + float4 b[16]; + float4 c[16]; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float2 _27 = _4.b[10].xy; + float _29 = _4.c[10].x; + (device float2&)_4.b[10] = float2(10.0, 11.0); + (device float2&)_4.a[10] = _27 * _29; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp new file mode 100644 index 00000000000..23d25b82867 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct SSBOScalar +{ + float2 a; + packed_float3 b; + packed_float3 c; +}; + +kernel void main0(device 
SSBOScalar& _4 [[buffer(0)]]) +{ + float3 _21 = float3(_4.b); + float3 _24 = float3(_4.c); + _4.b = float3(1.0); + _4.a = _21.xy * _24.yz; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp new file mode 100644 index 00000000000..c21fcc7ffc2 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +typedef packed_float3 packed_rm_float3x3[3]; + +struct SSBOScalar +{ + packed_float3 a; + packed_rm_float3x3 b; + packed_rm_float3x3 c; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float3x3 _20 = transpose(float3x3(float3(_4.b[0]), float3(_4.b[1]), float3(_4.b[2]))); + _4.b[0] = float3x3(float3(_4.c[0]), float3(_4.c[1]), float3(_4.c[2]))[0]; + _4.b[1] = float3x3(float3(_4.c[0]), float3(_4.c[1]), float3(_4.c[2]))[1]; + _4.b[2] = float3x3(float3(_4.c[0]), float3(_4.c[1]), float3(_4.c[2]))[2]; + _4.a = _20 * float3(_4.a); +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp b/reference/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp new file mode 100644 index 00000000000..4c70aede48a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct SSBOScalar +{ + float2 a; + packed_float3 b; + packed_float3 c; + float3 d; +}; + +kernel void main0(device SSBOScalar& _4 [[buffer(0)]]) +{ + float3 _23 = float3(_4.b); + float3 _24 = _23; + _24.z = 2.0; + _4.a = _23.xy * _23.z; + _4.b = _24; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp b/reference/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp new file mode 100644 index 00000000000..f12092cf8f3 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp @@ -0,0 +1,16 @@ +#include +#include + +using namespace metal; + +struct SSBORow +{ + float v; + float4x4 row_major0; +}; + +kernel void main0(device SSBORow& _4 [[buffer(0)]]) +{ + _4.v = ((device float*)&_4.row_major0[2u])[1]; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag new file mode 100644 index 00000000000..f26e35c6722 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0].xy + Foo.a[1].xy) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag new file mode 100644 index 00000000000..6f8546532f4 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + packed_float3 a[1]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = float3(Foo.a[0]) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag new file mode 100644 index 00000000000..565ee64e04b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag @@ -0,0 +1,24 
@@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0].xyz + Foo.a[1].xyz) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag new file mode 100644 index 00000000000..8440b2f2297 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0u].xy + Foo.a[1u].xy) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag new file mode 100644 index 00000000000..9b347718edc --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (float2(Foo.a[0][0u], Foo.a[1][0u]) + float2(Foo.a[0][1u], Foo.a[1][1u])) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag 
b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag new file mode 100644 index 00000000000..cd40af10f23 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0u].xyz + Foo.a[1u].xyz) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag new file mode 100644 index 00000000000..86dfd6054bf --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float2x4 a; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (float3(Foo.a[0][0u], Foo.a[1][0u], Foo.a[2][0u]) + float3(Foo.a[0][1u], Foo.a[1][1u], Foo.a[2][1u])) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag new file mode 100644 index 00000000000..7430a551fa2 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float2x4 a; + char _m1_pad[8]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo 
[[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0u].xy + Foo.a[1u].xy) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag new file mode 100644 index 00000000000..19b7f1eebb1 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float4 a[1]; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float2 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (float2(Foo.a[0][0u], Foo.a[1][0u]) + float2(Foo.a[0][1u], Foo.a[1][1u])) + float2(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag new file mode 100644 index 00000000000..f8008525b64 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo +{ + float2x4 a; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (Foo.a[0u].xyz + Foo.a[1u].xyz) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag new file mode 100644 index 00000000000..041b6e91d69 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct type_Foo 
+{ + float2x4 a; + char _m1_pad[12]; + float b; +}; + +struct main0_out +{ + float3 out_var_SV_Target [[color(0)]]; +}; + +fragment main0_out main0(constant type_Foo& Foo [[buffer(0)]]) +{ + main0_out out = {}; + out.out_var_SV_Target = (float3(Foo.a[0][0u], Foo.a[1][0u], Foo.a[2][0u]) + float3(Foo.a[0][1u], Foo.a[1][1u], Foo.a[2][1u])) + float3(Foo.b); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag b/reference/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..6fbf2ff70f0 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + int vA [[user(locn0)]]; + int vB [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = float4(0.0); + int _10 = {}; + int _15 = {}; + for (int _16 = 0, _17 = 0; _16 < in.vA; _17 = _15, _16 += _10) + { + if ((in.vA + _16) == 20) + { + _15 = 50; + } + else + { + _15 = ((in.vB + _16) == 40) ? 60 : _17; + } + _10 = _15 + 10; + out.FragColor += float4(1.0); + } + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..5c30e05a4b5 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc @@ -0,0 +1,65 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _49 = spvUnsafeArray({ 0.0, 0.0, 0.0 }); +constant spvUnsafeArray, 4> _27 = spvUnsafeArray, 4>({ spvUnsafeArray({ 0.0, 0.0, 0.0 }), spvUnsafeArray({ 0.0, 0.0, 0.0 }), spvUnsafeArray({ 0.0, 0.0, 0.0 }), spvUnsafeArray({ 0.0, 0.0, 0.0 }) }); + +struct main0_out +{ + spvUnsafeArray foo; + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _27[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].foo[0] = float(gl_InvocationID); + gl_out[gl_InvocationID].foo[1] = float(gl_InvocationID) + 1.0; + gl_out[gl_InvocationID].foo[2] = float(gl_InvocationID) + 2.0; +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..71498f5b201 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc @@ -0,0 +1,70 @@ 
+#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Verts +{ + float a; + float2 b; +}; + +struct main0_out +{ + float verts_a; + float2 verts_b; + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _27 = spvUnsafeArray({ Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].verts_a = _27[gl_InvocationID].a; + gl_out[gl_InvocationID].verts_b = _27[gl_InvocationID].b; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].verts_a = float(gl_InvocationID); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..d8b74bfa331 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc @@ -0,0 +1,80 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex +{ + float4 _RESERVED_IDENTIFIER_FIXUP_gl_Position; + float _RESERVED_IDENTIFIER_FIXUP_gl_PointSize; +}; + +struct Verts +{ + float a; + float2 b; +}; + +struct main0_out +{ + float verts_a; + float2 verts_b; + float4 gl_Position; + float gl_PointSize; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4> _17 = spvUnsafeArray<_RESERVED_IDENTIFIER_FIXUP_gl_PerVertex, 4>({ _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0 }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0 }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0 }, _RESERVED_IDENTIFIER_FIXUP_gl_PerVertex{ float4(0.0), 0.0 } }); + spvUnsafeArray _27 = spvUnsafeArray({ Verts{ 
0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) }, Verts{ 0.0, float2(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].gl_Position = _17[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_Position; + gl_out[gl_InvocationID].gl_PointSize = _17[gl_InvocationID]._RESERVED_IDENTIFIER_FIXUP_gl_PointSize; + gl_out[gl_InvocationID].verts_a = _27[gl_InvocationID].a; + gl_out[gl_InvocationID].verts_b = _27[gl_InvocationID].b; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].verts_a = float(gl_InvocationID); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..a10731bb283 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc @@ -0,0 +1,69 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Foo +{ + float a; + float2 b; + float4 c; +}; + +struct main0_out +{ + Foo foo; + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + spvUnsafeArray _27 = spvUnsafeArray({ Foo{ 0.0, float2(0.0), float4(0.0) }, Foo{ 0.0, float2(0.0), float4(0.0) }, Foo{ 0.0, float2(0.0), float4(0.0) }, Foo{ 0.0, float2(0.0), float4(0.0) } }); + + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].foo = _27[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].foo.a = float(gl_InvocationID); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc new file mode 100644 index 00000000000..b7246ac570b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc @@ -0,0 +1,125 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct 
spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct cb1_struct +{ + float4 _m0[1]; +}; + +struct main0_out +{ + float3 vocp0; + float4 vocp1; +}; + +struct main0_in +{ + float4 v0 [[attribute(0)]]; + float4 v1 [[attribute(1)]]; + float3 vicp0 [[attribute(2)]]; + float4 vicp1 [[attribute(4)]]; +}; + +static inline __attribute__((always_inline)) +void fork0_epilogue(thread const float4& _87, thread const float4& _88, thread const float4& _89, device half (&gl_TessLevelOuter)[3]) +{ + gl_TessLevelOuter[0u] = half(_87.x); + gl_TessLevelOuter[1u] = half(_88.x); + gl_TessLevelOuter[2u] = half(_89.x); +} + +static inline __attribute__((always_inline)) +void fork0(uint vForkInstanceId, device half (&gl_TessLevelOuter)[3], thread spvUnsafeArray& opc, constant cb1_struct& cb0_0, thread float4& v_48, thread float4& v_49, thread float4& v_50) +{ + float4 r0; + r0.x = as_type(vForkInstanceId); + opc[as_type(r0.x)].x = cb0_0._m0[0u].x; + v_48 = opc[0u]; + v_49 = opc[1u]; + v_50 = opc[2u]; + fork0_epilogue(v_48, v_49, v_50, gl_TessLevelOuter); +} + +static inline __attribute__((always_inline)) +void fork1_epilogue(thread const float4& _109, device half &gl_TessLevelInner) +{ + gl_TessLevelInner = half(_109.x); +} + +static inline __attribute__((always_inline)) +void fork1(device half &gl_TessLevelInner, thread spvUnsafeArray& opc, 
constant cb1_struct& cb0_0, thread float4& v_56) +{ + opc[3u].x = cb0_0._m0[0u].x; + v_56 = opc[3u]; + fork1_epilogue(v_56, gl_TessLevelInner); +} + +kernel void main0(main0_in in [[stage_in]], constant cb1_struct& cb0_0 [[buffer(0)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray, 2> vicp; + spvUnsafeArray _153 = spvUnsafeArray({ gl_in[0].v0, gl_in[1].v0, gl_in[2].v0 }); + vicp[0u] = _153; + spvUnsafeArray _154 = spvUnsafeArray({ gl_in[0].v1, gl_in[1].v1, gl_in[2].v1 }); + vicp[1u] = _154; + gl_out[gl_InvocationID].vocp0 = gl_in[gl_InvocationID].vicp0; + gl_out[gl_InvocationID].vocp1 = gl_in[gl_InvocationID].vicp1; + spvUnsafeArray opc; + float4 v_48; + float4 v_49; + float4 v_50; + fork0(0u, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, opc, cb0_0, v_48, v_49, v_50); + fork0(1u, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, opc, cb0_0, v_48, v_49, v_50); + fork0(2u, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, opc, cb0_0, v_48, v_49, v_50); + float4 v_56; + fork1(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, opc, cb0_0, v_56); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc new file mode 100644 index 00000000000..a492cb829cf --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored 
"-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _19 = spvUnsafeArray({ 1.0, 2.0 }); +constant spvUnsafeArray _25 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0 }); + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_19[0]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_25[0]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_25[1]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_25[2]); + spvUnsafeArray inner; + inner = spvUnsafeArray({ float(spvTessLevel[gl_PrimitiveID].insideTessellationFactor), 0.0 }); + spvUnsafeArray outer; + outer = spvUnsafeArray({ float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), 0.0 }); + 
gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..0c6c1dc813a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc @@ -0,0 +1,62 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _25 = spvUnsafeArray({ 0.0, 0.0, 0.0, 0.0 }); + +struct main0_out +{ + float v; + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + gl_out[gl_InvocationID].v = _25[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].v = float(gl_InvocationID); +} + diff --git 
a/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc similarity index 60% rename from reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc rename to reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc index 6a765117e4e..bdbd4bef873 100644 --- a/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct VertexOutput { float4 pos; @@ -19,7 +58,7 @@ struct HSOut struct HSConstantOut { - float EdgeTess[3]; + spvUnsafeArray EdgeTess; float InsideTess; }; @@ -41,24 +80,12 @@ struct main0_out struct main0_in { - float2 VertexOutput_uv [[attribute(0)]]; + float2 p_uv [[attribute(0)]]; float4 gl_Position [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -HSOut _hs_main(thread const VertexOutput (&p)[3], thread const uint& i) +static inline __attribute__((always_inline)) +HSOut _hs_main(thread const spvUnsafeArray& p, thread const uint& i) { HSOut _output; _output.pos = p[i].pos; @@ -66,7 +93,8 @@ HSOut _hs_main(thread const VertexOutput (&p)[3], thread const uint& i) return _output; } -HSConstantOut PatchHS(thread const VertexOutput (&_patch)[3]) +static inline __attribute__((always_inline)) +HSConstantOut PatchHS(thread const spvUnsafeArray& _patch) { HSConstantOut _output; _output.EdgeTess[0] = (float2(1.0) + _patch[0].uv).x; @@ -84,25 +112,25 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ threadgroup_barrier(mem_flags::mem_threadgroup); if (gl_InvocationID >= 3) return; - VertexOutput p[3]; + spvUnsafeArray p; p[0].pos = gl_in[0].gl_Position; - p[0].uv = gl_in[0].VertexOutput_uv; + p[0].uv = gl_in[0].p_uv; p[1].pos = gl_in[1].gl_Position; - p[1].uv = gl_in[1].VertexOutput_uv; + p[1].uv = gl_in[1].p_uv; p[2].pos = gl_in[2].gl_Position; - p[2].uv = gl_in[2].VertexOutput_uv; + p[2].uv = gl_in[2].p_uv; uint i = gl_InvocationID; - VertexOutput param[3]; - spvArrayCopyFromStack1(param, p); + spvUnsafeArray param; + param = p; uint param_1 = i; HSOut flattenTemp = _hs_main(param, param_1); gl_out[gl_InvocationID].gl_Position = flattenTemp.pos; gl_out[gl_InvocationID]._entryPointOutput.uv = flattenTemp.uv; - threadgroup_barrier(mem_flags::mem_device); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); if (int(gl_InvocationID) == 0) { - VertexOutput param_2[3]; - spvArrayCopyFromStack1(param_2, p); + spvUnsafeArray param_2; + param_2 = p; HSConstantOut _patchConstantResult = 
PatchHS(param_2); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_patchConstantResult.EdgeTess[0]); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_patchConstantResult.EdgeTess[1]); diff --git a/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc new file mode 100644 index 00000000000..dabe1b3857a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc @@ -0,0 +1,140 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexOutput +{ + float4 pos; + float2 uv; +}; + +struct HSOut +{ + float4 pos; + float2 uv; +}; + +struct HSConstantOut +{ + spvUnsafeArray EdgeTess; + float InsideTess; +}; + +struct VertexOutput_1 +{ + float2 uv; +}; + +struct HSOut_1 +{ + float2 uv; +}; + +struct main0_out +{ + HSOut_1 _entryPointOutput; + float4 gl_Position; +}; + +struct main0_in +{ + VertexOutput_1 p; + ushort2 m_171; + float4 gl_Position; +}; + +static inline __attribute__((always_inline)) +HSOut 
_hs_main(thread const spvUnsafeArray& p, thread const uint& i) +{ + HSOut _output; + _output.pos = p[i].pos; + _output.uv = p[i].uv; + return _output; +} + +static inline __attribute__((always_inline)) +HSConstantOut PatchHS(thread const spvUnsafeArray& _patch) +{ + HSConstantOut _output; + _output.EdgeTess[0] = (float2(1.0) + _patch[0].uv).x; + _output.EdgeTess[1] = (float2(1.0) + _patch[0].uv).x; + _output.EdgeTess[2] = (float2(1.0) + _patch[0].uv).x; + _output.InsideTess = (float2(1.0) + _patch[0].uv).x; + return _output; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + spvUnsafeArray p; + p[0].pos = gl_in[0].gl_Position; + p[0].uv = gl_in[0].p.uv; + p[1].pos = gl_in[1].gl_Position; + p[1].uv = gl_in[1].p.uv; + p[2].pos = gl_in[2].gl_Position; + p[2].uv = gl_in[2].p.uv; + uint i = gl_InvocationID; + spvUnsafeArray param; + param = p; + uint param_1 = i; + HSOut flattenTemp = _hs_main(param, param_1); + gl_out[gl_InvocationID].gl_Position = flattenTemp.pos; + gl_out[gl_InvocationID]._entryPointOutput.uv = flattenTemp.uv; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (int(gl_InvocationID) == 0) + { + spvUnsafeArray param_2; + param_2 = p; + HSConstantOut _patchConstantResult = PatchHS(param_2); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_patchConstantResult.EdgeTess[0]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = 
half(_patchConstantResult.EdgeTess[1]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_patchConstantResult.EdgeTess[2]); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_patchConstantResult.InsideTess); + } +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc new file mode 100644 index 00000000000..d6d0bc01496 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _27 = spvUnsafeArray({ 0.0, 0.0 }); +constant spvUnsafeArray _33 = spvUnsafeArray({ 0.0, 0.0, 0.0, 0.0 }); + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(0.0); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(5.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(6.0); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc new file mode 100644 index 00000000000..979f4329f5f --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _27 = spvUnsafeArray({ 0.0, 0.0 }); +constant spvUnsafeArray _33 = spvUnsafeArray({ 0.0, 0.0, 0.0, 0.0 }); + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(0.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(0.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(0.0); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(3.0); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(5.0); +} + diff --git a/reference/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc b/reference/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc new file mode 100644 index 00000000000..b6e40bc835d --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc @@ -0,0 +1,35 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +static inline __attribute__((always_inline)) +void store_tess_level_in_func(device half &gl_TessLevelInner, device half (&gl_TessLevelOuter)[3]) +{ + gl_TessLevelInner = half(1.0); + gl_TessLevelOuter[0] = half(3.0); + gl_TessLevelOuter[1] = half(4.0); + gl_TessLevelOuter[2] = half(5.0); +} + +static inline __attribute__((always_inline)) +float load_tess_level_in_func(device half &gl_TessLevelInner, device half (&gl_TessLevelOuter)[3]) +{ + return float(gl_TessLevelInner) + float(gl_TessLevelOuter[1]); +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + store_tess_level_in_func(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor); + float v = load_tess_level_in_func(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor); + gl_out[gl_InvocationID].gl_Position = float4(v); +} + diff --git a/reference/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese 
b/reference/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese new file mode 100644 index 00000000000..7fa0f5a4e45 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 out_var_CUSTOM_VALUE [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + out.out_var_CUSTOM_VALUE = float4(gl_TessLevelOuter[0] + gl_TessLevelInner[0], gl_TessLevelOuter[1] + gl_TessLevelInner[1], 
gl_TessLevelOuter[2], gl_TessLevelOuter[3]); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese b/reference/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese new file mode 100644 index 00000000000..05a81133310 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float o0 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in0 [[attribute(0)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + out.o0 = patchIn.gl_in[0u].in0.z; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert b/reference/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..6c8f9382701 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,39 @@ +#include +#include + +using namespace metal; + +struct Vert +{ + float a; + float b; +}; + +struct Foo +{ + float c; + float d; +}; + +struct main0_out +{ + float m_3_a [[user(locn0)]]; + float m_3_b [[user(locn1)]]; + float foo_c [[user(locn2)]]; + float foo_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + Vert _3 = Vert{ 0.0, 0.0 }; + Foo foo = Foo{ 0.0, 0.0 }; + out.gl_Position = float4(0.0); + out.m_3_a = _3.a; + out.m_3_b = _3.b; + out.foo_c = foo.c; + out.foo_d = foo.d; + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert b/reference/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..54b88ba9c6b --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,20 @@ 
+#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_Position = float4(0.0); + out.gl_PointSize = 0.0; + out.gl_Position = float4(1.0); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert b/reference/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert new file mode 100644 index 00000000000..ea89378b10a --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert @@ -0,0 +1,31 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct type_Float2Array +{ + float4 arr[3]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +static inline __attribute__((always_inline)) +float4 src_VSMain(thread const uint& i, constant type_Float2Array& Float2Array) +{ + return float4(Float2Array.arr[i].x, Float2Array.arr[i].y, 0.0, 1.0); +} + +vertex main0_out main0(constant type_Float2Array& Float2Array [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + uint param_var_i = gl_VertexIndex; + out.gl_Position = src_VSMain(param_var_i, Float2Array); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert b/reference/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert new file mode 100644 index 00000000000..07bcb9f1019 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert @@ -0,0 +1,89 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _14 +{ + float _m0[3]; +}; + +struct _15 +{ + float _m0[3]; +}; + +constant spvUnsafeArray _93 = spvUnsafeArray({ 1.0, 2.0, 1.0 }); +constant spvUnsafeArray _94 = spvUnsafeArray({ -1.0, -2.0, -1.0 }); + +struct main0_out +{ + float4 m_4 [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 m_3 [[attribute(0)]]; + float4 m_5 [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +float4 _102(float4 _107) +{ + float4 _109 = _107; + _14 _110 = _14{ { 1.0, 2.0, 1.0 } }; + _15 _111 = _15{ { -1.0, -2.0, -1.0 } }; + _109.y = (_110._m0[2] + _111._m0[2]) + _109.y; + return _109; +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.m_3; + out.m_4 = _102(in.m_5); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert b/reference/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert new file mode 100644 index 00000000000..f007a67f226 --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + uint gl_Layer [[render_target_array_index]]; +}; + +vertex main0_out main0(uint gl_InstanceIndex [[instance_id]], uint 
gl_BaseInstance [[base_instance]]) +{ + main0_out out = {}; + const uint gl_ViewIndex = 0; + out.gl_Position = float4(float(int(gl_ViewIndex))); + return out; +} + diff --git a/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert b/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert index e9cd6a540aa..95b61a40dbe 100644 --- a/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert +++ b/reference/shaders-msl-no-opt/asm/vert/empty-struct-composite.asm.vert @@ -5,12 +5,9 @@ using namespace metal; struct Test { - int empty_struct_member; }; - vertex void main0() { - Test _14 = Test{ 0 }; - Test t = _14; + Test t = Test{ }; } diff --git a/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag b/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag index e4f09e890e7..18d98993e5f 100644 --- a/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag +++ b/reference/shaders-msl-no-opt/asm/vert/op-load-forced-temporary-array.asm.frag @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + constant float _21 = {}; struct main0_out @@ -12,35 +51,20 @@ struct main0_out float4 gl_Position [[position]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - vertex main0_out main0() { main0_out out = {}; - float _23[2]; + spvUnsafeArray _23; for (int _25 = 0; _25 < 2; ) { _23[_25] = 0.0; _25++; continue; } - float _31[2]; - spvArrayCopyFromStack1(_31, _23); float _37; if (as_type(3.0) != 0u) { - _37 = _31[0]; + _37 = _23[0]; } else { diff --git a/reference/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert b/reference/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert new file mode 100644 index 00000000000..750afcf25bf --- /dev/null +++ b/reference/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert @@ -0,0 +1,20 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +device float* thread * constant _9 = {}; + +static inline __attribute__((always_inline)) +void _10(device float* thread * const thread & 
_11) +{ +} + +vertex void main0() +{ + device float* thread * _14 = _9; + _10(_14); +} + diff --git a/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp b/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp new file mode 100644 index 00000000000..eab1df4fce9 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp @@ -0,0 +1,172 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void 
spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 1u, 1u); + +kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) +{ + threadgroup float shared_group[8][8]; + threadgroup float shared_group_alt[8][8]; + spvUnsafeArray blob; + for (int i = 0; i < 8; i++) + { + blob[i] = float(i); + } + spvArrayCopyFromStackToThreadGroup1(shared_group[gl_LocalInvocationIndex], blob.elements); + threadgroup_barrier(mem_flags::mem_threadgroup); + spvUnsafeArray copied_blob; + spvArrayCopyFromThreadGroupToStack1(copied_blob.elements, shared_group[gl_LocalInvocationIndex ^ 1u]); + 
spvArrayCopyFromThreadGroupToThreadGroup1(shared_group_alt[gl_LocalInvocationIndex], shared_group[gl_LocalInvocationIndex]); +} + diff --git a/reference/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp b/reference/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp new file mode 100644 index 00000000000..ae8c5b02953 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp @@ -0,0 +1,90 @@ +#include +#include + +using namespace metal; + +struct Baz +{ + int e; + int f; +}; + +struct Foo +{ + int a; + int b; +}; + +struct Bar +{ + int c; + int d; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u); + +struct spvDescriptorSetBuffer0 +{ + constant Foo* m_34 [[id(0)]]; + constant Bar* m_40 [[id(1)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + device Baz* baz [[id(0)]][3][3][2]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(23)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + constant auto& _34 = *(constant Foo* )((constant char* )spvDescriptorSet0.m_34 + spvDynamicOffsets[0]); + device Baz* baz[3][3][2] = + { + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][0][0] + spvDynamicOffsets[1]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][0][1] + spvDynamicOffsets[2]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][1][0] + spvDynamicOffsets[3]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][1][1] + spvDynamicOffsets[4]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][2][0] + spvDynamicOffsets[5]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][2][1] + spvDynamicOffsets[6]), + }, + }, + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][0][0] + spvDynamicOffsets[7]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][0][1] 
+ spvDynamicOffsets[8]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][1][0] + spvDynamicOffsets[9]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][1][1] + spvDynamicOffsets[10]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][2][0] + spvDynamicOffsets[11]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][2][1] + spvDynamicOffsets[12]), + }, + }, + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][0][0] + spvDynamicOffsets[13]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][0][1] + spvDynamicOffsets[14]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][1][0] + spvDynamicOffsets[15]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][1][1] + spvDynamicOffsets[16]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][2][0] + spvDynamicOffsets[17]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][2][1] + spvDynamicOffsets[18]), + }, + }, + }; + + uint3 coords = gl_GlobalInvocationID; + baz[coords.x][coords.y][coords.z]->e = _34.a + (*spvDescriptorSet0.m_40).c; + baz[coords.x][coords.y][coords.z]->f = _34.b * (*spvDescriptorSet0.m_40).d; +} + diff --git a/reference/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp b/reference/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp new file mode 100644 index 00000000000..bf26b3b280b --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct Ref; + +struct Ref +{ + float4 v; +}; + +struct Registers +{ + device Ref* foo; +}; + +kernel void main0(constant Registers& _14 [[buffer(0)]]) +{ + device Ref* __restrict ref = _14.foo; + ref->v = float4(1.0); +} + diff --git a/reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp b/reference/shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp similarity index 54% rename from 
reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp rename to reference/shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp index 170e4920e7b..2e86f996c9b 100644 --- a/reference/shaders-msl/comp/bitcast-16bit-1.invalid.comp +++ b/reference/shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp @@ -13,12 +13,14 @@ struct SSBO1 int4 outputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO0& _25 [[buffer(0)]], device SSBO1& _39 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; half2 a = as_type(_25.inputs[ident].xy); - _39.outputs[ident].x = int(as_type(a + half2(half(1.0)))); - _39.outputs[ident].y = as_type(_25.inputs[ident].zw); - _39.outputs[ident].z = int(as_type(ushort2(_25.inputs[ident].xy))); + ((device int*)&_39.outputs[ident])[0u] = int(as_type(a + half2(half(1.0)))); + ((device int*)&_39.outputs[ident])[1u] = as_type(_25.inputs[ident].zw); + ((device int*)&_39.outputs[ident])[2u] = int(as_type(ushort2(_25.inputs[ident].xy))); } diff --git a/reference/shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp b/reference/shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp new file mode 100644 index 00000000000..fa65e3bb4de --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct SSBO1 +{ + short4 outputs[1]; +}; + +struct SSBO0 +{ + int4 inputs[1]; +}; + +struct UBO +{ + half4 const0; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint ident = gl_GlobalInvocationID.x; + int _33 = ((device int*)&_29.inputs[ident])[0u]; + short2 _47 = as_type(_33) + as_type(_40.const0.xy); + ((device short*)&_21.outputs[ident])[0u] = _47.x; + ((device 
short*)&_21.outputs[ident])[1u] = _47.y; + int _57 = ((device int*)&_29.inputs[ident])[1u]; + short2 _67 = short2(as_type(uint(_57)) - as_type(_40.const0.zw)); + ((device short*)&_21.outputs[ident])[2u] = _67.x; + ((device short*)&_21.outputs[ident])[3u] = _67.y; +} + diff --git a/reference/shaders-msl-no-opt/comp/bitfield.comp b/reference/shaders-msl-no-opt/comp/bitfield.comp index dbc27f02be9..7f797add160 100644 --- a/reference/shaders-msl-no-opt/comp/bitfield.comp +++ b/reference/shaders-msl-no-opt/comp/bitfield.comp @@ -7,14 +7,14 @@ using namespace metal; // Implementation of the GLSL findLSB() function template -T findLSB(T x) +inline T spvFindLSB(T x) { return select(ctz(x), T(-1), x == T(0)); } // Implementation of the signed GLSL findMSB() function template -T findSMSB(T x) +inline T spvFindSMSB(T x) { T v = select(x, T(-1) - x, x < T(0)); return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); @@ -22,7 +22,7 @@ T findSMSB(T x) // Implementation of the unsigned GLSL findMSB() function template -T findUMSB(T x) +inline T spvFindUMSB(T x) { return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0)); } @@ -31,17 +31,17 @@ kernel void main0() { int signed_value = 0; uint unsigned_value = 0u; - int s = extract_bits(signed_value, 5, 20); - uint u = extract_bits(unsigned_value, 6, 21); - s = insert_bits(s, 40, 5, 4); - u = insert_bits(u, 60u, 5, 4); + int s = extract_bits(signed_value, uint(5), uint(20)); + uint u = extract_bits(unsigned_value, uint(6), uint(21)); + s = insert_bits(s, 40, uint(5), uint(4)); + u = insert_bits(u, 60u, uint(5), uint(4)); u = reverse_bits(u); s = reverse_bits(s); - int v0 = popcount(u); + int v0 = int(popcount(u)); int v1 = popcount(s); - int v2 = int(findUMSB(u)); - int v3 = findSMSB(s); - int v4 = findLSB(u); - int v5 = findLSB(s); + int v2 = int(spvFindUMSB(u)); + int v3 = spvFindSMSB(s); + int v4 = int(spvFindLSB(u)); + int v5 = spvFindLSB(s); } diff --git 
a/reference/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp b/reference/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp new file mode 100644 index 00000000000..7864e0fb768 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct S +{ + float3 v; +}; + +struct SSBO +{ + S s[1]; +}; + +struct PC +{ + uint2 ptr; +}; + +kernel void main0(constant PC& pc [[buffer(0)]]) +{ + device SSBO* ssbo = reinterpret_cast(as_type(pc.ptr)); + ssbo->s[0].v = float3(1.0); +} + diff --git a/reference/shaders-msl-no-opt/comp/glsl.std450.comp b/reference/shaders-msl-no-opt/comp/glsl.std450.comp new file mode 100644 index 00000000000..b1790b23df6 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/glsl.std450.comp @@ -0,0 +1,289 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Implementation of the GLSL radians() function +template +inline T radians(T d) +{ + return d * T(0.01745329251); +} + +// Implementation of the GLSL degrees() function +template +inline T degrees(T r) +{ + return r * T(57.2957795131); +} + +// Implementation of the GLSL findLSB() function +template +inline T spvFindLSB(T x) +{ + return select(ctz(x), T(-1), x == T(0)); +} + +// Implementation of the signed GLSL findMSB() function +template +inline T spvFindSMSB(T x) +{ + T v = select(x, T(-1) - x, x < T(0)); + return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); +} + +// Implementation of the unsigned GLSL findMSB() function +template +inline T spvFindUMSB(T x) +{ + return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0)); +} + +// Implementation of the GLSL sign() function for integer types +template::value>::type> +inline T sign(T x) +{ + return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); 
+} + +// Returns the determinant of a 2x2 matrix. +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) +{ + return a1 * b2 - b1 * a2; +} + +// Returns the determinant of a 3x3 matrix. +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +{ + return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) +float4x4 spvInverse4x4(float4x4 m) +{ + float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. + adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]); + adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]); + adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]); + + adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]); + adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]); + adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]); + + adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]); + 
adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]); + adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]); + + adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]); + adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]); + adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] * m[3][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) +float3x3 spvInverse3x3(float3x3 m) +{ + float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. 
+ adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]); + adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]); + adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]); + + adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]); + adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]); + adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]); + + adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]); + adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]); + adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]); + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? (adj * (1.0f / det)) : m; +} + +// Returns the inverse of a matrix, by using the algorithm of calculating the classical +// adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) +float2x2 spvInverse2x2(float2x2 m) +{ + float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) + + // Create the transpose of the cofactors, as the classical adjoint of the matrix. + adj[0][0] = m[1][1]; + adj[0][1] = -m[0][1]; + + adj[1][0] = -m[1][0]; + adj[1][1] = m[0][0]; + + // Calculate the determinant as a combination of the cofactors of the first row. + float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]); + + // Divide the classical adjoint matrix by the determinant. + // If determinant is zero, matrix is not invertable, so leave it unchanged. + return (det != 0.0f) ? 
(adj * (1.0f / det)) : m; +} + +template +[[clang::optnone]] T spvReflect(T i, T n) +{ + return i - T(2) * i * n * n; +} + +template +inline T spvRefract(T i, T n, T eta) +{ + T NoI = n * i; + T NoI2 = NoI * NoI; + T k = T(1) - eta * eta * (T(1) - NoI2); + if (k < T(0)) + { + return T(0); + } + else + { + return eta * i - (eta * NoI + sqrt(k)) * n; + } +} + +template +inline T spvFaceForward(T n, T i, T nref) +{ + return i * nref < T(0) ? n : -n; +} + +struct SSBO +{ + float res; + int ires; + uint ures; + float4 f32; + int4 s32; + uint4 u32; + float2x2 m2; + float3x3 m3; + float4x4 m4; +}; + +struct ResType +{ + float _m0; + int _m1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _19 [[buffer(0)]]) +{ + _19.res = round(((device float*)&_19.f32)[0u]); + _19.res = rint(((device float*)&_19.f32)[0u]); + _19.res = trunc(((device float*)&_19.f32)[0u]); + _19.res = abs(((device float*)&_19.f32)[0u]); + _19.ires = abs(((device int*)&_19.s32)[0u]); + _19.res = sign(((device float*)&_19.f32)[0u]); + _19.ires = sign(((device int*)&_19.s32)[0u]); + _19.res = floor(((device float*)&_19.f32)[0u]); + _19.res = ceil(((device float*)&_19.f32)[0u]); + _19.res = fract(((device float*)&_19.f32)[0u]); + _19.res = radians(((device float*)&_19.f32)[0u]); + _19.res = degrees(((device float*)&_19.f32)[0u]); + _19.res = sin(((device float*)&_19.f32)[0u]); + _19.res = cos(((device float*)&_19.f32)[0u]); + _19.res = tan(((device float*)&_19.f32)[0u]); + _19.res = asin(((device float*)&_19.f32)[0u]); + _19.res = acos(((device float*)&_19.f32)[0u]); + _19.res = atan(((device float*)&_19.f32)[0u]); + _19.res = fast::sinh(((device float*)&_19.f32)[0u]); + _19.res = fast::cosh(((device float*)&_19.f32)[0u]); + _19.res = precise::tanh(((device float*)&_19.f32)[0u]); + _19.res = asinh(((device float*)&_19.f32)[0u]); + _19.res = acosh(((device float*)&_19.f32)[0u]); + _19.res = atanh(((device float*)&_19.f32)[0u]); + _19.res = 
precise::atan2(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.res = pow(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.res = exp(((device float*)&_19.f32)[0u]); + _19.res = log(((device float*)&_19.f32)[0u]); + _19.res = exp2(((device float*)&_19.f32)[0u]); + _19.res = log2(((device float*)&_19.f32)[0u]); + _19.res = sqrt(((device float*)&_19.f32)[0u]); + _19.res = rsqrt(((device float*)&_19.f32)[0u]); + _19.res = abs(((device float*)&_19.f32)[0u]); + _19.res = abs(((device float*)&_19.f32)[0u] - ((device float*)&_19.f32)[1u]); + _19.res = sign(((device float*)&_19.f32)[0u]); + _19.res = spvFaceForward(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.res = spvReflect(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.res = spvRefract(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.res = length(_19.f32.xy); + _19.res = distance(_19.f32.xy, _19.f32.zw); + float2 v2 = fast::normalize(_19.f32.xy); + v2 = faceforward(_19.f32.xy, _19.f32.yz, _19.f32.zw); + v2 = reflect(_19.f32.xy, _19.f32.zw); + v2 = refract(_19.f32.xy, _19.f32.yz, ((device float*)&_19.f32)[3u]); + float3 v3 = cross(_19.f32.xyz, _19.f32.yzw); + _19.res = determinant(_19.m2); + _19.res = determinant(_19.m3); + _19.res = determinant(_19.m4); + _19.m2 = spvInverse2x2(_19.m2); + _19.m3 = spvInverse3x3(_19.m3); + _19.m4 = spvInverse4x4(_19.m4); + float tmp; + float _287 = modf(((device float*)&_19.f32)[0u], tmp); + _19.res = _287; + _19.res = fast::min(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.ures = min(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u]); + _19.ires = min(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u]); + _19.res = fast::max(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.ures = max(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u]); + _19.ires = 
max(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u]); + _19.res = fast::clamp(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.ures = clamp(((device uint*)&_19.u32)[0u], ((device uint*)&_19.u32)[1u], ((device uint*)&_19.u32)[2u]); + _19.ires = clamp(((device int*)&_19.s32)[0u], ((device int*)&_19.s32)[1u], ((device int*)&_19.s32)[2u]); + _19.res = mix(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.res = step(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u]); + _19.res = smoothstep(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + _19.res = fma(((device float*)&_19.f32)[0u], ((device float*)&_19.f32)[1u], ((device float*)&_19.f32)[2u]); + ResType _387; + _387._m0 = frexp(((device float*)&_19.f32)[0u], _387._m1); + int itmp = _387._m1; + _19.res = _387._m0; + _19.res = ldexp(((device float*)&_19.f32)[0u], itmp); + _19.ures = pack_float_to_snorm4x8(_19.f32); + _19.ures = pack_float_to_unorm4x8(_19.f32); + _19.ures = pack_float_to_snorm2x16(_19.f32.xy); + _19.ures = pack_float_to_unorm2x16(_19.f32.xy); + _19.ures = as_type(half2(_19.f32.xy)); + v2 = unpack_snorm2x16_to_float(((device uint*)&_19.u32)[0u]); + v2 = unpack_unorm2x16_to_float(((device uint*)&_19.u32)[0u]); + v2 = float2(as_type(((device uint*)&_19.u32)[0u])); + float4 v4 = unpack_snorm4x8_to_float(((device uint*)&_19.u32)[0u]); + v4 = unpack_unorm4x8_to_float(((device uint*)&_19.u32)[0u]); + _19.s32 = spvFindLSB(_19.s32); + _19.s32 = int4(spvFindLSB(_19.u32)); + _19.s32 = spvFindSMSB(_19.s32); + _19.s32 = int4(spvFindUMSB(_19.u32)); +} + diff --git a/reference/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp b/reference/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..de1695b0684 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,29 @@ +#include 
+#include + +using namespace metal; + +struct Foo +{ + float _abs; +}; + +struct Foo_1 +{ + float _abs; +}; + +struct SSBO +{ + Foo_1 foo; + Foo_1 foo2; +}; + +kernel void main0(device SSBO& _7 [[buffer(0)]]) +{ + Foo f; + f._abs = _7.foo._abs; + int _abs = 10; + _7.foo2._abs = f._abs; +} + diff --git a/reference/shaders-msl-no-opt/comp/implicit-integer-promotion.comp b/reference/shaders-msl-no-opt/comp/implicit-integer-promotion.comp new file mode 100644 index 00000000000..5c3ce49eb9d --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/implicit-integer-promotion.comp @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct BUF0 +{ + half2 f16s; + ushort2 u16; + short2 i16; + ushort4 u16s; + short4 i16s; + half f16; +}; + +static inline __attribute__((always_inline)) +void test_u16(device BUF0& v_24) +{ + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] + ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] - ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] * ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] / ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] % ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] << ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] >> ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(~((device ushort*)&v_24.u16)[0u])); + v_24.f16 += as_type(ushort(-((device ushort*)&v_24.u16)[0u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] ^ ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] & ((device ushort*)&v_24.u16)[1u])); + v_24.f16 += as_type(ushort(((device ushort*)&v_24.u16)[0u] | ((device 
ushort*)&v_24.u16)[1u])); +} + +static inline __attribute__((always_inline)) +void test_i16(device BUF0& v_24) +{ + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] + ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] - ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] * ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] / ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] % ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] << ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] >> ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(~((device short*)&v_24.i16)[0u])); + v_24.f16 += as_type(short(-((device short*)&v_24.i16)[0u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] ^ ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] & ((device short*)&v_24.i16)[1u])); + v_24.f16 += as_type(short(((device short*)&v_24.i16)[0u] | ((device short*)&v_24.i16)[1u])); +} + +static inline __attribute__((always_inline)) +void test_u16s(device BUF0& v_24) +{ + v_24.f16s += as_type(v_24.u16s.xy + v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy - v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy * v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy / v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy % v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy << v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy >> v_24.u16s.zw); + v_24.f16s += as_type(~v_24.u16s.xy); + v_24.f16s += as_type(-v_24.u16s.xy); + v_24.f16s += as_type(v_24.u16s.xy ^ v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy & v_24.u16s.zw); + v_24.f16s += as_type(v_24.u16s.xy | v_24.u16s.zw); +} + +static inline __attribute__((always_inline)) +void test_i16s(device BUF0& v_24) +{ + v_24.f16s += 
as_type(v_24.i16s.xy + v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy - v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy * v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy / v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy % v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy << v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy >> v_24.i16s.zw); + v_24.f16s += as_type(~v_24.i16s.xy); + v_24.f16s += as_type(-v_24.i16s.xy); + v_24.f16s += as_type(v_24.i16s.xy ^ v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy & v_24.i16s.zw); + v_24.f16s += as_type(v_24.i16s.xy | v_24.i16s.zw); +} + +kernel void main0(device BUF0& v_24 [[buffer(0)]]) +{ + test_u16(v_24); + test_i16(v_24); + test_u16s(v_24); + test_i16s(v_24); +} + diff --git a/reference/shaders-msl-no-opt/comp/int16min-literal.comp b/reference/shaders-msl-no-opt/comp/int16min-literal.comp new file mode 100644 index 00000000000..d73768c3436 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/int16min-literal.comp @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + half b; +}; + +struct SSBO +{ + half a; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant UBO& _12 [[buffer(0)]], device SSBO& _24 [[buffer(1)]]) +{ + short v = as_type(_12.b); + v = short(v ^ short(-32768)); + _24.a = as_type(v); +} + diff --git a/reference/shaders-msl-no-opt/comp/int64.invalid.msl22.comp b/reference/shaders-msl-no-opt/comp/int64.invalid.msl22.comp new file mode 100644 index 00000000000..d5bbbb47fc6 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/int64.invalid.msl22.comp @@ -0,0 +1,106 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct M0 +{ + long v; + spvUnsafeArray b; + ulong c; + spvUnsafeArray d; +}; + +struct SSBO0_Type +{ + long4 a; + M0 m0; +}; + +struct SSBO1_Type +{ + ulong4 b; + M0 m0; +}; + +struct SSBO2_Type +{ + spvUnsafeArray a; + spvUnsafeArray b; +}; + +struct SSBO3_Type +{ + spvUnsafeArray a; + spvUnsafeArray b; +}; + +struct SSBO +{ + int s32; + uint u32; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _96 [[buffer(0)]]) +{ + SSBO0_Type ssbo_0; + ssbo_0.a += long4(10l, 20l, 30l, 40l); + SSBO1_Type ssbo_1; + ssbo_1.b += ulong4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul); + ssbo_0.a += long4(20l); + ssbo_0.a = abs(ssbo_0.a + long4(ssbo_1.b)); + ssbo_0.a += long4(1l); + ssbo_1.b += ulong4(long4(1l)); + ssbo_0.a -= long4(1l); + ssbo_1.b -= ulong4(long4(1l)); + SSBO2_Type ssbo_2; + ssbo_2.a[0] += 1l; + SSBO3_Type ssbo_3; + ssbo_3.a[0] += 2l; + _96.s32 = int(uint(((ulong(ssbo_0.a.x) + ssbo_1.b.y) + ulong(ssbo_2.a[1])) + ulong(ssbo_3.a[2]))); + _96.u32 = uint(((ulong(ssbo_0.a.y) + ssbo_1.b.z) + ulong(ssbo_2.a[0])) + ulong(ssbo_3.a[1])); +} + diff --git a/reference/shaders-msl-no-opt/comp/int64min-literal.msl22.comp b/reference/shaders-msl-no-opt/comp/int64min-literal.msl22.comp new file mode 100644 index 00000000000..a8f2b0e270c 
--- /dev/null +++ b/reference/shaders-msl-no-opt/comp/int64min-literal.msl22.comp @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float b; +}; + +struct SSBO +{ + float a; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant UBO& _12 [[buffer(0)]], device SSBO& _25 [[buffer(1)]]) +{ + long v = long(as_type(_12.b)); + v ^= long(0x8000000000000000ul); + _25.a = as_type(int(v)); +} + diff --git a/reference/shaders-msl-no-opt/comp/intmin-literal.comp b/reference/shaders-msl-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..db2294fe6c0 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float a; +}; + +struct UBO +{ + float b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], constant UBO& _14 [[buffer(1)]]) +{ + _9.a = as_type(as_type(_14.b) ^ int(0x80000000)); +} + diff --git a/reference/shaders-msl-no-opt/comp/loop.comp b/reference/shaders-msl-no-opt/comp/loop.comp index d7677fb4363..34fe64b0a2b 100644 --- a/reference/shaders-msl-no-opt/comp/loop.comp +++ b/reference/shaders-msl-no-opt/comp/loop.comp @@ -14,7 +14,9 @@ struct SSBO2 float4 out_data[1]; }; -kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(const device SSBO& _24 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; float4 idat = _24.in_data[ident]; @@ -85,23 +87,5 @@ kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _177 [[buf k += 10; continue; } - k = 0; - do - { - k++; - } while (k > 10); - int l = 0; - for (;;) - { - if (l == 5) - { - l++; - continue; - } - idat += 
float4(1.0); - l++; - continue; - } - _177.out_data[ident] = idat; } diff --git a/reference/shaders-msl-no-opt/comp/return.comp b/reference/shaders-msl-no-opt/comp/return.comp index 71fcfbe3911..04cacea9d53 100644 --- a/reference/shaders-msl-no-opt/comp/return.comp +++ b/reference/shaders-msl-no-opt/comp/return.comp @@ -8,6 +8,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; @@ -23,7 +25,8 @@ kernel void main0(device SSBO2& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [ return; } } - for (int i = 0; i < 20; i++) + int i = 0; + while (i < 20) { if (i == 10) { diff --git a/reference/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp b/reference/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp new file mode 100644 index 00000000000..ba278ccde76 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 a[16]; + float4 b[16]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _14 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _14.b[gl_GlobalInvocationID.x] = float4(_14.a[gl_GlobalInvocationID.x].x); +} + diff --git a/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/reference/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp similarity index 64% rename from reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp rename to reference/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp index 278a8bb2ee8..49758ca3e17 100644 --- a/reference/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp +++ 
b/reference/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp @@ -4,7 +4,6 @@ using namespace metal; typedef packed_float2 packed_float2x2[2]; -typedef packed_float2 packed_rm_float2x3[3]; typedef packed_float3 packed_float2x3[2]; typedef packed_float3 packed_rm_float3x2[2]; @@ -32,11 +31,6 @@ struct S3 float b; }; -struct S4 -{ - float2 c; -}; - struct Content { S0 m0s[1]; @@ -47,7 +41,6 @@ struct Content S2 m2; S3 m3; float m4; - S4 m3s[8]; }; struct SSBO1 @@ -61,15 +54,17 @@ struct SSBO1 float3x2 m3; float2x2 m4; float2x2 m5[9]; - packed_rm_float2x3 m6[4][2]; - float3x2 m7; + float3x2 m6[4][2]; + packed_rm_float3x2 m7; float array[1]; }; struct S0_1 { - float4 a[1]; + float2 a[1]; + char _m1_pad[8]; float b; + char _m0_final_padding[12]; }; struct S1_1 @@ -82,6 +77,7 @@ struct S2_1 { float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3_1 @@ -90,11 +86,6 @@ struct S3_1 float b; }; -struct S4_1 -{ - float2 c; -}; - struct Content_1 { S0_1 m0s[1]; @@ -105,8 +96,7 @@ struct Content_1 S2_1 m2; S3_1 m3; float m4; - char _m8_pad[12]; - /* FIXME: A padded struct is needed here. If you see this message, file a bug! 
*/ S4_1 m3s[8]; + char _m0_final_padding[12]; }; struct SSBO0 @@ -114,18 +104,14 @@ struct SSBO0 Content_1 content; Content_1 content1[2]; Content_1 content2; - float2x2 m0; - char _m4_pad[16]; - float2x2 m1; - char _m5_pad[16]; + float2x4 m0; + float2x4 m1; float2x3 m2[4]; - float3x2 m3; - char _m7_pad[24]; - float2x2 m4; - char _m8_pad[16]; - float2x2 m5[9]; - float2x3 m6[4][2]; - float3x2 m7; + float3x4 m3; + float2x4 m4; + float2x4 m5[9]; + float3x4 m6[4][2]; + float2x3 m7; float4 array[1]; }; @@ -136,15 +122,17 @@ struct SSBO2 packed_rm_float3x2 m2; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_scalar [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]], device SSBO2& ssbo_scalar2 [[buffer(2)]]) { - ssbo_scalar.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy; + ssbo_scalar.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0]; ssbo_scalar.content.m0s[0].b = ssbo_140.content.m0s[0].b; ssbo_scalar.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a); ssbo_scalar.content.m1s[0].b = ssbo_140.content.m1s[0].b; ssbo_scalar.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0]; ssbo_scalar.content.m2s[0].b = ssbo_140.content.m2s[0].b; - ssbo_scalar.content.m0.a[0] = ssbo_140.content.m0.a[0].xy; + ssbo_scalar.content.m0.a[0] = ssbo_140.content.m0.a[0]; ssbo_scalar.content.m0.b = ssbo_140.content.m0.b; ssbo_scalar.content.m1.a = float3(ssbo_140.content.m1.a); ssbo_scalar.content.m1.b = ssbo_140.content.m1.b; @@ -153,19 +141,11 @@ kernel void main0(device SSBO1& ssbo_scalar [[buffer(0)]], device SSBO0& ssbo_14 ssbo_scalar.content.m3.a = ssbo_140.content.m3.a; ssbo_scalar.content.m3.b = ssbo_140.content.m3.b; ssbo_scalar.content.m4 = ssbo_140.content.m4; - ssbo_scalar.content.m3s[0].c = ssbo_140.content.m3s[0].c; - ssbo_scalar.content.m3s[1].c = ssbo_140.content.m3s[1].c; - ssbo_scalar.content.m3s[2].c = ssbo_140.content.m3s[2].c; - ssbo_scalar.content.m3s[3].c = ssbo_140.content.m3s[3].c; - 
ssbo_scalar.content.m3s[4].c = ssbo_140.content.m3s[4].c; - ssbo_scalar.content.m3s[5].c = ssbo_140.content.m3s[5].c; - ssbo_scalar.content.m3s[6].c = ssbo_140.content.m3s[6].c; - ssbo_scalar.content.m3s[7].c = ssbo_140.content.m3s[7].c; ssbo_scalar.content.m1.a = float2x3(float3(ssbo_scalar.m2[1][0]), float3(ssbo_scalar.m2[1][1])) * float2(ssbo_scalar.content.m0.a[0]); ssbo_scalar.m0 = float2x2(float2(ssbo_scalar2.m1[0]), float2(ssbo_scalar2.m1[1])); - ssbo_scalar2.m1[0] = transpose(ssbo_scalar.m4)[0]; - ssbo_scalar2.m1[1] = transpose(ssbo_scalar.m4)[1]; - ssbo_scalar2.m2[0] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[0]; - ssbo_scalar2.m2[1] = spvConvertFromRowMajor3x2(ssbo_scalar.m3)[1]; + ssbo_scalar2.m1[0] = float2(ssbo_scalar.m4[0][0], ssbo_scalar.m4[1][0]); + ssbo_scalar2.m1[1] = float2(ssbo_scalar.m4[0][1], ssbo_scalar.m4[1][1]); + ssbo_scalar2.m2[0] = float3(ssbo_scalar.m3[0][0], ssbo_scalar.m3[1][0], ssbo_scalar.m3[2][0]); + ssbo_scalar2.m2[1] = float3(ssbo_scalar.m3[0][1], ssbo_scalar.m3[1][1], ssbo_scalar.m3[2][1]); } diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp new file mode 100644 index 00000000000..651991e3513 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], unsupported-built-in-type gl_WorkGroupSize [[unsupported-built-in]]) +{ + uint gl_NumSubgroups = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; + uint gl_SubgroupID = gl_LocalInvocationIndex; + uint gl_SubgroupSize = 1; + uint gl_SubgroupInvocationID = 0; + _9.FragColor = 
float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + threadgroup_barrier(mem_flags::mem_device); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_texture); + bool elected = true; +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp new file mode 100644 index 00000000000..b1337e2532b --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp @@ -0,0 +1,327 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return simd_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return !!simd_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)simd_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return simd_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!simd_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)simd_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + simd_vote vote = simd_ballot(value); + // simd_ballot() returns a 64-bit integer-like object, but + // SPIR-V callers expect a uint4. We must convert. + // FIXME: This won't include higher bits if Apple ever supports + // 128 lanes in an SIMD-group. 
+ return uint4(as_type((simd_vote::vote_t)vote), 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, 
min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(all(value == simd_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return simd_all(all(value == (vec)simd_broadcast_first((vec)value))); +} + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return simd_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!simd_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)simd_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return simd_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!simd_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)simd_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return simd_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!simd_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)simd_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return simd_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!simd_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)simd_shuffle_down((vec)value, delta); +} + 
+template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID >= 32 ? 
uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + bool _39 = simd_is_first(); + bool elected = _39; + _9.FragColor = float4(gl_SubgroupEqMask).x; + _9.FragColor = float4(gl_SubgroupGeMask).x; + _9.FragColor = float4(gl_SubgroupGtMask).x; + _9.FragColor = float4(gl_SubgroupLeMask).x; + _9.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = 
spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = simd_all(true); + bool has_any = simd_any(true); + bool has_equal = spvSubgroupAllEqual(0); + has_equal = spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 added = simd_sum(float4(20.0)); + int4 iadded = simd_sum(int4(20)); + float4 multiplied = simd_product(float4(20.0)); + int4 imultiplied = simd_product(int4(20)); + float4 lo = simd_min(float4(20.0)); + float4 hi = simd_max(float4(20.0)); + int4 slo = simd_min(int4(20)); + int4 shi = simd_max(int4(20)); + uint4 ulo = simd_min(uint4(20u)); + uint4 uhi = simd_max(uint4(20u)); + uint4 anded = 
simd_and(ballot_value); + uint4 ored = simd_or(ballot_value); + uint4 xored = simd_xor(ballot_value); + bool4 anded_b = simd_and(ballot_value == uint4(42u)); + bool4 ored_b = simd_or(ballot_value == uint4(42u)); + bool4 xored_b = simd_xor(ballot_value == uint4(42u)); + added = simd_prefix_inclusive_sum(added); + iadded = simd_prefix_inclusive_sum(iadded); + multiplied = simd_prefix_inclusive_product(multiplied); + imultiplied = simd_prefix_inclusive_product(imultiplied); + added = simd_prefix_exclusive_sum(multiplied); + multiplied = simd_prefix_exclusive_product(multiplied); + iadded = simd_prefix_exclusive_sum(imultiplied); + imultiplied = simd_prefix_exclusive_product(imultiplied); + added = quad_sum(added); + multiplied = quad_product(multiplied); + iadded = quad_sum(iadded); + imultiplied = quad_product(imultiplied); + lo = quad_min(lo); + hi = quad_max(hi); + ulo = quad_min(ulo); + uhi = quad_max(uhi); + slo = quad_min(slo); + shi = quad_max(shi); + anded = quad_and(anded); + ored = quad_or(ored); + xored = quad_xor(xored); + anded_b = quad_and(anded == uint4(2u)); + ored_b = quad_or(ored == uint4(3u)); + xored_b = quad_xor(xored == uint4(4u)); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp new file mode 100644 index 00000000000..462c78fb70c --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp 
@@ -0,0 +1,322 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return simd_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return !!simd_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)simd_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return simd_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!simd_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)simd_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + simd_vote vote = simd_ballot(value); + // simd_ballot() returns a 64-bit integer-like object, but + // SPIR-V callers expect a uint4. We must convert. + // FIXME: This won't include higher bits if Apple ever supports + // 128 lanes in an SIMD-group. 
+ return uint4(as_type((simd_vote::vote_t)vote), 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, 
min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(all(value == simd_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return simd_all(all(value == (vec)simd_broadcast_first((vec)value))); +} + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return simd_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!simd_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)simd_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return simd_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!simd_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)simd_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return simd_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!simd_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)simd_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return simd_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!simd_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)simd_shuffle_down((vec)value, delta); +} + 
+template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + uint gl_SubgroupSize = 32; + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID >= 32 ? 
uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, 32 - gl_SubgroupInvocationID), uint3(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, 32 - gl_SubgroupInvocationID - 1), uint3(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + bool _39 = simd_is_first(); + bool elected = _39; + _9.FragColor = float4(gl_SubgroupEqMask).x; + _9.FragColor = float4(gl_SubgroupGeMask).x; + _9.FragColor = float4(gl_SubgroupGtMask).x; + _9.FragColor = float4(gl_SubgroupLeMask).x; + _9.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 
8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = simd_all(true); + bool has_any = simd_any(true); + bool has_equal = spvSubgroupAllEqual(0); + has_equal = spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 added = simd_sum(float4(20.0)); + int4 iadded = simd_sum(int4(20)); + float4 multiplied = simd_product(float4(20.0)); + int4 imultiplied = simd_product(int4(20)); + float4 lo = simd_min(float4(20.0)); + float4 hi = simd_max(float4(20.0)); + int4 slo = simd_min(int4(20)); + int4 shi = simd_max(int4(20)); + uint4 ulo = simd_min(uint4(20u)); + uint4 uhi = simd_max(uint4(20u)); + uint4 anded = simd_and(ballot_value); + uint4 ored = simd_or(ballot_value); + uint4 xored = simd_xor(ballot_value); + added = simd_prefix_inclusive_sum(added); + iadded = simd_prefix_inclusive_sum(iadded); + multiplied = simd_prefix_inclusive_product(multiplied); + imultiplied = simd_prefix_inclusive_product(imultiplied); + added = simd_prefix_exclusive_sum(multiplied); + multiplied = simd_prefix_exclusive_product(multiplied); + iadded = 
simd_prefix_exclusive_sum(imultiplied); + imultiplied = simd_prefix_exclusive_product(imultiplied); + added = quad_sum(added); + multiplied = quad_product(multiplied); + iadded = quad_sum(iadded); + imultiplied = quad_product(imultiplied); + lo = quad_min(lo); + hi = quad_max(hi); + ulo = quad_min(ulo); + uhi = quad_max(uhi); + slo = quad_min(slo); + shi = quad_max(shi); + anded = quad_and(anded); + ored = quad_or(ored); + xored = quad_xor(xored); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp new file mode 100644 index 00000000000..1791ceca1ad --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp @@ -0,0 +1,151 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return quad_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!quad_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)quad_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return quad_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!quad_shuffle_xor((ushort)value, mask); +} + +template +inline vec 
spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)quad_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return quad_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!quad_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)quad_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return quad_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!quad_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)quad_shuffle_down((vec)value, delta); +} + +template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) +{ + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + 
_9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp new file mode 100644 index 00000000000..3910e824405 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp @@ -0,0 +1,282 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return 
!!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return quad_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!quad_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)quad_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, 
gl_SubgroupInvocationID + 1), uint3(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return quad_all(all(value == quad_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return quad_all(value) || !quad_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return quad_all(all(value == (vec)quad_broadcast_first((vec)value))); +} + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return quad_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!quad_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)quad_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return quad_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!quad_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)quad_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return quad_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!quad_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)quad_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return quad_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!quad_shuffle_down((ushort)value, 
delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)quad_shuffle_down((vec)value, delta); +} + +template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) +{ + uint4 gl_SubgroupEqMask = uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, gl_SubgroupSize - gl_SubgroupInvocationID), uint3(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, gl_SubgroupSize - gl_SubgroupInvocationID - 1), uint3(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0)); + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | 
mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + bool _39 = quad_is_first(); + bool elected = _39; + _9.FragColor = float4(gl_SubgroupEqMask).x; + _9.FragColor = float4(gl_SubgroupGeMask).x; + _9.FragColor = float4(gl_SubgroupGtMask).x; + _9.FragColor = float4(gl_SubgroupLeMask).x; + _9.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = quad_all(true); + bool has_any = quad_any(true); + bool has_equal = 
spvSubgroupAllEqual(0); + has_equal = spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp new file mode 100644 index 00000000000..71916ebb988 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp @@ -0,0 +1,316 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return simd_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return !!simd_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)simd_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return simd_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!simd_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)simd_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint 
bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(all(value == simd_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return simd_all(all(value == (vec)simd_broadcast_first((vec)value))); +} + +template 
+inline T spvSubgroupShuffle(T value, ushort lane) +{ + return simd_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!simd_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)simd_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return simd_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!simd_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)simd_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return simd_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!simd_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)simd_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return simd_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!simd_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)simd_shuffle_down((vec)value, delta); +} + +template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return 
!!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct SSBO +{ + float FragColor; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + uint4 gl_SubgroupEqMask = uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID, gl_SubgroupSize - gl_SubgroupInvocationID), uint3(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, gl_SubgroupInvocationID + 1, gl_SubgroupSize - gl_SubgroupInvocationID - 1), uint3(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint3(0)); + _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + simdgroup_barrier(mem_flags::mem_device); + simdgroup_barrier(mem_flags::mem_threadgroup); + simdgroup_barrier(mem_flags::mem_texture); + bool _39 = simd_is_first(); + bool elected = _39; + _9.FragColor = float4(gl_SubgroupEqMask).x; + _9.FragColor = float4(gl_SubgroupGeMask).x; + _9.FragColor = float4(gl_SubgroupGtMask).x; + _9.FragColor = float4(gl_SubgroupLeMask).x; + _9.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = 
spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = simd_all(true); + bool has_any = simd_any(true); + bool has_equal = spvSubgroupAllEqual(0); + has_equal = spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 added = simd_sum(float4(20.0)); + int4 iadded = simd_sum(int4(20)); + float4 multiplied = simd_product(float4(20.0)); + int4 imultiplied = simd_product(int4(20)); + float4 lo = simd_min(float4(20.0)); + float4 hi = simd_max(float4(20.0)); + int4 slo = simd_min(int4(20)); + int4 shi = simd_max(int4(20)); + uint4 ulo = simd_min(uint4(20u)); + uint4 uhi = simd_max(uint4(20u)); + uint4 anded = simd_and(ballot_value); + uint4 ored = simd_or(ballot_value); + uint4 xored = 
simd_xor(ballot_value); + added = simd_prefix_inclusive_sum(added); + iadded = simd_prefix_inclusive_sum(iadded); + multiplied = simd_prefix_inclusive_product(multiplied); + imultiplied = simd_prefix_inclusive_product(imultiplied); + added = simd_prefix_exclusive_sum(multiplied); + multiplied = simd_prefix_exclusive_product(multiplied); + iadded = simd_prefix_exclusive_sum(imultiplied); + imultiplied = simd_prefix_exclusive_product(imultiplied); + added = quad_sum(added); + multiplied = quad_product(multiplied); + iadded = quad_sum(iadded); + imultiplied = quad_product(imultiplied); + lo = quad_min(lo); + hi = quad_max(hi); + ulo = quad_min(ulo); + uhi = quad_max(uhi); + slo = quad_min(slo); + shi = quad_max(shi); + anded = quad_and(anded); + ored = quad_or(ored); + xored = quad_xor(xored); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); +} + diff --git a/reference/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp b/reference/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..328b42ce9fe --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct A +{ + float3 a; + float3 b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device A& _14 [[buffer(0)]]) +{ + bool3 c = _14.b < float3(1.0); + _14.a = select(float3(1.0, 0.0, 0.0), float3(0.0, 0.0, 1.0), c); +} + diff --git a/reference/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp 
b/reference/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..2e37a326532 --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct A +{ + float3x3 a; + float b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device A& _14 [[buffer(0)]]) +{ + bool c = _14.b < 1.0; + _14.a = c ? float3x3(float3(1.0), float3(1.0), float3(1.0)) : float3x3(float3(0.0), float3(0.0), float3(0.0)); + _14.a = c ? float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)) : float3x3(float3(0.0), float3(0.0), float3(0.0)); +} + diff --git a/reference/shaders-msl-no-opt/components/fragment-input-component.frag b/reference/shaders-msl-no-opt/components/fragment-input-component.frag new file mode 100644 index 00000000000..9a65918a7d9 --- /dev/null +++ b/reference/shaders-msl-no-opt/components/fragment-input-component.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 Foo3 [[user(locn0)]]; + float Foo1 [[user(locn0_3)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = float4(in.Foo3, in.Foo1); + return out; +} + diff --git a/reference/shaders-msl-no-opt/components/fragment-output-component.frag b/reference/shaders-msl-no-opt/components/fragment-output-component.frag new file mode 100644 index 00000000000..45b05b9dba0 --- /dev/null +++ b/reference/shaders-msl-no-opt/components/fragment-output-component.frag @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 m_location_0 [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + float FragColor0 = {}; + float2 FragColor1 = {}; + float FragColor3 = {}; + FragColor0 = 1.0; + FragColor1 = float2(2.0, 3.0); + FragColor3 = 4.0; 
+ out.m_location_0.x = FragColor0; + out.m_location_0.yz = FragColor1; + out.m_location_0.w = FragColor3; + return out; +} + diff --git a/reference/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag b/reference/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag new file mode 100644 index 00000000000..0e4bee12f6e --- /dev/null +++ b/reference/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float3 m_location_0 [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + float FragColor0 = {}; + float2 FragColor1 = {}; + FragColor0 = 1.0; + FragColor1 = float2(2.0, 3.0); + out.m_location_0.x = FragColor0; + out.m_location_0.yz = FragColor1; + return out; +} + diff --git a/reference/shaders-msl-no-opt/components/vertex-input-component.vert b/reference/shaders-msl-no-opt/components/vertex-input-component.vert new file mode 100644 index 00000000000..7a099f503b1 --- /dev/null +++ b/reference/shaders-msl-no-opt/components/vertex-input-component.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float3 Foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float3 Foo3 = {}; + float Foo1 = {}; + Foo3 = in.m_location_0.xyz; + Foo1 = in.m_location_0.w; + out.gl_Position = float4(Foo3, Foo1); + out.Foo = Foo3 + float3(Foo1); + return out; +} + diff --git a/reference/shaders-msl/vert/viewport-index.msl2.invalid.vert b/reference/shaders-msl-no-opt/components/vertex-output-component.vert similarity index 57% rename from reference/shaders-msl/vert/viewport-index.msl2.invalid.vert rename to reference/shaders-msl-no-opt/components/vertex-output-component.vert index e5316c072ac..cf135b51288 100644 --- 
a/reference/shaders-msl/vert/viewport-index.msl2.invalid.vert +++ b/reference/shaders-msl-no-opt/components/vertex-output-component.vert @@ -5,20 +5,22 @@ using namespace metal; struct main0_out { + float3 Foo3 [[user(locn0)]]; + float Foo1 [[user(locn0_3)]]; float4 gl_Position [[position]]; - uint gl_ViewportIndex [[viewport_array_index]]; }; struct main0_in { - float4 coord [[attribute(0)]]; + float4 vFoo [[attribute(0)]]; }; vertex main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - out.gl_Position = in.coord; - out.gl_ViewportIndex = uint(int(in.coord.z)); + out.gl_Position = in.vFoo; + out.Foo3 = in.vFoo.xyz; + out.Foo1 = in.vFoo.w; return out; } diff --git a/reference/opt/shaders-msl/frag/16bit-constants.frag b/reference/shaders-msl-no-opt/frag/16bit-constants.invalid.frag similarity index 84% rename from reference/opt/shaders-msl/frag/16bit-constants.frag rename to reference/shaders-msl-no-opt/frag/16bit-constants.invalid.frag index 56c7ea5df4a..542beb31898 100644 --- a/reference/opt/shaders-msl/frag/16bit-constants.frag +++ b/reference/shaders-msl-no-opt/frag/16bit-constants.invalid.frag @@ -14,8 +14,8 @@ fragment main0_out main0() { main0_out out = {}; out.foo = half(1.0); - out.bar = 2; - out.baz = 3u; + out.bar = short(2); + out.baz = ushort(3); return out; } diff --git a/reference/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag b/reference/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag new file mode 100644 index 00000000000..0e0348bf851 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + bool _9 = simd_is_helper_thread(); + bool helper = _9; +} + diff --git a/reference/shaders-msl-no-opt/frag/depth-image-gather.asm.frag b/reference/shaders-msl-no-opt/frag/depth-image-gather.asm.frag new file mode 100644 index 00000000000..025e2258561 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/frag/depth-image-gather.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d g_depthTexture [[texture(0)]], sampler g_sampler [[sampler(0)]], sampler g_comp [[sampler(1)]]) +{ + main0_out out = {}; + out.out_var_SV_Target0 = g_depthTexture.gather_compare(g_comp, in.in_var_TEXCOORD0, 0.5) * g_depthTexture.gather(g_sampler, in.in_var_TEXCOORD0, int2(0)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag b/reference/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag new file mode 100644 index 00000000000..5f8dc7203e5 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct spvDescriptorSetBuffer0 +{ + texture2d uTexture2 [[id(0)]]; + sampler uTexture2Smplr [[id(1)]]; + texture2d uTexture1 [[id(2)]]; + sampler uTexture1Smplr [[id(3)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], texture2d uTextureDiscrete2 [[texture(0)]], sampler uTextureDiscrete2Smplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = spvDescriptorSet0.uTexture2.sample(spvDescriptorSet0.uTexture2Smplr, in.vUV); + out.FragColor += uTextureDiscrete2.sample(uTextureDiscrete2Smplr, in.vUV); + return out; +} + diff --git a/reference/shaders-msl/frag/fp16.desktop.invalid.frag b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag similarity index 88% rename from 
reference/shaders-msl/frag/fp16.desktop.invalid.frag rename to reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag index 001944fcba7..16182ae2e14 100644 --- a/reference/shaders-msl/frag/fp16.desktop.invalid.frag +++ b/reference/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag @@ -5,51 +5,54 @@ using namespace metal; -struct ResType -{ - half4 _m0; - int4 _m1; -}; - -struct main0_in -{ - half v1 [[user(locn0)]]; - half2 v2 [[user(locn1)]]; - half3 v3 [[user(locn2)]]; - half4 v4 [[user(locn3)]]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } // Implementation of the GLSL radians() function template -T radians(T d) +inline T radians(T d) { return d * T(0.01745329251); } // Implementation of the GLSL degrees() function template -T degrees(T r) +inline T degrees(T r) { return r * T(57.2957795131); } +struct ResType +{ + half4 _m0; + int4 _m1; +}; + +struct main0_in +{ + half v1 [[user(locn0)]]; + half2 v2 [[user(locn1)]]; + half3 v3 [[user(locn2)]]; + half4 v4 [[user(locn3)]]; +}; + +static inline __attribute__((always_inline)) half2x2 test_mat2(thread const half2& a, thread const half2& b, thread const half2& c, thread const half2& d) { return half2x2(half2(a), half2(b)) * half2x2(half2(c), half2(d)); } +static inline __attribute__((always_inline)) half3x3 test_mat3(thread const half3& a, thread const half3& b, thread const half3& c, thread const half3& d, thread const half3& e, thread const half3& f) { return half3x3(half3(a), half3(b), half3(c)) * half3x3(half3(d), half3(e), half3(f)); } +static inline __attribute__((always_inline)) void test_constants() { half a = half(1.0); @@ -62,11 +65,13 @@ void test_constants() half h = half(9.5367431640625e-07); } +static inline __attribute__((always_inline)) half test_result() { return half(1.0); } +static inline __attribute__((always_inline)) void test_conversions() { half one = 
test_result(); @@ -80,6 +85,7 @@ void test_conversions() half d2 = half(d); } +static inline __attribute__((always_inline)) void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) { half4 res = radians(v4); @@ -88,11 +94,11 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) res = cos(v4); res = tan(v4); res = asin(v4); - res = atan2(v4, v3.xyzz); + res = precise::atan2(v4, v3.xyzz); res = atan(v4); - res = sinh(v4); - res = cosh(v4); - res = tanh(v4); + res = fast::sinh(v4); + res = fast::cosh(v4); + res = precise::tanh(v4); res = asinh(v4); res = acosh(v4); res = atanh(v4); @@ -119,8 +125,7 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) res = max(v4, v4); res = clamp(v4, v4, v4); res = mix(v4, v4, v4); - bool4 _243 = v4 < v4; - res = half4(_243.x ? v4.x : v4.x, _243.y ? v4.y : v4.y, _243.z ? v4.z : v4.z, _243.w ? v4.w : v4.w); + res = select(v4, v4, v4 < v4); res = step(v4, v4); res = smoothstep(v4, v4, v4); bool4 btmp = isnan(v4); @@ -138,7 +143,7 @@ void test_builtins(thread half4& v4, thread half3& v3, thread half& v1) t0 = distance(v4, v4); t0 = dot(v4, v4); half3 res3 = cross(v3, v3); - res = normalize(v4); + res = fast::normalize(v4); res = faceforward(v4, v4, v4); res = reflect(v4, v4); res = refract(v4, v4, v1); diff --git a/reference/shaders-msl-no-opt/frag/image-gather.frag b/reference/shaders-msl-no-opt/frag/image-gather.frag new file mode 100644 index 00000000000..db793c14eea --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/image-gather.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uSamp [[texture(0)]], depth2d uSampShadow [[texture(1)]], sampler uSampSmplr [[sampler(0)]], sampler uSampShadowSmplr [[sampler(1)]]) +{ + main0_out out = {}; + out.FragColor = uSamp.gather(uSampSmplr, 
in.vUV.xy, int2(0), component::x); + out.FragColor += uSamp.gather(uSampSmplr, in.vUV.xy, int2(0), component::y); + out.FragColor += uSampShadow.gather_compare(uSampShadowSmplr, in.vUV.xy, in.vUV.z); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/in_block_assign.frag b/reference/shaders-msl-no-opt/frag/in_block_assign.frag index 427c689c49c..6b7afc4d2c5 100644 --- a/reference/shaders-msl-no-opt/frag/in_block_assign.frag +++ b/reference/shaders-msl-no-opt/frag/in_block_assign.frag @@ -15,14 +15,14 @@ struct main0_out struct main0_in { - float4 VOUT_a [[user(locn0)]]; + float4 Clip_a [[user(locn0)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; VOUT Clip = {}; - Clip.a = in.VOUT_a; + Clip.a = in.Clip_a; VOUT tmp = Clip; tmp.a += float4(1.0); out.FragColor = tmp.a; diff --git a/reference/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag b/reference/shaders-msl-no-opt/frag/min-max-clamp.invalid.asm.frag similarity index 100% rename from reference/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag rename to reference/shaders-msl-no-opt/frag/min-max-clamp.invalid.asm.frag diff --git a/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag new file mode 100644 index 00000000000..7835e013076 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag @@ -0,0 +1,69 @@ +#include +#include + +using namespace metal; + +struct main0_in +{ + float v1 [[user(locn0)]]; + float2 v2 [[user(locn1)]]; + float3 v3 [[user(locn2)]]; + float4 v4 [[user(locn3)]]; + half h1 [[user(locn4)]]; + half2 h2 [[user(locn5)]]; + half3 h3 [[user(locn6)]]; + half4 h4 [[user(locn7)]]; +}; + +fragment void main0(main0_in in [[stage_in]]) +{ + float res = fast::min(in.v1, in.v1); + res = fast::max(in.v1, in.v1); + res = fast::clamp(in.v1, in.v1, in.v1); + res = fast::min(in.v1, in.v1); + res = fast::max(in.v1, in.v1); + 
res = fast::clamp(in.v1, in.v1, in.v1); + float2 res2 = fast::min(in.v2, in.v2); + res2 = fast::max(in.v2, in.v2); + res2 = fast::clamp(in.v2, in.v2, in.v2); + res2 = fast::min(in.v2, in.v2); + res2 = fast::max(in.v2, in.v2); + res2 = fast::clamp(in.v2, in.v2, in.v2); + float3 res3 = fast::min(in.v3, in.v3); + res3 = fast::max(in.v3, in.v3); + res3 = fast::clamp(in.v3, in.v3, in.v3); + res3 = fast::min(in.v3, in.v3); + res3 = fast::max(in.v3, in.v3); + res3 = fast::clamp(in.v3, in.v3, in.v3); + float4 res4 = fast::min(in.v4, in.v4); + res4 = fast::max(in.v4, in.v4); + res4 = fast::clamp(in.v4, in.v4, in.v4); + res4 = fast::min(in.v4, in.v4); + res4 = fast::max(in.v4, in.v4); + res4 = fast::clamp(in.v4, in.v4, in.v4); + half hres = min(in.h1, in.h1); + hres = max(in.h1, in.h1); + hres = clamp(in.h1, in.h1, in.h1); + hres = min(in.h1, in.h1); + hres = max(in.h1, in.h1); + hres = clamp(in.h1, in.h1, in.h1); + half2 hres2 = min(in.h2, in.h2); + hres2 = max(in.h2, in.h2); + hres2 = clamp(in.h2, in.h2, in.h2); + hres2 = min(in.h2, in.h2); + hres2 = max(in.h2, in.h2); + hres2 = clamp(in.h2, in.h2, in.h2); + half3 hres3 = min(in.h3, in.h3); + hres3 = max(in.h3, in.h3); + hres3 = clamp(in.h3, in.h3, in.h3); + hres3 = min(in.h3, in.h3); + hres3 = max(in.h3, in.h3); + hres3 = clamp(in.h3, in.h3, in.h3); + half4 hres4 = min(in.h4, in.h4); + hres4 = max(in.h4, in.h4); + hres4 = clamp(in.h4, in.h4, in.h4); + hres4 = min(in.h4, in.h4); + hres4 = max(in.h4, in.h4); + hres4 = clamp(in.h4, in.h4, in.h4); +} + diff --git a/reference/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag b/reference/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag new file mode 100644 index 00000000000..f1ad5c5fb25 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; + int vIndex 
[[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], array, 10> uTex [[texture(0)]], sampler Immut [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex[in.vIndex].sample(Immut, in.vUV); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag new file mode 100644 index 00000000000..cb01950d221 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +static inline __attribute__((always_inline)) +void callee2(device SSBO1& v_14, thread float4& gl_FragCoord) +{ + int _25 = int(gl_FragCoord.x); + v_14.values1[_25]++; +} + +static inline __attribute__((always_inline)) +void callee(device SSBO1& v_14, thread float4& gl_FragCoord, device SSBO0& v_35) +{ + int _38 = int(gl_FragCoord.x); + v_35.values0[_38]++; + callee2(v_14, gl_FragCoord); +} + +fragment void main0(device SSBO1& v_14 [[buffer(0), raster_order_group(0)]], device SSBO0& v_35 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(v_14, gl_FragCoord, v_35); +} + diff --git a/reference/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag b/reference/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag new file mode 100644 index 00000000000..b5ffd11f40f --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag @@ -0,0 +1,75 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + interpolant a_0 [[user(locn0)]]; + interpolant a_1 [[user(locn1)]]; + interpolant b_0 [[user(locn2)]]; + interpolant b_1 [[user(locn3)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray a = {}; + spvUnsafeArray b = {}; + a[0] = in.a_0.interpolate_at_centroid(); + a[1] = in.a_1.interpolate_at_centroid(); + b[0] = in.b_0.interpolate_at_centroid(); + b[1] = in.b_1.interpolate_at_centroid(); + out.FragColor.x = in.a_0.interpolate_at_offset(float2(0.5) + 0.4375).x; + out.FragColor.y = in.a_1.interpolate_at_offset(float2(0.5) + 0.4375).y; + out.FragColor.z = in.b_0.interpolate_at_offset(float2(0.5) + 0.4375).z; + out.FragColor.w = in.b_1.interpolate_at_offset(float2(0.5) + 0.4375).w; + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag b/reference/shaders-msl-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag similarity index 100% rename from reference/opt/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag rename to reference/shaders-msl-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag diff --git 
a/reference/shaders-msl/frag/shadow-compare-global-alias.invalid.frag b/reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag similarity index 69% rename from reference/shaders-msl/frag/shadow-compare-global-alias.invalid.frag rename to reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag index 4bd5d32091f..58985c63541 100644 --- a/reference/shaders-msl/frag/shadow-compare-global-alias.invalid.frag +++ b/reference/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag @@ -15,22 +15,26 @@ struct main0_in float3 vUV [[user(locn0)]]; }; -float Samp(thread const float3& uv, thread depth2d uTex, thread sampler uSamp) +static inline __attribute__((always_inline)) +float Samp(thread const float3& uv, depth2d uTex, sampler uSamp) { return uTex.sample_compare(uSamp, uv.xy, uv.z); } -float Samp2(thread const float3& uv, thread depth2d uSampler, thread const sampler uSamplerSmplr, thread float3& vUV) +static inline __attribute__((always_inline)) +float Samp2(thread const float3& uv, depth2d uSampler, sampler uSamplerSmplr, thread float3& vUV) { return uSampler.sample_compare(uSamplerSmplr, vUV.xy, vUV.z); } -float Samp3(thread const depth2d uT, thread const sampler uS, thread const float3& uv, thread float3& vUV) +static inline __attribute__((always_inline)) +float Samp3(depth2d uT, sampler uS, thread const float3& uv, thread float3& vUV) { return uT.sample_compare(uS, vUV.xy, vUV.z); } -float Samp4(thread const depth2d uS, thread const sampler uSSmplr, thread const float3& uv, thread float3& vUV) +static inline __attribute__((always_inline)) +float Samp4(depth2d uS, sampler uSSmplr, thread const float3& uv, thread float3& vUV) { return uS.sample_compare(uSSmplr, vUV.xy, vUV.z); } diff --git a/reference/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag b/reference/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag new file mode 100644 index 00000000000..7680908a448 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag @@ -0,0 +1,314 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline T spvSubgroupBroadcast(T value, ushort lane) +{ + return simd_broadcast(value, lane); +} + +template<> +inline bool spvSubgroupBroadcast(bool value, ushort lane) +{ + return !!simd_broadcast((ushort)value, lane); +} + +template +inline vec spvSubgroupBroadcast(vec value, ushort lane) +{ + return (vec)simd_broadcast((vec)value, lane); +} + +template +inline T spvSubgroupBroadcastFirst(T value) +{ + return simd_broadcast_first(value); +} + +template<> +inline bool spvSubgroupBroadcastFirst(bool value) +{ + return !!simd_broadcast_first((ushort)value); +} + +template +inline vec spvSubgroupBroadcastFirst(vec value) +{ + return (vec)simd_broadcast_first((vec)value); +} + +inline uint4 spvSubgroupBallot(bool value) +{ + simd_vote vote = simd_ballot(value); + // simd_ballot() returns a 64-bit integer-like object, but + // SPIR-V callers expect a uint4. We must convert. + // FIXME: This won't include higher bits if Apple ever supports + // 128 lanes in an SIMD-group. 
+ return uint4(as_type((simd_vote::vote_t)vote), 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, 
min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(all(value == simd_broadcast_first(value))); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +template +inline bool spvSubgroupAllEqual(vec value) +{ + return simd_all(all(value == (vec)simd_broadcast_first((vec)value))); +} + +template +inline T spvSubgroupShuffle(T value, ushort lane) +{ + return simd_shuffle(value, lane); +} + +template<> +inline bool spvSubgroupShuffle(bool value, ushort lane) +{ + return !!simd_shuffle((ushort)value, lane); +} + +template +inline vec spvSubgroupShuffle(vec value, ushort lane) +{ + return (vec)simd_shuffle((vec)value, lane); +} + +template +inline T spvSubgroupShuffleXor(T value, ushort mask) +{ + return simd_shuffle_xor(value, mask); +} + +template<> +inline bool spvSubgroupShuffleXor(bool value, ushort mask) +{ + return !!simd_shuffle_xor((ushort)value, mask); +} + +template +inline vec spvSubgroupShuffleXor(vec value, ushort mask) +{ + return (vec)simd_shuffle_xor((vec)value, mask); +} + +template +inline T spvSubgroupShuffleUp(T value, ushort delta) +{ + return simd_shuffle_up(value, delta); +} + +template<> +inline bool spvSubgroupShuffleUp(bool value, ushort delta) +{ + return !!simd_shuffle_up((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleUp(vec value, ushort delta) +{ + return (vec)simd_shuffle_up((vec)value, delta); +} + +template +inline T spvSubgroupShuffleDown(T value, ushort delta) +{ + return simd_shuffle_down(value, delta); +} + +template<> +inline bool spvSubgroupShuffleDown(bool value, ushort delta) +{ + return !!simd_shuffle_down((ushort)value, delta); +} + +template +inline vec spvSubgroupShuffleDown(vec value, ushort delta) +{ + return (vec)simd_shuffle_down((vec)value, delta); +} + 
+template +inline T spvQuadBroadcast(T value, uint lane) +{ + return quad_broadcast(value, lane); +} + +template<> +inline bool spvQuadBroadcast(bool value, uint lane) +{ + return !!quad_broadcast((ushort)value, lane); +} + +template +inline vec spvQuadBroadcast(vec value, uint lane) +{ + return (vec)quad_broadcast((vec)value, lane); +} + +template +inline T spvQuadSwap(T value, uint dir) +{ + return quad_shuffle_xor(value, dir + 1); +} + +template<> +inline bool spvQuadSwap(bool value, uint dir) +{ + return !!quad_shuffle_xor((ushort)value, dir + 1); +} + +template +inline vec spvQuadSwap(vec value, uint dir) +{ + return (vec)quad_shuffle_xor((vec)value, dir + 1); +} + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + main0_out out = {}; + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID >= 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); + uint4 gl_SubgroupGtMask = uint4(insert_bits(0u, 0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + uint4 gl_SubgroupLtMask = 
uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + out.FragColor = float(gl_SubgroupSize); + out.FragColor = float(gl_SubgroupInvocationID); + bool _24 = simd_is_first(); + bool elected = _24; + out.FragColor = float4(gl_SubgroupEqMask).x; + out.FragColor = float4(gl_SubgroupGeMask).x; + out.FragColor = float4(gl_SubgroupGtMask).x; + out.FragColor = float4(gl_SubgroupLeMask).x; + out.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = spvSubgroupBroadcast(float4(10.0), 8u); + bool2 broadcasted_bool = spvSubgroupBroadcast(bool2(true), 8u); + float3 first = spvSubgroupBroadcastFirst(float3(20.0)); + bool4 first_bool = spvSubgroupBroadcastFirst(bool4(false)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value, gl_SubgroupSize); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value, gl_SubgroupSize); + uint msb = spvSubgroupBallotFindMSB(ballot_value, gl_SubgroupSize); + uint shuffled = spvSubgroupShuffle(10u, 8u); + bool shuffled_bool = spvSubgroupShuffle(true, 9u); + uint shuffled_xor = spvSubgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = spvSubgroupShuffleXor(false, 9u); + uint shuffled_up = spvSubgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = spvSubgroupShuffleUp(true, 4u); + uint shuffled_down = spvSubgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = spvSubgroupShuffleDown(false, 4u); + bool has_all = simd_all(true); + bool has_any = simd_any(true); + bool has_equal = spvSubgroupAllEqual(0); + has_equal = 
spvSubgroupAllEqual(true); + has_equal = spvSubgroupAllEqual(float3(0.0, 1.0, 2.0)); + has_equal = spvSubgroupAllEqual(bool4(true, true, false, true)); + float4 added = simd_sum(float4(20.0)); + int4 iadded = simd_sum(int4(20)); + float4 multiplied = simd_product(float4(20.0)); + int4 imultiplied = simd_product(int4(20)); + float4 lo = simd_min(float4(20.0)); + float4 hi = simd_max(float4(20.0)); + int4 slo = simd_min(int4(20)); + int4 shi = simd_max(int4(20)); + uint4 ulo = simd_min(uint4(20u)); + uint4 uhi = simd_max(uint4(20u)); + uint4 anded = simd_and(ballot_value); + uint4 ored = simd_or(ballot_value); + uint4 xored = simd_xor(ballot_value); + added = simd_prefix_inclusive_sum(added); + iadded = simd_prefix_inclusive_sum(iadded); + multiplied = simd_prefix_inclusive_product(multiplied); + imultiplied = simd_prefix_inclusive_product(imultiplied); + added = simd_prefix_exclusive_sum(multiplied); + multiplied = simd_prefix_exclusive_product(multiplied); + iadded = simd_prefix_exclusive_sum(imultiplied); + imultiplied = simd_prefix_exclusive_product(imultiplied); + added = quad_sum(added); + multiplied = quad_product(multiplied); + iadded = quad_sum(iadded); + imultiplied = quad_product(imultiplied); + lo = quad_min(lo); + hi = quad_max(hi); + ulo = quad_min(ulo); + uhi = quad_max(uhi); + slo = quad_min(slo); + shi = quad_max(shi); + anded = quad_and(anded); + ored = quad_or(ored); + xored = quad_xor(xored); + float4 swap_horiz = spvQuadSwap(float4(20.0), 0u); + bool4 swap_horiz_bool = spvQuadSwap(bool4(true), 0u); + float4 swap_vertical = spvQuadSwap(float4(20.0), 1u); + bool4 swap_vertical_bool = spvQuadSwap(bool4(true), 1u); + float4 swap_diagonal = spvQuadSwap(float4(20.0), 2u); + bool4 swap_diagonal_bool = spvQuadSwap(bool4(true), 2u); + float4 quad_broadcast0 = spvQuadBroadcast(float4(20.0), 3u); + bool4 quad_broadcast_bool = spvQuadBroadcast(bool4(true), 3u); + return out; +} + diff --git 
a/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag b/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag new file mode 100644 index 00000000000..c67984892dd --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(1)]]; +}; + +fragment main0_out main0(float4 uInput [[color(1)]]) +{ + main0_out out = {}; + out.FragColor = uInput; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag b/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag new file mode 100644 index 00000000000..c67984892dd --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(1)]]; +}; + +fragment main0_out main0(float4 uInput [[color(1)]]) +{ + main0_out out = {}; + out.FragColor = uInput; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag b/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag new file mode 100644 index 00000000000..950895d088e --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 samp3(float4 uS) +{ + return uS; +} + +static inline __attribute__((always_inline)) +float4 samp(float4 
uSub) +{ + return uSub + samp3(uSub); +} + +static inline __attribute__((always_inline)) +float4 samp2(float4 uS) +{ + return uS + samp3(uS); +} + +fragment main0_out main0(float4 uSub [[color(0)]]) +{ + main0_out out = {}; + out.FragColor = samp(uSub) + samp2(uSub); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag b/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag new file mode 100644 index 00000000000..950895d088e --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 samp3(float4 uS) +{ + return uS; +} + +static inline __attribute__((always_inline)) +float4 samp(float4 uSub) +{ + return uSub + samp3(uSub); +} + +static inline __attribute__((always_inline)) +float4 samp2(float4 uS) +{ + return uS + samp3(uS); +} + +fragment main0_out main0(float4 uSub [[color(0)]]) +{ + main0_out out = {}; + out.FragColor = samp(uSub) + samp2(uSub); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag new file mode 100644 index 00000000000..8c7f67b68b0 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct spvDescriptorSetBuffer0 +{ + sampler uSampler [[id(8)]]; + texture2d uTex [[id(9)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 
[[buffer(0)]], float4 uSub [[color(1)]]) +{ + main0_out out = {}; + out.FragColor = uSub + spvDescriptorSet0.uTex.sample(spvDescriptorSet0.uSampler, float2(0.5)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag new file mode 100644 index 00000000000..9108927ee41 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(float4 uSub [[color(1)]], texture2d uTex [[texture(9)]], sampler uSampler [[sampler(8)]]) +{ + main0_out out = {}; + out.FragColor = uSub + uTex.sample(uSampler, float2(0.5)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag new file mode 100644 index 00000000000..8c7f67b68b0 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct spvDescriptorSetBuffer0 +{ + sampler uSampler [[id(8)]]; + texture2d uTex [[id(9)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 uSub [[color(1)]]) +{ + main0_out out = {}; + out.FragColor = uSub + spvDescriptorSet0.uTex.sample(spvDescriptorSet0.uSampler, float2(0.5)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag 
b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag new file mode 100644 index 00000000000..9108927ee41 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(float4 uSub [[color(1)]], texture2d uTex [[texture(9)]], sampler uSampler [[sampler(8)]]) +{ + main0_out out = {}; + out.FragColor = uSub + uTex.sample(uSampler, float2(0.5)); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag index 4cb0fdd59bc..ff4b8a91943 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... 
Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -108,29 +109,6 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } } -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) { constant uint& tex1dSwzl = spvSwizzleConstants[0]; @@ -162,9 +140,9 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = float4(spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl)); c = float4(spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl)); c = float4(texBuffer.read(spvTexelBufferCoord(0))); - c = float4(spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl)); - c = float4(spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl)); - c = float4(spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, 
uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl)); - c = float4(spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl)); + c = float4(spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0))); + c = float4(spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0))); + c = float4(spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0))); + c = float4(spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w)))); } diff --git a/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag index 581f290941d..9e5dba8c568 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -109,8 +110,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... params) { if (sw) { @@ -131,7 +132,8 @@ inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p return t.gather_compare(s, spvForward(params)...); } -float4 doSwizzle(thread texture1d tex1d, thread const sampler tex1dSmplr, constant uint& tex1dSwzl, thread texture2d tex2d, thread const sampler tex2dSmplr, constant uint& tex2dSwzl, thread texture3d tex3d, thread const sampler tex3dSmplr, constant uint& tex3dSwzl, thread texturecube texCube, thread const sampler texCubeSmplr, constant uint& texCubeSwzl, thread texture2d_array tex2dArray, thread const sampler tex2dArraySmplr, constant uint& tex2dArraySwzl, thread texturecube_array texCubeArray, thread const sampler texCubeArraySmplr, constant uint& texCubeArraySwzl, thread depth2d depth2d, thread const sampler depth2dSmplr, constant uint& depth2dSwzl, thread depthcube depthCube, thread const sampler depthCubeSmplr, constant uint& depthCubeSwzl, thread depth2d_array depth2dArray, thread const sampler depth2dArraySmplr, constant uint& depth2dArraySwzl, thread depthcube_array depthCubeArray, thread const sampler depthCubeArraySmplr, constant uint& depthCubeArraySwzl, thread texture2d texBuffer) +static inline __attribute__((always_inline)) +float4 doSwizzle(texture1d tex1d, sampler tex1dSmplr, constant uint& tex1dSwzl, texture2d tex2d, sampler tex2dSmplr, constant uint& tex2dSwzl, texture3d tex3d, sampler tex3dSmplr, constant 
uint& tex3dSwzl, texturecube texCube, sampler texCubeSmplr, constant uint& texCubeSwzl, texture2d_array tex2dArray, sampler tex2dArraySmplr, constant uint& tex2dArraySwzl, texturecube_array texCubeArray, sampler texCubeArraySmplr, constant uint& texCubeArraySwzl, depth2d depth2d, sampler depth2dSmplr, constant uint& depth2dSwzl, depthcube depthCube, sampler depthCubeSmplr, constant uint& depthCubeSwzl, depth2d_array depth2dArray, sampler depth2dArraySmplr, constant uint& depth2dArraySwzl, depthcube_array depthCubeArray, sampler depthCubeArraySmplr, constant uint& depthCubeArraySwzl, texture2d texBuffer) { float4 c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0)), tex2dSwzl); @@ -139,42 +141,42 @@ float4 doSwizzle(thread texture1d tex1d, thread const sampler tex1dSmplr, c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), depth2dSwzl); - c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), depthCubeSwzl); - c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), depth2dArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, 1.0), depth2dSwzl); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, 1.0), depthCubeSwzl); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, 
uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), 1.0), depth2dArraySwzl); c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), depthCubeArraySwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl); float4 _103 = float4(0.0, 0.0, 1.0, 1.0); - _103.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _103.xy / _103.z, float4(0.0, 0.0, 1.0, 1.0).z / _103.z), depth2dSwzl); + _103.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _103.xy / _103.z, 1.0 / _103.z), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)), tex3dSwzl); c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0), level(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), depth2dSwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, 1.0, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl); c = 
spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl); float4 _131 = float4(0.0, 0.0, 1.0, 1.0); - _131.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _131.xy / _131.z, float4(0.0, 0.0, 1.0, 1.0).z / _131.z, level(0.0)), depth2dSwzl); + _131.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _131.xy / _131.z, 1.0 / _131.z, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl); c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl); c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl); - c = spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl); - c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl); - c = spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl); - c = spvGatherCompareSwizzle, float2, float>(depth2dSmplr, depth2d, float2(0.0), 1.0, depth2dSwzl); - c = spvGatherCompareSwizzle, float3, float>(depthCubeSmplr, depthCube, float3(0.0), 1.0, depthCubeSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(depth2dArraySmplr, depth2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, depth2dArraySwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(depthCubeArraySmplr, depthCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, depthCubeArraySwzl); + c = spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, 
component::y, float3(0.0)); + c = spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(depth2d, depth2dSmplr, depth2dSwzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(depthCube, depthCubeSmplr, depthCubeSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(depth2dArray, depth2dArraySmplr, depth2dArraySwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); return c; } diff --git a/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag index 86b712536e8..0ec278f977c 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. 
-template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -108,29 +109,6 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } } -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) { constant uint& tex1dSwzl = spvSwizzleConstants[0]; @@ -162,9 +140,9 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = float4(spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl)); c = float4(spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl)); c = float4(texBuffer.read(spvTexelBufferCoord(0))); - c = float4(spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl)); - c = float4(spvGatherSwizzle, 
float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl)); - c = float4(spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl)); - c = float4(spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl)); + c = float4(spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0))); + c = float4(spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0))); + c = float4(spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0))); + c = float4(spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w)))); } diff --git a/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag b/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag index fb9865bcf08..9366eeab585 100644 --- a/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag +++ b/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag @@ -6,22 +6,12 @@ using namespace metal; // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +73,8 @@ inline T 
spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -109,8 +110,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... params) { if (sw) { @@ -149,41 +150,41 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z))), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), depth2dSwzl); - c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), depthCubeSwzl); - c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), depth2dArraySwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, 1.0), depth2dSwzl); + c.x = spvTextureSwizzle(depthCube.sample_compare(depthCubeSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, 1.0), depthCubeSwzl); + c.x = spvTextureSwizzle(depth2dArray.sample_compare(depth2dArraySmplr, float4(0.0, 0.0, 0.0, 
1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), 1.0), depth2dArraySwzl); c.x = spvTextureSwizzle(depthCubeArray.sample_compare(depthCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), depthCubeArraySwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), tex3dSwzl); float4 _100 = float4(0.0, 0.0, 1.0, 1.0); - _100.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _100.xy / _100.z, float4(0.0, 0.0, 1.0, 1.0).z / _100.z), depth2dSwzl); + _100.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _100.xy / _100.z, 1.0 / _100.z), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, 0.0), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float2(0.0), level(0.0)), tex2dSwzl); c = spvTextureSwizzle(tex3d.sample(tex3dSmplr, float3(0.0), level(0.0)), tex3dSwzl); c = spvTextureSwizzle(texCube.sample(texCubeSmplr, float3(0.0), level(0.0)), texCubeSwzl); c = spvTextureSwizzle(tex2dArray.sample(tex2dArraySmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), tex2dArraySwzl); c = spvTextureSwizzle(texCubeArray.sample(texCubeArraySmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), texCubeArraySwzl); - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), depth2dSwzl); + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, float3(0.0, 0.0, 1.0).xy, 1.0, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.sample(tex1dSmplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), tex1dSwzl); c = spvTextureSwizzle(tex2d.sample(tex2dSmplr, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), tex2dSwzl); c = 
spvTextureSwizzle(tex3d.sample(tex3dSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), tex3dSwzl); float4 _128 = float4(0.0, 0.0, 1.0, 1.0); - _128.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _128.xy / _128.z, float4(0.0, 0.0, 1.0, 1.0).z / _128.z, level(0.0)), depth2dSwzl); + _128.z = 1.0; + c.x = spvTextureSwizzle(depth2d.sample_compare(depth2dSmplr, _128.xy / _128.z, 1.0 / _128.z, level(0.0)), depth2dSwzl); c = spvTextureSwizzle(tex1d.read(uint(0)), tex1dSwzl); c = spvTextureSwizzle(tex2d.read(uint2(int2(0)), 0), tex2dSwzl); c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl); - c = spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl); - c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl); - c = spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl); - c = spvGatherCompareSwizzle, float2, float>(depth2dSmplr, depth2d, float2(0.0), 1.0, depth2dSwzl); - c = spvGatherCompareSwizzle, float3, float>(depthCubeSmplr, depthCube, float3(0.0), 1.0, depthCubeSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(depth2dArraySmplr, depth2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, depth2dArraySwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(depthCubeArraySmplr, depthCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, depthCubeArraySwzl); + c = spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, 
component::y, float3(0.0)); + c = spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(depth2d, depth2dSmplr, depth2dSwzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(depthCube, depthCubeSmplr, depthCubeSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(depth2dArray, depth2dArraySmplr, depth2dArraySwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); } diff --git a/reference/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag b/reference/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..8fcb19a8505 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uSamp [[texture(0)]], sampler uSampSmplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uSamp.gather(uSampSmplr, in.vUV, int2(0), component::y); + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag b/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag new file mode 100644 index 00000000000..7c601820789 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag @@ -0,0 +1,32 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + float4 v; +}; + +struct UBO +{ + Foo foo; +}; + +struct spvDescriptorSetBuffer0 
+{ + constant UBO* ubos [[id(0)]][2]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = spvDescriptorSet0.ubos[1]->foo.v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag b/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag new file mode 100644 index 00000000000..0b1ca91f547 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + float4 v; +}; + +struct UBO +{ + Foo foo; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant UBO* ubos_0 [[buffer(0)]], constant UBO* ubos_1 [[buffer(1)]]) +{ + constant UBO* ubos[] = + { + ubos_0, + ubos_1, + }; + + main0_out out = {}; + out.FragColor = ubos[1]->foo.v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag b/reference/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag new file mode 100644 index 00000000000..ce3291a28ea --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4 v; + float4x4 m; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _13 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = (_13.m * in.vColor) + _13.v; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/variables.zero-initialize.frag b/reference/shaders-msl-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..0720087a637 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,40 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + int a; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + int uninit_function_int = {}; + int uninit_int = {}; + int4 uninit_vector = {}; + float4x4 uninit_matrix = {}; + Foo uninit_foo = {}; + if (in.vColor.x > 10.0) + { + uninit_function_int = 10; + } + else + { + uninit_function_int = 20; + } + out.FragColor = in.vColor; + return out; +} + diff --git a/reference/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag b/reference/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag new file mode 100644 index 00000000000..f42aeb876a7 --- /dev/null +++ b/reference/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + bool _12 = gl_HelperInvocation; + float _15 = float(_12); + out.FragColor = _15; + gl_HelperInvocation = true, discard_fragment(); + bool _16 = gl_HelperInvocation; + float _17 = float(_16); + out.FragColor = _17; + return out; +} + diff --git a/reference/shaders-msl-no-opt/packing/array-of-vec3.comp b/reference/shaders-msl-no-opt/packing/array-of-vec3.comp new file mode 100644 index 00000000000..0dd52ab36d9 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/array-of-vec3.comp @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + packed_float3 v[16]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _13 [[buffer(0)]]) +{ + _13.v[1] = float3(_13.v[0]); +} + diff --git 
a/reference/shaders-msl-no-opt/packing/array-of-vec4.comp b/reference/shaders-msl-no-opt/packing/array-of-vec4.comp new file mode 100644 index 00000000000..025cd425469 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/array-of-vec4.comp @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 v[16]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _13 [[buffer(0)]]) +{ + _13.v[1] = _13.v[0]; +} + diff --git a/reference/shaders-msl-no-opt/packing/isolated-scalar-access.comp b/reference/shaders-msl-no-opt/packing/isolated-scalar-access.comp new file mode 100644 index 00000000000..f1a3719158f --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/isolated-scalar-access.comp @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 v; + float4x4 cm; + float4x4 rm; + packed_float3 v3; + float f; +}; + +kernel void main0(device SSBO& _12 [[buffer(0)]]) +{ + threadgroup float4 shared_vec4; + threadgroup float3 shared_vec3; + ((device float*)&_12.v)[0u] = 10.0; + _12.v3[1u] = 40.0; + ((device float*)&_12.cm[1])[2u] = 20.0; + ((device float*)&_12.rm[1u])[3] = 30.0; + ((threadgroup float*)&shared_vec4)[2u] = 40.0; + ((threadgroup float*)&shared_vec3)[1u] = 1.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/load-store-col-rows.comp b/reference/shaders-msl-no-opt/packing/load-store-col-rows.comp new file mode 100644 index 00000000000..020ccae0dae --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/load-store-col-rows.comp @@ -0,0 +1,76 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float2x3[2]; +typedef packed_float3 packed_rm_float3x2[2]; + +struct SSBO1 +{ + float2x4 a; + float2x4 a2; +}; + +struct SSBO2 +{ + packed_float2x3 b; + packed_rm_float3x2 b2; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline 
__attribute__((always_inline)) +void load_store_column(device SSBO1& v_21) +{ + float2 u = v_21.a[0].xy; + float2 v = v_21.a[1].xy; + u += v; + (device float2&)v_21.a[0] = u; + (device float2&)v_21.a[1] = v; +} + +static inline __attribute__((always_inline)) +void load_store_row(device SSBO1& v_21) +{ + float2 u = float2(v_21.a2[0][0], v_21.a2[1][0]); + float2 v = float2(v_21.a2[0][1], v_21.a2[1][1]); + u += v; + ((device float*)&v_21.a2[0])[0] = u.x; + ((device float*)&v_21.a2[1])[0] = u.y; + ((device float*)&v_21.a2[0])[1] = v.x; + ((device float*)&v_21.a2[1])[1] = v.y; +} + +static inline __attribute__((always_inline)) +void load_store_packed_column(device SSBO2& v_58) +{ + float3 u = float3(v_58.b[0]); + float3 v = float3(v_58.b[1]); + u += v; + v_58.b[0] = u; + v_58.b[1] = v; +} + +static inline __attribute__((always_inline)) +void load_store_packed_row(device SSBO2& v_58) +{ + float2 u = float2(v_58.b2[0][0], v_58.b2[1][0]); + float2 v = float2(v_58.b2[0][1], v_58.b2[1][1]); + u += v; + ((device float*)&v_58.b2[0])[0] = u.x; + ((device float*)&v_58.b2[1])[0] = u.y; + ((device float*)&v_58.b2[0])[1] = v.x; + ((device float*)&v_58.b2[1])[1] = v.y; +} + +kernel void main0(device SSBO1& v_21 [[buffer(0)]], device SSBO2& v_58 [[buffer(1)]]) +{ + load_store_column(v_21); + load_store_row(v_21); + load_store_packed_column(v_58); + load_store_packed_row(v_58); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp new file mode 100644 index 00000000000..a00a679b64c --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x2 col_major0; + float2x2 col_major1; +}; + +struct SSBORow +{ + float2x2 row_major0; + float2x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline 
__attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); 
+} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp new file mode 100644 index 00000000000..fd81f3a9aca --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x2-std140.comp @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x4 col_major0; + float2x4 col_major1; +}; + +struct SSBORow +{ + float2x4 row_major0; + float2x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x2 loaded = float2x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy); + (device float2&)v_29.col_major1[0] = loaded[0]; + (device float2&)v_29.col_major1[1] = loaded[1]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x2 loaded = transpose(float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)); + (device float2&)v_41.row_major0[0] = float2(loaded[0][0], loaded[1][0]); + (device float2&)v_41.row_major0[1] = float2(loaded[0][1], loaded[1][1]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + (device float2&)v_29.col_major0[0] = float2x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy)[0]; + (device float2&)v_29.col_major0[1] = float2x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy)[1]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float2(float2x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy)[0][0], float2x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy)[1][0]); + (device float2&)v_41.row_major0[1] = float2(float2x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy)[0][1], float2x2(v_29.col_major0[0].xy, 
v_29.col_major0[1].xy)[1][1]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[0] = float2(float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)[0][0], float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)[1][0]); + (device float2&)v_29.col_major0[1] = float2(float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)[0][1], float2x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy)[1][1]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float2x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy)[0]; + (device float2&)v_41.row_major0[1] = float2x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy)[1]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp new file mode 
100644 index 00000000000..a00a679b64c --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x2-std430.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x2 col_major0; + float2x2 col_major1; +}; + +struct SSBORow +{ + float2x2 row_major0; + float2x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device 
float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp new file mode 100644 index 00000000000..963ec39dc88 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float2x3[2]; + +struct SSBOCol +{ + packed_float2x3 col_major0; + packed_float2x3 col_major1; +}; + +struct SSBORow +{ + float3x2 row_major0; + float3x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x3 loaded = float2x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1])); + v_29.col_major1[0] = loaded[0]; + v_29.col_major1[1] = loaded[1]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0[0] = float2x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]))[0]; + v_29.col_major0[1] = float2x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]))[1]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& 
v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(float2x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]))); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0] = float3(v_41.row_major0[0][0], v_41.row_major0[1][0], v_41.row_major0[2][0]); + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1][0]; + v_41.row_major0[1][1] = v_29.col_major0[1][1]; + v_41.row_major0[2][1] = v_29.col_major0[1][2]; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0][1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = v_29.col_major0[0][1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp new file mode 100644 index 00000000000..d20a4a7da8e --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x3-std140.comp @@ -0,0 +1,93 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + 
+using namespace metal; + +struct SSBOCol +{ + float2x3 col_major0; + float2x3 col_major1; +}; + +struct SSBORow +{ + float3x4 row_major0; + float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x3 loaded = transpose(float3x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy, v_41.row_major0[2].xy)); + (device float2&)v_41.row_major0[0] = float2(loaded[0][0], loaded[1][0]); + (device float2&)v_41.row_major0[1] = float2(loaded[0][1], loaded[1][1]); + (device float2&)v_41.row_major0[2] = float2(loaded[0][2], loaded[1][2]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float2(v_29.col_major0[0][0], v_29.col_major0[1][0]); + (device float2&)v_41.row_major0[1] = float2(v_29.col_major0[0][1], v_29.col_major0[1][1]); + (device float2&)v_41.row_major0[2] = float2(v_29.col_major0[0][2], v_29.col_major0[1][2]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(float3x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy, v_41.row_major0[2].xy)); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float3x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy)[0]; + (device float2&)v_41.row_major0[1] = float3x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, 
v_41.row_major1[2].xy)[1]; + (device float2&)v_41.row_major0[2] = float3x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy)[2]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; + ((device float*)&v_41.row_major0[2])[1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp new file mode 100644 index 00000000000..240111b9d23 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x3-std430.comp @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x3 col_major0; + float2x3 col_major1; +}; + +struct SSBORow +{ + float3x2 row_major0; + float3x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x3 loaded = 
v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git 
a/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp new file mode 100644 index 00000000000..d9e8cca9277 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x4 col_major0; + float2x4 col_major1; +}; + +struct SSBORow +{ + float4x2 row_major0; + float4x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = 
v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp new file mode 100644 index 00000000000..e1adc222a04 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x4-std140.comp @@ -0,0 +1,97 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x4 col_major0; + float2x4 col_major1; +}; + +struct SSBORow +{ + float4x4 row_major0; + float4x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x4 loaded = transpose(float4x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy, v_41.row_major0[2].xy, v_41.row_major0[3].xy)); + (device float2&)v_41.row_major0[0] = float2(loaded[0][0], loaded[1][0]); + (device float2&)v_41.row_major0[1] = float2(loaded[0][1], loaded[1][1]); + (device float2&)v_41.row_major0[2] = float2(loaded[0][2], 
loaded[1][2]); + (device float2&)v_41.row_major0[3] = float2(loaded[0][3], loaded[1][3]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float2(v_29.col_major0[0][0], v_29.col_major0[1][0]); + (device float2&)v_41.row_major0[1] = float2(v_29.col_major0[0][1], v_29.col_major0[1][1]); + (device float2&)v_41.row_major0[2] = float2(v_29.col_major0[0][2], v_29.col_major0[1][2]); + (device float2&)v_41.row_major0[3] = float2(v_29.col_major0[0][3], v_29.col_major0[1][3]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(float4x2(v_41.row_major0[0].xy, v_41.row_major0[1].xy, v_41.row_major0[2].xy, v_41.row_major0[3].xy)); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + (device float2&)v_41.row_major0[0] = float4x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy, v_41.row_major1[3].xy)[0]; + (device float2&)v_41.row_major0[1] = float4x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy, v_41.row_major1[3].xy)[1]; + (device float2&)v_41.row_major0[2] = float4x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy, v_41.row_major1[3].xy)[2]; + (device float2&)v_41.row_major0[3] = float4x2(v_41.row_major1[0].xy, v_41.row_major1[1].xy, v_41.row_major1[2].xy, v_41.row_major1[3].xy)[3]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + 
((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; + ((device float*)&v_41.row_major0[2])[1] = v_29.col_major0[1].z; + ((device float*)&v_41.row_major0[3])[1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp new file mode 100644 index 00000000000..d9e8cca9277 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-2x4-std430.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float2x4 col_major0; + float2x4 col_major1; +}; + +struct SSBORow +{ + float4x2 row_major0; + float4x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float2x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float2x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = 
v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp new file mode 100644 index 00000000000..86be094fbe7 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp @@ -0,0 +1,91 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + 
+#include +#include + +using namespace metal; + +typedef packed_float3 packed_rm_float3x2[2]; + +struct SSBOCol +{ + float3x2 col_major0; + float3x2 col_major1; +}; + +struct SSBORow +{ + packed_rm_float3x2 row_major0; + packed_rm_float3x2 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x2 loaded = transpose(float2x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]))); + v_41.row_major0[0] = float3(loaded[0][0], loaded[1][0], loaded[2][0]); + v_41.row_major0[1] = float3(loaded[0][1], loaded[1][1], loaded[2][1]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0[0] = float3(v_29.col_major0[0][0], v_29.col_major0[1][0], v_29.col_major0[2][0]); + v_41.row_major0[1] = float3(v_29.col_major0[0][1], v_29.col_major0[1][1], v_29.col_major0[2][1]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(float2x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]))); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0[0] = float2x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]))[0]; + v_41.row_major0[1] = float2x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]))[1]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = 
float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = v_41.row_major0[1u][0]; + v_41.row_major0[1u][0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp new file mode 100644 index 00000000000..9144272f6de --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x2-std140.comp @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x4 col_major0; + float3x4 col_major1; +}; + +struct SSBORow +{ + float2x3 row_major0; + float2x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x2 loaded = float3x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy, v_29.col_major0[2].xy); + (device float2&)v_29.col_major1[0] = loaded[0]; + (device float2&)v_29.col_major1[1] = loaded[1]; + (device float2&)v_29.col_major1[2] = loaded[2]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = 
transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + (device float2&)v_29.col_major0[0] = float3x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy)[0]; + (device float2&)v_29.col_major0[1] = float3x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy)[1]; + (device float2&)v_29.col_major0[2] = float3x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy)[2]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(float3x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy, v_29.col_major0[2].xy)); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[0] = float2(v_41.row_major0[0][0], v_41.row_major0[1][0]); + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + (device float2&)v_29.col_major0[2] = float2(v_41.row_major0[0][2], v_41.row_major0[1][2]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + 
load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp new file mode 100644 index 00000000000..3266e6c33f0 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x2-std430.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x2 col_major0; + float3x2 col_major1; +}; + +struct SSBORow +{ + float2x3 row_major0; + float2x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void 
copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp new file mode 100644 index 00000000000..593a0133362 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp @@ -0,0 +1,102 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float3x3[3]; +typedef packed_float3 packed_rm_float3x3[3]; + +struct SSBOCol +{ + packed_float3x3 col_major0; + packed_float3x3 col_major1; +}; + +struct SSBORow +{ + packed_rm_float3x3 row_major0; + packed_rm_float3x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x3 loaded = float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2])); + v_29.col_major1[0] = loaded[0]; + v_29.col_major1[1] = loaded[1]; + v_29.col_major1[2] = loaded[2]; +} + +static inline 
__attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x3 loaded = transpose(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))); + v_41.row_major0[0] = float3(loaded[0][0], loaded[1][0], loaded[2][0]); + v_41.row_major0[1] = float3(loaded[0][1], loaded[1][1], loaded[2][1]); + v_41.row_major0[2] = float3(loaded[0][2], loaded[1][2], loaded[2][2]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0[0] = float3x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]))[0]; + v_29.col_major0[1] = float3x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]))[1]; + v_29.col_major0[2] = float3x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]))[2]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0[0] = float3(float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[0][0], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[1][0], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[2][0]); + v_41.row_major0[1] = float3(float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[0][1], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[1][1], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[2][1]); + v_41.row_major0[2] = float3(float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[0][2], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]))[1][2], float3x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), 
float3(v_29.col_major0[2]))[2][2]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0] = float3(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[0][0], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[1][0], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[2][0]); + v_29.col_major0[1] = float3(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[0][1], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[1][1], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[2][1]); + v_29.col_major0[2] = float3(float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[0][2], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[1][2], float3x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]))[2][2]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0[0] = float3x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]))[0]; + v_41.row_major0[1] = float3x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]))[1]; + v_41.row_major0[2] = float3x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]))[2]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1][0]; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1][1]; + ((device 
float*)&v_41.row_major0[2])[1] = v_29.col_major0[1][2]; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0][1u] = v_41.row_major0[1u][0]; + v_41.row_major0[1u][0] = v_29.col_major0[0][1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp new file mode 100644 index 00000000000..e2d4adb50bd --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x3-std140.comp @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x3 col_major0; + float3x3 col_major1; +}; + +struct SSBORow +{ + float3x3 row_major0; + float3x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static 
inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp new file mode 100644 index 00000000000..e2d4adb50bd --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x3-std430.comp @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x3 col_major0; + float3x3 col_major1; +}; + +struct SSBORow +{ + float3x3 row_major0; + float3x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline 
__attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + 
copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp new file mode 100644 index 00000000000..360ef467cf5 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp @@ -0,0 +1,99 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_rm_float3x4[4]; + +struct SSBOCol +{ + float3x4 col_major0; + float3x4 col_major1; +}; + +struct SSBORow +{ + packed_rm_float3x4 row_major0; + packed_rm_float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x4 loaded = transpose(float4x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]), float3(v_41.row_major0[3]))); + v_41.row_major0[0] = float3(loaded[0][0], loaded[1][0], loaded[2][0]); + v_41.row_major0[1] = float3(loaded[0][1], loaded[1][1], loaded[2][1]); + v_41.row_major0[2] = float3(loaded[0][2], loaded[1][2], loaded[2][2]); + v_41.row_major0[3] = float3(loaded[0][3], loaded[1][3], loaded[2][3]); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0[0] = float3(v_29.col_major0[0][0], v_29.col_major0[1][0], v_29.col_major0[2][0]); + v_41.row_major0[1] = float3(v_29.col_major0[0][1], v_29.col_major0[1][1], v_29.col_major0[2][1]); + v_41.row_major0[2] = 
float3(v_29.col_major0[0][2], v_29.col_major0[1][2], v_29.col_major0[2][2]); + v_41.row_major0[3] = float3(v_29.col_major0[0][3], v_29.col_major0[1][3], v_29.col_major0[2][3]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(float4x3(float3(v_41.row_major0[0]), float3(v_41.row_major0[1]), float3(v_41.row_major0[2]), float3(v_41.row_major0[3]))); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0[0] = float4x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]), float3(v_41.row_major1[3]))[0]; + v_41.row_major0[1] = float4x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]), float3(v_41.row_major1[3]))[1]; + v_41.row_major0[2] = float4x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]), float3(v_41.row_major1[3]))[2]; + v_41.row_major0[3] = float4x3(float3(v_41.row_major1[0]), float3(v_41.row_major1[1]), float3(v_41.row_major1[2]), float3(v_41.row_major1[3]))[3]; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + ((device float*)&v_41.row_major0[0])[1] = v_29.col_major0[1].x; + ((device float*)&v_41.row_major0[1])[1] = v_29.col_major0[1].y; + ((device float*)&v_41.row_major0[2])[1] = v_29.col_major0[1].z; + ((device float*)&v_41.row_major0[3])[1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = v_41.row_major0[1u][0]; + v_41.row_major0[1u][0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& 
v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp new file mode 100644 index 00000000000..f18917dd76c --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x4-std140.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x4 col_major0; + float3x4 col_major1; +}; + +struct SSBORow +{ + float4x3 row_major0; + float4x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float3x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline 
__attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp new file mode 100644 index 00000000000..f18917dd76c --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-3x4-std430.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float3x4 col_major0; + float3x4 col_major1; +}; + +struct SSBORow +{ + float4x3 row_major0; + float4x3 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float3x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + 
float3x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp 
b/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp new file mode 100644 index 00000000000..d98613e43db --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x2 col_major0; + float4x2 col_major1; +}; + +struct SSBORow +{ + float2x4 row_major0; + float2x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device 
float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp new file mode 100644 index 00000000000..6c231cd8d34 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x2-std140.comp @@ -0,0 +1,95 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x4 col_major0; + float4x4 col_major1; +}; + +struct SSBORow +{ + float2x4 row_major0; + float2x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x2 loaded = float4x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy, v_29.col_major0[2].xy, v_29.col_major0[3].xy); + (device float2&)v_29.col_major1[0] = loaded[0]; + (device float2&)v_29.col_major1[1] = loaded[1]; + (device float2&)v_29.col_major1[2] = loaded[2]; + (device float2&)v_29.col_major1[3] = loaded[3]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + (device float2&)v_29.col_major0[0] = float4x2(v_29.col_major1[0].xy, 
v_29.col_major1[1].xy, v_29.col_major1[2].xy, v_29.col_major1[3].xy)[0]; + (device float2&)v_29.col_major0[1] = float4x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy, v_29.col_major1[3].xy)[1]; + (device float2&)v_29.col_major0[2] = float4x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy, v_29.col_major1[3].xy)[2]; + (device float2&)v_29.col_major0[3] = float4x2(v_29.col_major1[0].xy, v_29.col_major1[1].xy, v_29.col_major1[2].xy, v_29.col_major1[3].xy)[3]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(float4x2(v_29.col_major0[0].xy, v_29.col_major0[1].xy, v_29.col_major0[2].xy, v_29.col_major0[3].xy)); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[0] = float2(v_41.row_major0[0][0], v_41.row_major0[1][0]); + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + (device float2&)v_29.col_major0[2] = float2(v_41.row_major0[0][2], v_41.row_major0[1][2]); + (device float2&)v_29.col_major0[3] = float2(v_41.row_major0[0][3], v_41.row_major0[1][3]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + (device float2&)v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device 
float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp new file mode 100644 index 00000000000..d98613e43db --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x2-std430.comp @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x2 col_major0; + float4x2 col_major1; +}; + +struct SSBORow +{ + float2x4 row_major0; + float2x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x2 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x2 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void 
copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float2(v_41.row_major0[0][1], v_41.row_major0[1][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp new file mode 100644 index 00000000000..7a156f85e30 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp @@ -0,0 +1,98 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float4x3[4]; + +struct SSBOCol +{ + packed_float4x3 col_major0; + packed_float4x3 col_major1; +}; + +struct SSBORow +{ + float3x4 row_major0; + float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x3 loaded = float4x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]), float3(v_29.col_major0[3])); + 
v_29.col_major1[0] = loaded[0]; + v_29.col_major1[1] = loaded[1]; + v_29.col_major1[2] = loaded[2]; + v_29.col_major1[3] = loaded[3]; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0[0] = float4x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]), float3(v_29.col_major1[3]))[0]; + v_29.col_major0[1] = float4x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]), float3(v_29.col_major1[3]))[1]; + v_29.col_major0[2] = float4x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]), float3(v_29.col_major1[3]))[2]; + v_29.col_major0[3] = float4x3(float3(v_29.col_major1[0]), float3(v_29.col_major1[1]), float3(v_29.col_major1[2]), float3(v_29.col_major1[3]))[3]; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(float4x3(float3(v_29.col_major0[0]), float3(v_29.col_major0[1]), float3(v_29.col_major0[2]), float3(v_29.col_major0[3]))); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0] = float3(v_41.row_major0[0][0], v_41.row_major0[1][0], v_41.row_major0[2][0]); + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_29.col_major0[2] = float3(v_41.row_major0[0][2], v_41.row_major0[1][2], v_41.row_major0[2][2]); + v_29.col_major0[3] = float3(v_41.row_major0[0][3], v_41.row_major0[1][3], v_41.row_major0[2][3]); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = 
v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1][0]; + v_41.row_major0[1][1] = v_29.col_major0[1][1]; + v_41.row_major0[2][1] = v_29.col_major0[1][2]; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[0][1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = v_29.col_major0[0][1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp new file mode 100644 index 00000000000..0964f849529 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x3-std140.comp @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x3 col_major0; + float4x3 col_major1; +}; + +struct SSBORow +{ + float3x4 row_major0; + float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = 
transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp new file mode 100644 index 00000000000..0964f849529 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x3-std430.comp 
@@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x3 col_major0; + float4x3 col_major1; +}; + +struct SSBORow +{ + float3x4 row_major0; + float3x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x3 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x3 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float3(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device 
float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp b/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp new file mode 100644 index 00000000000..865cc198651 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x4 col_major0; + float4x4 col_major1; +}; + +struct SSBORow +{ + float4x4 row_major0; + float4x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void 
copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp b/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp new file mode 100644 index 00000000000..865cc198651 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x4-std140.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x4 col_major0; + float4x4 col_major1; +}; + +struct SSBORow +{ + float4x4 row_major0; + float4x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + 
+static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git 
a/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp b/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp new file mode 100644 index 00000000000..865cc198651 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-4x4-std430.comp @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBOCol +{ + float4x4 col_major0; + float4x4 col_major1; +}; + +struct SSBORow +{ + float4x4 row_major0; + float4x4 row_major1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +void load_store_to_variable_col_major(device SSBOCol& v_29) +{ + float4x4 loaded = v_29.col_major0; + v_29.col_major1 = loaded; +} + +static inline __attribute__((always_inline)) +void load_store_to_variable_row_major(device SSBORow& v_41) +{ + float4x4 loaded = transpose(v_41.row_major0); + v_41.row_major0 = transpose(loaded); +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_col_major(device SSBOCol& v_29) +{ + v_29.col_major0 = v_29.col_major1; +} + +static inline __attribute__((always_inline)) +void copy_col_major_to_row_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_41.row_major0 = transpose(v_29.col_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_col_major(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0 = transpose(v_41.row_major0); +} + +static inline __attribute__((always_inline)) +void copy_row_major_to_row_major(device SSBORow& v_41) +{ + v_41.row_major0 = v_41.row_major1; +} + +static inline __attribute__((always_inline)) +void copy_columns(device SSBOCol& v_29, device SSBORow& v_41) +{ + v_29.col_major0[1] = float4(v_41.row_major0[0][1], v_41.row_major0[1][1], v_41.row_major0[2][1], v_41.row_major0[3][1]); + v_41.row_major0[0][1] = v_29.col_major0[1].x; + v_41.row_major0[1][1] = v_29.col_major0[1].y; + v_41.row_major0[2][1] = 
v_29.col_major0[1].z; + v_41.row_major0[3][1] = v_29.col_major0[1].w; +} + +static inline __attribute__((always_inline)) +void copy_elements(device SSBOCol& v_29, device SSBORow& v_41) +{ + ((device float*)&v_29.col_major0[0])[1u] = ((device float*)&v_41.row_major0[1u])[0]; + ((device float*)&v_41.row_major0[1u])[0] = ((device float*)&v_29.col_major0[0])[1u]; +} + +kernel void main0(device SSBOCol& v_29 [[buffer(0)]], device SSBORow& v_41 [[buffer(1)]]) +{ + load_store_to_variable_col_major(v_29); + load_store_to_variable_row_major(v_41); + copy_col_major_to_col_major(v_29); + copy_col_major_to_row_major(v_29, v_41); + copy_row_major_to_col_major(v_29, v_41); + copy_row_major_to_row_major(v_41); + copy_columns(v_29, v_41); + copy_elements(v_29, v_41); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp new file mode 100644 index 00000000000..2384e3648e9 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp @@ -0,0 +1,21 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float3x3 m0; + float3x3 m1; + float3 v0; + float3 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = _11.v1 * (_11.m1 * _11.m0); + _11.v0 = (_11.v1 * _11.m1) * _11.m0; +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp new file mode 100644 index 00000000000..3fb36e0561a --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +typedef packed_float3 packed_float3x3[3]; + +struct SSBO +{ + packed_float3x3 m0; + packed_float3x3 m1; + packed_float3 v0; + packed_float3 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = 
uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = (float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])) * float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2]))) * float3(_11.v1); + _11.v0 = float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])) * (float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])) * float3(_11.v1)); + _11.v0 = (float3(_11.v1) * float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2]))) * float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])); + _11.v0 = float3(_11.v1) * (float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])) * float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2]))); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp new file mode 100644 index 00000000000..40f00886dde --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float2x4 m0; + float2x4 m1; + float2 v0; + float2 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = (float2x2(_11.m0[0].xy, _11.m0[1].xy) * float2x2(_11.m1[0].xy, _11.m1[1].xy)) * _11.v1; + _11.v0 = float2x2(_11.m0[0].xy, _11.m0[1].xy) * (float2x2(_11.m1[0].xy, _11.m1[1].xy) * _11.v1); + _11.v0 = (_11.v1 * float2x2(_11.m0[0].xy, _11.m0[1].xy)) * float2x2(_11.m1[0].xy, _11.m1[1].xy); + _11.v0 = _11.v1 * (float2x2(_11.m0[0].xy, _11.m0[1].xy) * float2x2(_11.m1[0].xy, _11.m1[1].xy)); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp new file mode 100644 index 00000000000..7130c9a8e0c --- /dev/null +++ 
b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +typedef packed_float3 packed_rm_float3x3[3]; + +struct SSBO +{ + packed_rm_float3x3 m0; + packed_rm_float3x3 m1; + packed_float3 v0; + packed_float3 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = float3(_11.v1) * (float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])) * float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2]))); + _11.v0 = (float3(_11.v1) * float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2]))) * float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])); + _11.v0 = float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])) * (float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2])) * float3(_11.v1)); + _11.v0 = (float3x3(float3(_11.m1[0]), float3(_11.m1[1]), float3(_11.m1[2])) * float3x3(float3(_11.m0[0]), float3(_11.m0[1]), float3(_11.m0[2]))) * float3(_11.v1); +} + diff --git a/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp new file mode 100644 index 00000000000..f061dd66627 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float2x4 m0; + float2x4 m1; + float2 v0; + float2 v1; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _11 [[buffer(0)]]) +{ + _11.v0 = _11.v1 * (float2x2(_11.m1[0].xy, _11.m1[1].xy) * float2x2(_11.m0[0].xy, _11.m0[1].xy)); + _11.v0 = (_11.v1 * float2x2(_11.m1[0].xy, _11.m1[1].xy)) * float2x2(_11.m0[0].xy, _11.m0[1].xy); + _11.v0 = float2x2(_11.m1[0].xy, _11.m1[1].xy) * (float2x2(_11.m0[0].xy, _11.m0[1].xy) * _11.v1); + _11.v0 = 
(float2x2(_11.m1[0].xy, _11.m1[1].xy) * float2x2(_11.m0[0].xy, _11.m0[1].xy)) * _11.v1; +} + diff --git a/reference/shaders-msl-no-opt/packing/member-padding.comp b/reference/shaders-msl-no-opt/packing/member-padding.comp new file mode 100644 index 00000000000..4f653ecdd67 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/member-padding.comp @@ -0,0 +1,21 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + char _m0_pad[16]; + float a; + char _m1_pad[20]; + float b; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _9 [[buffer(0)]]) +{ + _9.a = 10.0; + _9.b = 20.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp b/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp new file mode 100644 index 00000000000..4d5ba324a0d --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/std140-array-of-vectors.comp @@ -0,0 +1,42 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 v1[4]; + float4 v2[4]; + float3 v3[4]; + float4 v4[4]; + float4 v1_array_of_array[4][4]; + float4 v2_array_of_array[4][4]; + float3 v3_array_of_array[4][4]; + float4 v4_array_of_array[4][4]; + float4 v_unsized[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _29 [[buffer(0)]]) +{ + float loaded1 = _29.v1[1].x; + (device float&)_29.v1[2] = loaded1; + float2 loaded2 = _29.v2[1].xy; + (device float2&)_29.v2[2] = loaded2; + float3 loaded3 = _29.v3[1]; + _29.v3[2] = loaded3; + float4 loaded4 = _29.v4[1]; + _29.v4[2] = loaded4; + loaded1 = _29.v1_array_of_array[1][2].x; + (device float&)_29.v1_array_of_array[2][3] = loaded1; + loaded2 = _29.v2_array_of_array[1][2].xy; + (device float2&)_29.v2_array_of_array[2][3] = loaded2; + loaded3 = _29.v3_array_of_array[1][2]; + _29.v3_array_of_array[2][3] = loaded3; + loaded4 = _29.v4_array_of_array[1][2]; + _29.v4_array_of_array[2][3] = loaded4; + loaded1 = 
_29.v_unsized[1].x; + (device float&)_29.v_unsized[2] = loaded1; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-alignment.comp b/reference/shaders-msl-no-opt/packing/struct-alignment.comp new file mode 100644 index 00000000000..34647b46f45 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-alignment.comp @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + packed_float3 a; + float b; +}; + +struct SSBO +{ + float2 a; + float b; + char _m2_pad[4]; + Foo foo; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _12 [[buffer(0)]]) +{ + ((device float*)&_12.a)[0u] = 10.0; + _12.b = 20.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp b/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp new file mode 100644 index 00000000000..587ee4ad158 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + packed_float3 a; +}; + +struct SSBOScalar +{ + Foo v[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBOScalar& buffer_scalar [[buffer(0)]]) +{ + buffer_scalar.v[1].a[1u] = 1.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp b/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp new file mode 100644 index 00000000000..e0652b933dd --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-packing-recursive.comp @@ -0,0 +1,33 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + packed_float4 a; +}; + +struct Bar +{ + Foo a; +}; + +struct Baz +{ + Bar a; +}; + +struct SSBOScalar +{ + float v; + Baz baz; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBOScalar& buffer_scalar [[buffer(0)]]) +{ + buffer_scalar.baz.a.a.a[3u] = 
10.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-packing.comp b/reference/shaders-msl-no-opt/packing/struct-packing.comp new file mode 100644 index 00000000000..a86809fee97 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-packing.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct Foo +{ + packed_float3 a; +}; + +struct Bar +{ + packed_float3 a; +}; + +struct SSBOScalar +{ + Foo foo; + Bar bar; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBOScalar& buffer_scalar [[buffer(0)]]) +{ + buffer_scalar.foo.a[0u] = 10.0; + buffer_scalar.bar.a[0u] = 20.0; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp b/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp new file mode 100644 index 00000000000..c30fd070ec4 --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp @@ -0,0 +1,54 @@ +#include +#include + +using namespace metal; + +struct A +{ + float v; + char _m0_final_padding[12]; +}; + +struct B +{ + float2 v; + char _m0_final_padding[8]; +}; + +struct C +{ + float3 v; +}; + +struct D +{ + float4 v; +}; + +struct E +{ + float4 a; + float2 b; + char _m0_final_padding[8]; +}; + +struct SSBO +{ + A a[2][4]; + B b[2][4]; + C c[2][4]; + D d[2][4]; + float2x4 e[2][4]; + E f[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _32 [[buffer(0)]]) +{ + _32.f[0].a = float4(2.0); + float2x2 tmp = float2x2(_32.e[0][1][0].xy, _32.e[0][1][1].xy); + (device float2&)_32.e[1][2][0] = tmp[0]; + (device float2&)_32.e[1][2][1] = tmp[1]; +} + diff --git a/reference/shaders-msl-no-opt/packing/struct-size-padding.comp b/reference/shaders-msl-no-opt/packing/struct-size-padding.comp new file mode 100644 index 00000000000..98f039fc98a --- /dev/null +++ b/reference/shaders-msl-no-opt/packing/struct-size-padding.comp @@ -0,0 
+1,54 @@ +#include +#include + +using namespace metal; + +struct A +{ + float v; + char _m0_final_padding[12]; +}; + +struct B +{ + float2 v; + char _m0_final_padding[8]; +}; + +struct C +{ + float3 v; +}; + +struct D +{ + float4 v; +}; + +struct E +{ + float4 a; + float2 b; + char _m0_final_padding[8]; +}; + +struct SSBO +{ + A a[4]; + B b[4]; + C c[4]; + D d[4]; + float2x4 e[4]; + E f[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _26 [[buffer(0)]]) +{ + _26.f[0].a = float4(2.0); + float2x2 tmp = float2x2(_26.e[1][0].xy, _26.e[1][1].xy); + (device float2&)_26.e[2][0] = tmp[0]; + (device float2&)_26.e[2][1] = tmp[1]; +} + diff --git a/reference/shaders-msl-no-opt/tesc/copy-tess-level.tesc b/reference/shaders-msl-no-opt/tesc/copy-tess-level.tesc new file mode 100644 index 00000000000..3bb5419795b --- /dev/null +++ b/reference/shaders-msl-no-opt/tesc/copy-tess-level.tesc @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _14 = spvUnsafeArray({ 1.0, 2.0 }); +constant spvUnsafeArray _21 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0 }); + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(_14[0]); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(_14[1]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_21[0]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_21[1]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(_21[2]); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(_21[3]); + spvUnsafeArray inner; + inner = spvUnsafeArray({ float(spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1]) }); + spvUnsafeArray outer; + outer = spvUnsafeArray({ float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 
float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]) }); + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + diff --git a/reference/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc b/reference/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc new file mode 100644 index 00000000000..f55e47baf05 --- /dev/null +++ b/reference/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc @@ -0,0 +1,71 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_in +{ + uint3 m_57; + ushort2 m_61; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* 
gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_ClipDistance[0] = gl_in[gl_InvocationID].gl_ClipDistance[0]; + gl_out[gl_InvocationID].gl_ClipDistance[1] = gl_in[gl_InvocationID].gl_ClipDistance[1]; + gl_out[gl_InvocationID].gl_CullDistance[0] = gl_in[gl_InvocationID].gl_CullDistance[0]; +} + diff --git a/reference/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc b/reference/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc new file mode 100644 index 00000000000..c76da101bc6 --- /dev/null +++ b/reference/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc @@ -0,0 +1,37 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +static inline __attribute__((always_inline)) +void store_tess_level_in_func(device half (&gl_TessLevelInner)[2], device half (&gl_TessLevelOuter)[4]) +{ + gl_TessLevelInner[0] = half(1.0); + gl_TessLevelInner[1] = half(2.0); + gl_TessLevelOuter[0] = half(3.0); + gl_TessLevelOuter[1] = half(4.0); + gl_TessLevelOuter[2] = half(5.0); + gl_TessLevelOuter[3] = half(6.0); +} + +static inline __attribute__((always_inline)) +float load_tess_level_in_func(device half (&gl_TessLevelInner)[2], device half (&gl_TessLevelOuter)[4]) +{ + return float(gl_TessLevelInner[0]) + float(gl_TessLevelOuter[1]); +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 1]; + 
store_tess_level_in_func(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor); + float v = load_tess_level_in_func(spvTessLevel[gl_PrimitiveID].insideTessellationFactor, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor); + gl_out[gl_InvocationID].gl_Position = float4(v); +} + diff --git a/reference/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese b/reference/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese new file mode 100644 index 00000000000..15f04e7371c --- /dev/null +++ b/reference/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese @@ -0,0 +1,80 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 FragColors [[attribute(2)]]; + float4 gl_Position [[attribute(1)]]; +}; + +struct main0_patchIn +{ + float4 FragColor [[attribute(0)]]; + float4 gl_TessLevelOuter [[attribute(3)]]; + float2 gl_TessLevelInner [[attribute(4)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn 
[[stage_in]], uint gl_PrimitiveID [[patch_id]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + out.gl_Position = (((((float4(1.0) + patchIn.FragColor) + patchIn.gl_in[0].FragColors) + patchIn.gl_in[1].FragColors) + float4(gl_TessLevelInner[0])) + float4(gl_TessLevelOuter[int(gl_PrimitiveID) & 1])) + patchIn.gl_in[0].gl_Position; + return out; +} + diff --git a/reference/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese b/reference/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese new file mode 100644 index 00000000000..09c5cd75026 --- /dev/null +++ b/reference/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese @@ -0,0 +1,37 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 gl_Position [[attribute(0)]]; + float gl_ClipDistance_0 [[attribute(1)]]; + float gl_ClipDistance_1 [[attribute(2)]]; + float gl_CullDistance_0 [[attribute(3)]]; + float gl_CullDistance_1 [[attribute(4)]]; + float gl_CullDistance_2 [[attribute(5)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position.x = patchIn.gl_in[0].gl_ClipDistance_0; + out.gl_Position.y = patchIn.gl_in[1].gl_CullDistance_0; + out.gl_Position.z = patchIn.gl_in[0].gl_ClipDistance_1; + out.gl_Position.w = patchIn.gl_in[1].gl_CullDistance_1; + out.gl_Position += patchIn.gl_in[0].gl_Position; + out.gl_Position += patchIn.gl_in[1].gl_Position; + return out; +} + diff --git 
a/reference/shaders-msl-no-opt/vert/cull-distance.for-tess.vert b/reference/shaders-msl-no-opt/vert/cull-distance.for-tess.vert new file mode 100644 index 00000000000..5c2311d7412 --- /dev/null +++ b/reference/shaders-msl-no-opt/vert/cull-distance.for-tess.vert @@ -0,0 +1,62 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + spvUnsafeArray gl_CullDistance; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_CullDistance[0] = 1.0; + out.gl_CullDistance[1] = 3.0; + out.gl_Position = float4(1.0); +} + diff --git a/reference/shaders-msl-no-opt/vert/functions_nested.vert b/reference/shaders-msl-no-opt/vert/functions_nested.vert index 5d67f408bef..2d394f67d4d 100644 --- a/reference/shaders-msl-no-opt/vert/functions_nested.vert +++ b/reference/shaders-msl-no-opt/vert/functions_nested.vert @@ -5,6 +5,13 @@ using namespace 
metal; +// Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) +uint2 spvTexelBufferCoord(uint tc) +{ + return uint2(tc % 4096, tc / 4096); +} + struct attr_desc { int type; @@ -27,8 +34,6 @@ struct VertexConstantsBuffer float4 vc[16]; }; -constant float4 _295 = {}; - struct main0_out { float4 tc0 [[user(locn0)]]; @@ -36,12 +41,7 @@ struct main0_out float4 gl_Position [[position]]; }; -// Returns 2D texture coords corresponding to 1D texel buffer coords -uint2 spvTexelBufferCoord(uint tc) -{ - return uint2(tc % 4096, tc / 4096); -} - +static inline __attribute__((always_inline)) attr_desc fetch_desc(thread const int& location, constant VertexBuffer& v_227) { int attribute_flags = v_227.input_attributes[location].w; @@ -55,6 +55,7 @@ attr_desc fetch_desc(thread const int& location, constant VertexBuffer& v_227) return result; } +static inline __attribute__((always_inline)) uint get_bits(thread const uint4& v, thread const int& swap) { if (swap != 0) @@ -64,7 +65,8 @@ uint get_bits(thread const uint4& v, thread const int& swap) return ((v.x | (v.y << uint(8))) | (v.z << uint(16))) | (v.w << uint(24)); } -float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thread const texture2d input_stream) +static inline __attribute__((always_inline)) +float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, texture2d input_stream) { float4 result = float4(0.0, 0.0, 0.0, 1.0); bool reverse_order = false; @@ -132,11 +134,12 @@ float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thr return _210; } -float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d buff_in_2, thread texture2d buff_in_1) +static inline __attribute__((always_inline)) +float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, texture2d buff_in_2, texture2d buff_in_1) { 
int param = location; attr_desc desc = fetch_desc(param, v_227); - int vertex_id = gl_VertexIndex - int(v_227.vertex_base_index); + int vertex_id = int(gl_VertexIndex) - int(v_227.vertex_base_index); if (desc.is_volatile != 0) { attr_desc param_1 = desc; @@ -151,7 +154,8 @@ float4 read_location(thread const int& location, constant VertexBuffer& v_227, t } } -void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d buff_in_2, thread texture2d buff_in_1, constant VertexConstantsBuffer& v_309) +static inline __attribute__((always_inline)) +void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, texture2d buff_in_2, texture2d buff_in_1, constant VertexConstantsBuffer& v_309) { int param = 3; float4 in_diff_color = read_location(param, v_227, gl_VertexIndex, buff_in_2, buff_in_1); @@ -165,7 +169,8 @@ void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& tmp0.y = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[5])).y; tmp0.z = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[6])).z; float4 tmp1; - tmp1 = float4(in_tc0.xy.x, in_tc0.xy.y, tmp1.z, tmp1.w); + tmp1.x = in_tc0.xy.x; + tmp1.y = in_tc0.xy.y; tmp1.z = v_309.vc[15].x; dst_reg7.y = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[8])).y; dst_reg7.x = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[7])).x; diff --git a/reference/opt/shaders-msl/vert/layer.msl11.invalid.vert b/reference/shaders-msl-no-opt/vert/layer.msl11.invalid.vert similarity index 100% rename from reference/opt/shaders-msl/vert/layer.msl11.invalid.vert rename to reference/shaders-msl-no-opt/vert/layer.msl11.invalid.vert diff --git a/reference/shaders-msl-no-opt/vert/modf-storage-class.capture.vert b/reference/shaders-msl-no-opt/vert/modf-storage-class.capture.vert new file mode 100644 index 00000000000..87f4e955172 --- /dev/null +++ 
b/reference/shaders-msl-no-opt/vert/modf-storage-class.capture.vert @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 f [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 f2 [[attribute(0)]]; +}; + +vertex void main0(main0_in in [[stage_in]], uint gl_VertexIndex [[vertex_id]], uint gl_BaseVertex [[base_vertex]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]], device main0_out* spvOut [[buffer(28)]], device uint* spvIndirectParams [[buffer(29)]]) +{ + device main0_out& out = spvOut[(gl_InstanceIndex - gl_BaseInstance) * spvIndirectParams[0] + gl_VertexIndex - gl_BaseVertex]; + float4 _35; + float4 _21 = modf(in.f2, _35); + out.f = _35; + out.gl_Position = _21; +} + diff --git a/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert b/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert new file mode 100644 index 00000000000..9cdf12439ab --- /dev/null +++ b/reference/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert @@ -0,0 +1,157 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], 
threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +constant float4 _68[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + int Index1 [[attribute(0)]]; + int Index2 [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +{ + float4 indexable[4]; + spvArrayCopyFromStackToStack1(indexable, positions); + float4 indexable_1[4]; + spvArrayCopyFromStackToStack1(indexable_1, positions2); + return indexable[Index1] + indexable_1[Index2]; +} + +static inline __attribute__((always_inline)) 
+float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +{ + return consume_constant_arrays2(positions, positions2, Index1, Index2); +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + float4 _68_array_copy[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; + main0_out out = {}; + float4 LUT2[4]; + LUT2[0] = float4(10.0); + LUT2[1] = float4(11.0); + LUT2[2] = float4(12.0); + LUT2[3] = float4(13.0); + out.gl_Position = consume_constant_arrays(_68_array_copy, LUT2, in.Index1, in.Index2); + return out; +} + diff --git a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert index ab5631366a7..f6e3efbecf7 100644 --- a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert +++ b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert @@ -1,11 +1,50 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float4 _68[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _68 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(2.0), float4(3.0) }); struct main0_out { @@ -18,43 +57,31 @@ struct main0_in int Index2 [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) +static inline __attribute__((always_inline)) +float4 consume_constant_arrays2(spvUnsafeArray positions, spvUnsafeArray positions2, thread int& Index1, thread int& Index2) { - float4 indexable[4]; - spvArrayCopyFromStack1(indexable, positions); - float4 indexable_1[4]; - spvArrayCopyFromStack1(indexable_1, positions2); + spvUnsafeArray indexable; + indexable = positions; + spvUnsafeArray indexable_1; + indexable_1 = positions2; return indexable[Index1] + indexable_1[Index2]; } -float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, 
thread int& Index2) +static inline __attribute__((always_inline)) +float4 consume_constant_arrays(spvUnsafeArray positions, spvUnsafeArray positions2, thread int& Index1, thread int& Index2) { return consume_constant_arrays2(positions, positions2, Index1, Index2); } vertex main0_out main0(main0_in in [[stage_in]]) { - float4 _68_array_copy[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) }; main0_out out = {}; - float4 LUT2[4]; + spvUnsafeArray LUT2; LUT2[0] = float4(10.0); LUT2[1] = float4(11.0); LUT2[2] = float4(12.0); LUT2[3] = float4(13.0); - out.gl_Position = consume_constant_arrays(_68_array_copy, LUT2, in.Index1, in.Index2); + out.gl_Position = consume_constant_arrays(_68, LUT2, in.Index1, in.Index2); return out; } diff --git a/reference/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert b/reference/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert new file mode 100644 index 00000000000..f8209ae6fcf --- /dev/null +++ b/reference/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 Pos [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_Position = float4(1.0); + return out; +} + diff --git a/reference/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert b/reference/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert new file mode 100644 index 00000000000..9e024c2095b --- /dev/null +++ b/reference/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +vertex void main0() +{ +} + diff --git a/reference/opt/shaders-msl/vert/viewport-index.msl2.invalid.vert b/reference/shaders-msl-no-opt/vert/viewport-index.msl2.invalid.vert similarity index 100% rename from reference/opt/shaders-msl/vert/viewport-index.msl2.invalid.vert rename to reference/shaders-msl-no-opt/vert/viewport-index.msl2.invalid.vert 
diff --git a/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag b/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag index 9c3fff593b1..e402bbb259c 100644 --- a/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag +++ b/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag @@ -5,28 +5,13 @@ using namespace metal; -struct main0_out -{ - float4 fragColor [[color(0)]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -39,6 +24,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -77,8 +73,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -114,8 +110,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... 
params) { if (sw) { @@ -136,7 +132,13 @@ inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... p return t.gather_compare(s, spvForward(params)...); } -float4 do_samples(thread const texture1d t1, thread const sampler t1Smplr, constant uint& t1Swzl, thread const texture2d t2, constant uint& t2Swzl, thread const texture3d t3, thread const sampler t3Smplr, constant uint& t3Swzl, thread const texturecube tc, constant uint& tcSwzl, thread const texture2d_array t2a, thread const sampler t2aSmplr, constant uint& t2aSwzl, thread const texturecube_array tca, thread const sampler tcaSmplr, constant uint& tcaSwzl, thread const texture2d tb, thread const depth2d d2, thread const sampler d2Smplr, constant uint& d2Swzl, thread const depthcube dc, thread const sampler dcSmplr, constant uint& dcSwzl, thread const depth2d_array d2a, constant uint& d2aSwzl, thread const depthcube_array dca, thread const sampler dcaSmplr, constant uint& dcaSwzl, thread sampler defaultSampler, thread sampler shadowSampler) +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 do_samples(texture1d t1, sampler t1Smplr, constant uint& t1Swzl, texture2d t2, constant uint& t2Swzl, texture3d t3, sampler t3Smplr, constant uint& t3Swzl, texturecube tc, constant uint& tcSwzl, texture2d_array t2a, sampler t2aSmplr, constant uint& t2aSwzl, texturecube_array tca, sampler tcaSmplr, constant uint& tcaSwzl, texture2d tb, depth2d d2, sampler d2Smplr, constant uint& d2Swzl, depthcube dc, sampler dcSmplr, constant uint& dcSwzl, depth2d_array d2a, constant uint& d2aSwzl, depthcube_array dca, sampler dcaSmplr, constant uint& dcaSwzl, sampler defaultSampler, sampler shadowSampler) { float4 c = spvTextureSwizzle(t1.sample(t1Smplr, 0.0), t1Swzl); c = spvTextureSwizzle(t2.sample(defaultSampler, float2(0.0)), t2Swzl); @@ -144,42 +146,42 @@ float4 do_samples(thread const texture1d t1, thread const sampler t1Smplr c = 
spvTextureSwizzle(tc.sample(defaultSampler, float3(0.0)), tcSwzl); c = spvTextureSwizzle(t2a.sample(t2aSmplr, float3(0.0).xy, uint(round(float3(0.0).z))), t2aSwzl); c = spvTextureSwizzle(tca.sample(tcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w))), tcaSwzl); - c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z), d2Swzl); - c.x = spvTextureSwizzle(dc.sample_compare(dcSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, float4(0.0, 0.0, 0.0, 1.0).w), dcSwzl); - c.x = spvTextureSwizzle(d2a.sample_compare(shadowSampler, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), float4(0.0, 0.0, 0.0, 1.0).w), d2aSwzl); + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, 1.0), d2Swzl); + c.x = spvTextureSwizzle(dc.sample_compare(dcSmplr, float4(0.0, 0.0, 0.0, 1.0).xyz, 1.0), dcSwzl); + c.x = spvTextureSwizzle(d2a.sample_compare(shadowSampler, float4(0.0, 0.0, 0.0, 1.0).xy, uint(round(float4(0.0, 0.0, 0.0, 1.0).z)), 1.0), d2aSwzl); c.x = spvTextureSwizzle(dca.sample_compare(dcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0), dcaSwzl); c = spvTextureSwizzle(t1.sample(t1Smplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), t1Swzl); c = spvTextureSwizzle(t2.sample(defaultSampler, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z), t2Swzl); c = spvTextureSwizzle(t3.sample(t3Smplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w), t3Swzl); float4 _119 = float4(0.0, 0.0, 1.0, 1.0); - _119.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _119.xy / _119.z, float4(0.0, 0.0, 1.0, 1.0).z / _119.z), d2Swzl); + _119.z = 1.0; + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _119.xy / _119.z, 1.0 / _119.z), d2Swzl); c = spvTextureSwizzle(t1.sample(t1Smplr, 0.0), t1Swzl); c = spvTextureSwizzle(t2.sample(defaultSampler, float2(0.0), level(0.0)), t2Swzl); c = spvTextureSwizzle(t3.sample(t3Smplr, float3(0.0), level(0.0)), t3Swzl); c = 
spvTextureSwizzle(tc.sample(defaultSampler, float3(0.0), level(0.0)), tcSwzl); c = spvTextureSwizzle(t2a.sample(t2aSmplr, float3(0.0).xy, uint(round(float3(0.0).z)), level(0.0)), t2aSwzl); c = spvTextureSwizzle(tca.sample(tcaSmplr, float4(0.0).xyz, uint(round(float4(0.0).w)), level(0.0)), tcaSwzl); - c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, float3(0.0, 0.0, 1.0).z, level(0.0)), d2Swzl); + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, float3(0.0, 0.0, 1.0).xy, 1.0, level(0.0)), d2Swzl); c = spvTextureSwizzle(t1.sample(t1Smplr, float2(0.0, 1.0).x / float2(0.0, 1.0).y), t1Swzl); c = spvTextureSwizzle(t2.sample(defaultSampler, float3(0.0, 0.0, 1.0).xy / float3(0.0, 0.0, 1.0).z, level(0.0)), t2Swzl); c = spvTextureSwizzle(t3.sample(t3Smplr, float4(0.0, 0.0, 0.0, 1.0).xyz / float4(0.0, 0.0, 0.0, 1.0).w, level(0.0)), t3Swzl); float4 _153 = float4(0.0, 0.0, 1.0, 1.0); - _153.z = float4(0.0, 0.0, 1.0, 1.0).w; - c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _153.xy / _153.z, float4(0.0, 0.0, 1.0, 1.0).z / _153.z, level(0.0)), d2Swzl); + _153.z = 1.0; + c.x = spvTextureSwizzle(d2.sample_compare(d2Smplr, _153.xy / _153.z, 1.0 / _153.z, level(0.0)), d2Swzl); c = spvTextureSwizzle(t1.read(uint(0)), t1Swzl); c = spvTextureSwizzle(t2.read(uint2(int2(0)), 0), t2Swzl); c = spvTextureSwizzle(t3.read(uint3(int3(0)), 0), t3Swzl); c = spvTextureSwizzle(t2a.read(uint2(int3(0).xy), uint(int3(0).z), 0), t2aSwzl); c = tb.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(defaultSampler, t2, float2(0.0), int2(0), component::x, t2Swzl); - c = spvGatherSwizzle, float3>(defaultSampler, tc, float3(0.0), component::y, tcSwzl); - c = spvGatherSwizzle, float2, uint, int2>(t2aSmplr, t2a, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, t2aSwzl); - c = spvGatherSwizzle, float3, uint>(tcaSmplr, tca, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, tcaSwzl); - c = spvGatherCompareSwizzle, float2, float>(d2Smplr, 
d2, float2(0.0), 1.0, d2Swzl); - c = spvGatherCompareSwizzle, float3, float>(dcSmplr, dc, float3(0.0), 1.0, dcSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(shadowSampler, d2a, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, d2aSwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(dcaSmplr, dca, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, dcaSwzl); + c = spvGatherSwizzle(t2, defaultSampler, t2Swzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(tc, defaultSampler, tcSwzl, component::y, float3(0.0)); + c = spvGatherSwizzle(t2a, t2aSmplr, t2aSwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(tca, tcaSmplr, tcaSwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(d2, d2Smplr, d2Swzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(dc, dcSmplr, dcSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(d2a, shadowSampler, d2aSwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(dca, dcaSmplr, dcaSwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); return c; } diff --git a/reference/shaders-msl/amd/shader_trinary_minmax.msl21.comp b/reference/shaders-msl/amd/shader_trinary_minmax.msl21.comp new file mode 100644 index 00000000000..14404719640 --- /dev/null +++ b/reference/shaders-msl/amd/shader_trinary_minmax.msl21.comp @@ -0,0 +1,14 @@ +#include +#include + +using namespace metal; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +kernel void main0() +{ + int t11 = min3(0, 3, 2); + int t12 = max3(0, 3, 2); + int t13 = median3(0, 3, 2); +} + diff --git a/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp b/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp index 95841a78a2d..272337fc122 100644 --- a/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp +++ b/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp @@ -7,20 +7,21 @@ using namespace metal; -struct u0_counters 
-{ - uint c; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct u0_counters +{ + uint c; +}; + kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); + uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); float4 r0; r0.x = as_type(_29); u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type(r0.x)) * 1u) + (uint(0) >> 2u)))); diff --git a/reference/shaders-msl/asm/comp/atomic-increment.asm.comp b/reference/shaders-msl/asm/comp/atomic-increment.asm.comp index cd78fa2c64c..4518248b897 100644 --- a/reference/shaders-msl/asm/comp/atomic-increment.asm.comp +++ b/reference/shaders-msl/asm/comp/atomic-increment.asm.comp @@ -7,20 +7,21 @@ using namespace metal; -struct u0_counters -{ - uint c; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct u0_counters +{ + uint c; +}; + kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { - uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); + uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed); float4 r0; r0.x = as_type(_29); u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type(r0.x)) * 1u) + (uint(0) >> 2u)))); diff --git a/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp 
b/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp index 47ce85f8fc3..cbbf27d65da 100644 --- a/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp +++ b/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp @@ -15,7 +15,7 @@ struct _4 int4 _m1; }; -kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]]) +kernel void main0(device _3& __restrict _5 [[buffer(0)]], device _4& __restrict _6 [[buffer(1)]]) { _6._m0 = _5._m1 + uint4(_5._m0); _6._m0 = uint4(_5._m0) + _5._m1; diff --git a/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..a55d8916dfa --- /dev/null +++ b/reference/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +kernel void main0(device _3& __restrict _5 [[buffer(0)]], device _4& __restrict _6 [[buffer(1)]]) +{ + _6._m0 = uint4(int4(_5._m1) < _5._m0); + _6._m0 = uint4(int4(_5._m1) <= _5._m0); + _6._m0 = uint4(_5._m1 < uint4(_5._m0)); + _6._m0 = uint4(_5._m1 <= uint4(_5._m0)); + _6._m0 = uint4(int4(_5._m1) > _5._m0); + _6._m0 = uint4(int4(_5._m1) >= _5._m0); + _6._m0 = uint4(_5._m1 > uint4(_5._m0)); + _6._m0 = uint4(_5._m1 >= uint4(_5._m0)); +} + diff --git a/reference/shaders-msl/asm/comp/block-name-alias-global.asm.comp b/reference/shaders-msl/asm/comp/block-name-alias-global.asm.comp index 2928efda2c4..6dcc14ea8d5 100644 --- a/reference/shaders-msl/asm/comp/block-name-alias-global.asm.comp +++ b/reference/shaders-msl/asm/comp/block-name-alias-global.asm.comp @@ -18,11 +18,12 @@ struct A_2 { int a; int b; + char _m0_final_padding[8]; }; struct A_3 { - /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ A_2 Data[1024]; + A_2 Data[1024]; }; struct B @@ -32,7 +33,7 @@ struct B struct B_1 { - /* FIXME: A padded struct is needed here. 
If you see this message, file a bug! */ A_2 Data[1024]; + A_2 Data[1024]; }; kernel void main0(device A_1& C1 [[buffer(0)]], constant A_3& C2 [[buffer(1)]], device B& C3 [[buffer(2)]], constant B_1& C4 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) diff --git a/reference/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp b/reference/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp index 217782ce784..ac78a65196b 100644 --- a/reference/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp +++ b/reference/shaders-msl/asm/comp/buffer-write-relative-addr.asm.comp @@ -5,17 +5,18 @@ using namespace metal; -struct cb5_struct -{ - float4 _m0[5]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct cb5_struct +{ + float4 _m0[5]; +}; + kernel void main0(constant cb5_struct& cb0_5 [[buffer(0)]], texture2d u0 [[texture(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { float4 r0; diff --git a/reference/shaders-msl/asm/comp/buffer-write.asm.comp b/reference/shaders-msl/asm/comp/buffer-write.asm.comp index 159d09b38c8..89e8d83ea71 100644 --- a/reference/shaders-msl/asm/comp/buffer-write.asm.comp +++ b/reference/shaders-msl/asm/comp/buffer-write.asm.comp @@ -5,17 +5,18 @@ using namespace metal; -struct cb -{ - float value; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct cb +{ + float value; +}; + kernel void main0(constant cb& _6 [[buffer(0)]], texture2d _buffer [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { _buffer.write(float4(_6.value), spvTexelBufferCoord(((32u * gl_WorkGroupID.x) + gl_LocalInvocationIndex))); diff --git 
a/reference/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp b/reference/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp new file mode 100644 index 00000000000..986e9096633 --- /dev/null +++ b/reference/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct _19 +{ +}; +struct _5 +{ + int _m0; + _19 _m1; + char _m2_pad[4]; + _19 _m2; + char _m3_pad[4]; + int _m3; +}; + +kernel void main0(device _5& _3 [[buffer(0)]], device _5& _4 [[buffer(1)]]) +{ + _4 = _3; +} + diff --git a/reference/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp b/reference/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp new file mode 100644 index 00000000000..4bcfeb21ab5 --- /dev/null +++ b/reference/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct _19 +{ +}; +struct _5 +{ + int _m0; + char _m1_pad[12]; + _19 _m1; + char _m2_pad[16]; + _19 _m2; + char _m3_pad[16]; + int _m3; +}; + +kernel void main0(constant _5& _3 [[buffer(0)]], device _5& _4 [[buffer(1)]]) +{ + _4 = _3; +} + diff --git a/reference/shaders-msl/asm/comp/global-parameter-name-alias.asm.comp b/reference/shaders-msl/asm/comp/global-parameter-name-alias.asm.comp index 967d14bfcb7..ec1b2a2da24 100644 --- a/reference/shaders-msl/asm/comp/global-parameter-name-alias.asm.comp +++ b/reference/shaders-msl/asm/comp/global-parameter-name-alias.asm.comp @@ -10,12 +10,14 @@ struct ssbo uint _data[1]; }; +static inline __attribute__((always_inline)) void Load(thread const uint& size, const device ssbo& ssbo_1) { int byteAddrTemp = int(size >> uint(2)); uint4 data = uint4(ssbo_1._data[byteAddrTemp], ssbo_1._data[byteAddrTemp + 1], ssbo_1._data[byteAddrTemp + 2], ssbo_1._data[byteAddrTemp + 3]); } +static inline __attribute__((always_inline)) void _main(thread const uint3& id, const device ssbo& ssbo_1) { uint param = 4u; diff --git 
a/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp new file mode 100644 index 00000000000..536556391ec --- /dev/null +++ b/reference/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -0,0 +1,22 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +static inline __attribute__((always_inline)) +void _main(thread const uint3& id, texture2d TargetTexture) +{ + float2 loaded = TargetTexture.read(uint2(id.xy)).xy; + float2 storeTemp = loaded + float2(1.0); + TargetTexture.write(storeTemp.xyyy, uint2((id.xy + uint2(1u)))); +} + +kernel void main0(texture2d TargetTexture [[texture(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint3 id = gl_WorkGroupID; + uint3 param = id; + _main(param, TargetTexture); +} + diff --git a/reference/shaders-msl/asm/comp/multiple-entry.asm.comp b/reference/shaders-msl/asm/comp/multiple-entry.asm.comp index 7652733268f..35843733790 100644 --- a/reference/shaders-msl/asm/comp/multiple-entry.asm.comp +++ b/reference/shaders-msl/asm/comp/multiple-entry.asm.comp @@ -15,7 +15,7 @@ struct _7 int4 _m1; }; -kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]]) +kernel void main0(device _6& __restrict _8 [[buffer(0)]], device _7& __restrict _9 [[buffer(1)]]) { _9._m0 = _8._m1 + uint4(_8._m0); _9._m0 = uint4(_8._m0) + _8._m1; diff --git a/reference/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp b/reference/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp new file mode 100644 index 00000000000..42c13f7a4d0 --- /dev/null +++ b/reference/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp @@ -0,0 +1,111 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct 
spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _29 +{ + spvUnsafeArray, 3> _m0; +}; + +struct _7 +{ + int _m0[1]; +}; + +constant int3 _32 = {}; +constant int _3_tmp [[function_constant(0)]]; +constant int _3 = is_function_constant_defined(_3_tmp) ? _3_tmp : 0; +constant int _4_tmp [[function_constant(1)]]; +constant int _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 0; +constant int _5_tmp [[function_constant(2)]]; +constant int _5 = is_function_constant_defined(_5_tmp) ? 
_5_tmp : 0; +constant spvUnsafeArray _36 = spvUnsafeArray({ _3, 0, 0 }); +constant spvUnsafeArray _37 = spvUnsafeArray({ _3, _4, 0 }); +constant spvUnsafeArray _38 = spvUnsafeArray({ _3, _4, _5 }); +constant spvUnsafeArray _39 = spvUnsafeArray({ _4, 0, 0 }); +constant spvUnsafeArray _40 = spvUnsafeArray({ _4, _5, 0 }); +constant spvUnsafeArray _41 = spvUnsafeArray({ _4, _5, _3 }); +constant spvUnsafeArray _42 = spvUnsafeArray({ _5, 0, 0 }); +constant spvUnsafeArray _43 = spvUnsafeArray({ _5, _3, 0 }); +constant spvUnsafeArray _44 = spvUnsafeArray({ _5, _3, _4 }); +constant spvUnsafeArray, 3> _45 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }) }); +constant spvUnsafeArray, 3> _46 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ 0, 0, 0 }) }); +constant spvUnsafeArray, 3> _47 = spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ _5, _3, _4 }) }); +constant _29 _48 = _29{ spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _3 }), spvUnsafeArray({ _5, _3, _4 }) }) }; +constant _29 _49 = _29{ spvUnsafeArray, 3>({ spvUnsafeArray({ _3, _4, _5 }), spvUnsafeArray({ _4, _5, _5 }), spvUnsafeArray({ _5, _3, _4 }) }) }; +constant int _50 = _48._m0[0][0]; +constant int _51 = _48._m0[1][0]; +constant int _52 = _48._m0[0][1]; +constant int _53 = _48._m0[2][2]; +constant int _54 = _48._m0[2][0]; +constant int _55 = _48._m0[1][1]; +constant bool _56 = (_50 == _51); +constant bool _57 = (_52 == _53); +constant bool _58 = (_54 == _55); +constant int _59 = int(_56); +constant int _60 = int(_57); +constant int _61 = _58 ? 
2 : 1; +constant int3 _62 = int3(_3, 0, 0); +constant int3 _63 = int3(0, _4, 0); +constant int3 _64 = int3(0, 0, _5); +constant int3 _65 = int3(_62.x, 0, _62.z); +constant int3 _66 = int3(0, _63.y, _63.x); +constant int3 _67 = int3(_64.z, 0, _64.z); +constant int3 _68 = int3(_65.y, _65.x, _66.y); +constant int3 _69 = int3(_67.z, _68.y, _68.z); +constant int _70 = _69.x; +constant int _71 = _69.y; +constant int _72 = _69.z; +constant int _73 = (_70 - _71); +constant int _74 = (_73 * _72); + +constant spvUnsafeArray _33 = spvUnsafeArray({ 0, 0, 0 }); +constant spvUnsafeArray, 3> _34 = spvUnsafeArray, 3>({ spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }), spvUnsafeArray({ 0, 0, 0 }) }); + +kernel void main0(device _7& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _9._m0[gl_GlobalInvocationID.x] = _8._m0[gl_GlobalInvocationID.x] + ((((1 - _59) * _60) * (_61 - 1)) * _74); +} + diff --git a/reference/shaders-msl/asm/comp/quantize.asm.comp b/reference/shaders-msl/asm/comp/quantize.asm.comp index 1839ec7a3b8..672c2b20883 100644 --- a/reference/shaders-msl/asm/comp/quantize.asm.comp +++ b/reference/shaders-msl/asm/comp/quantize.asm.comp @@ -1,8 +1,21 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + #include #include using namespace metal; +template struct SpvHalfTypeSelector; +template <> struct SpvHalfTypeSelector { public: using H = half; }; +template struct SpvHalfTypeSelector> { using H = vec; }; +template::H> +[[clang::optnone]] F spvQuantizeToF16(F fval) +{ + H hval = H(fval); + hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval)); + return F(hval); +} + struct SSBO0 { float scalar; @@ -13,9 +26,9 @@ struct SSBO0 kernel void main0(device SSBO0& _4 [[buffer(0)]]) { - _4.scalar = float(half(_4.scalar)); - _4.vec2_val = float2(half2(_4.vec2_val)); - _4.vec3_val = float3(half3(_4.vec3_val)); - _4.vec4_val = float4(half4(_4.vec4_val)); + _4.scalar = 
spvQuantizeToF16(_4.scalar); + _4.vec2_val = spvQuantizeToF16(_4.vec2_val); + _4.vec3_val = spvQuantizeToF16(_4.vec3_val); + _4.vec4_val = spvQuantizeToF16(_4.vec4_val); } diff --git a/reference/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp deleted file mode 100644 index 473298c2741..00000000000 --- a/reference/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -using namespace metal; - -struct _6 -{ - float _m0[1]; -}; - -constant uint _3_tmp [[function_constant(0)]]; -constant uint _3 = is_function_constant_defined(_3_tmp) ? _3_tmp : 1u; -constant uint _4_tmp [[function_constant(2)]]; -constant uint _4 = is_function_constant_defined(_4_tmp) ? _4_tmp : 3u; -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_3, 2u, _4); - -kernel void main0(device _6& _8 [[buffer(0)]], device _6& _9 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x]; - uint3 _23 = gl_WorkGroupSize; -} - diff --git a/reference/shaders-msl/asm/comp/struct-resource-name-aliasing.asm.comp b/reference/shaders-msl/asm/comp/struct-resource-name-aliasing.asm.comp index a3323bf2514..fa7d2698210 100644 --- a/reference/shaders-msl/asm/comp/struct-resource-name-aliasing.asm.comp +++ b/reference/shaders-msl/asm/comp/struct-resource-name-aliasing.asm.comp @@ -10,6 +10,7 @@ struct bufA uint _data[1]; }; +static inline __attribute__((always_inline)) void _main(device bufA& bufA_1, device bufA& bufB) { bufA_1._data[0] = 0u; diff --git a/reference/shaders-msl/asm/comp/uint_smulextended.asm.comp b/reference/shaders-msl/asm/comp/uint_smulextended.asm.comp new file mode 100644 index 00000000000..6996f7fd26a --- /dev/null +++ b/reference/shaders-msl/asm/comp/uint_smulextended.asm.comp @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct _4 +{ + uint 
_m0[1]; +}; + +struct _20 +{ + uint _m0; + uint _m1; +}; + +kernel void main0(device _4& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]], device _4& _7 [[buffer(2)]], device _4& _8 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + _20 _28; + _28._m0 = uint(int(_5._m0[gl_GlobalInvocationID.x]) * int(_6._m0[gl_GlobalInvocationID.x])); + _28._m1 = uint(mulhi(int(_5._m0[gl_GlobalInvocationID.x]), int(_6._m0[gl_GlobalInvocationID.x]))); + _7._m0[gl_GlobalInvocationID.x] = _28._m0; + _8._m0[gl_GlobalInvocationID.x] = _28._m1; +} + diff --git a/reference/shaders-msl/asm/comp/undefined-constant-composite.asm.comp b/reference/shaders-msl/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..e3ded1f2088 --- /dev/null +++ b/reference/shaders-msl/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct _20 +{ + int _m0; + int _m1; +}; + +struct _5 +{ + int _m0[10]; +}; + +struct _7 +{ + int _m0[10]; +}; + +constant int _28 = {}; + +static inline __attribute__((always_inline)) +int _39(thread const int& _41, thread const _20& _42) +{ + return _41 + _42._m1; +} + +kernel void main0(device _5& _6 [[buffer(0)]], device _7& _8 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int _32 = _8._m0[gl_GlobalInvocationID.x]; + _20 _33 = _20{ _28, 200 }; + _6._m0[gl_GlobalInvocationID.x] = _39(_32, _33); +} + diff --git a/reference/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp b/reference/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp new file mode 100644 index 00000000000..d0f2790d8d0 --- /dev/null +++ b/reference/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp @@ -0,0 +1,42 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct _21 +{ + int _m0; + int _m1; +}; + +struct _5 
+{ + int _m0[10]; +}; + +struct _7 +{ + int _m0[10]; +}; + +constant int _29 = {}; +constant int _9_tmp [[function_constant(0)]]; +constant int _9 = is_function_constant_defined(_9_tmp) ? _9_tmp : 0; +constant _21 _30 = _21{ _9, _29 }; + +static inline __attribute__((always_inline)) +int _42(thread const int& _44, thread const _21& _45, thread const _21& _46) +{ + return (_44 + _45._m0) + _46._m1; +} + +kernel void main0(device _5& _6 [[buffer(0)]], device _7& _8 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int _34 = _8._m0[gl_GlobalInvocationID.x]; + _21 _35 = _30; + _21 _36 = _21{ _29, 200 }; + _6._m0[gl_GlobalInvocationID.x] = _42(_34, _35, _36); +} + diff --git a/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp b/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp index b4e22959520..dafd6d5d4f5 100644 --- a/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp +++ b/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp @@ -17,11 +17,13 @@ struct bar int d; }; +static inline __attribute__((always_inline)) device foo* select_buffer(device foo& a, constant bar& cb) { return (cb.d != 0) ? &a : nullptr; } +static inline __attribute__((always_inline)) thread uint3* select_input(thread uint3& gl_GlobalInvocationID, thread uint3& gl_LocalInvocationID, constant bar& cb) { return (cb.d != 0) ? 
&gl_GlobalInvocationID : &gl_LocalInvocationID; @@ -31,12 +33,31 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]], { device foo* _46 = select_buffer(buf, cb); device foo* _45 = _46; - for (device int* _52 = &_45->a[0u], * _55 = &buf.a[0u]; (*_52) != (*_55); _52 = &_52[1u], _55 = &_55[1u]) + thread uint3* _47 = select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb); + device foo* _48 = _45; + device int* _52; + device int* _55; + _52 = &_48->a[0u]; + _55 = &buf.a[0u]; + int _57; + int _58; + for (;;) { - int _66 = ((*_52) + (*_55)) + int((*select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb)).x); - *_52 = _66; - *_55 = _66; - continue; + _57 = *_52; + _58 = *_55; + if (_57 != _58) + { + int _66 = (_57 + _58) + int((*_47).x); + *_52 = _66; + *_55 = _66; + _52 = &_52[1u]; + _55 = &_55[1u]; + continue; + } + else + { + break; + } } } diff --git a/reference/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp b/reference/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp index b2f8fc424ed..00c490c77fc 100644 --- a/reference/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp +++ b/reference/shaders-msl/asm/comp/variable-pointers-store-forwarding.asm.comp @@ -15,6 +15,7 @@ struct bar int b; }; +static inline __attribute__((always_inline)) device int* _24(device foo& a, device bar& b, thread uint3& gl_GlobalInvocationID) { return (gl_GlobalInvocationID.x != 0u) ? 
&a.a : &b.b; diff --git a/reference/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp b/reference/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp index e4092261d6d..9fb68a63192 100644 --- a/reference/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp +++ b/reference/shaders-msl/asm/comp/vector-builtin-type-cast-func.asm.comp @@ -7,11 +7,12 @@ using namespace metal; struct cb1_struct { - float4 _m0[1]; + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; }; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 16u, 1u); +static inline __attribute__((always_inline)) int2 get_texcoord(thread const int2& base, thread const int2& index, thread uint3& gl_LocalInvocationID) { return (base * int3(gl_LocalInvocationID).xy) + index; @@ -26,7 +27,7 @@ kernel void main0(constant cb1_struct& cb0_1 [[buffer(0)]], texture2d +#include + +using namespace metal; + +struct _7 +{ + float4 _m0[64]; +}; + +struct main0_out +{ + float4 m_3 [[color(0)]]; +}; + +struct main0_in +{ + float4 m_2 [[user(locn1)]]; +}; + +static inline __attribute__((always_inline)) +void _108(int _109, texture2d v_8, device _7& v_10) +{ + int2 _113 = int2(_109 - 8 * (_109 / 8), _109 / 8); + v_10._m0[_109] = v_8.read(uint2(_113), 0); +} + +static inline __attribute__((always_inline)) +float4 _98(float4 _119, texture2d v_8, device _7& v_10) +{ + for (int _121 = 0; _121 < 64; _121++) + { + _108(_121, v_8, v_10); + } + return _119; +} + +fragment main0_out main0(main0_in in [[stage_in]], device _7& v_10 [[buffer(0)]], texture2d v_8 [[texture(0)]]) +{ + main0_out out = {}; + float4 _97 = _98(in.m_2, v_8, v_10); + out.m_3 = _97; + return out; +} + diff --git a/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag b/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag new file mode 100644 index 00000000000..df9f8f43cf3 --- /dev/null +++ b/reference/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag @@ -0,0 +1,46 @@ +#pragma clang 
diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct _7 +{ + float4 _m0[64]; +}; + +struct main0_out +{ + float4 m_3 [[color(0)]]; +}; + +struct main0_in +{ + float4 m_2 [[user(locn1)]]; +}; + +static inline __attribute__((always_inline)) +void _108(int _109, texture2d v_8, sampler v_9, device _7& v_10) +{ + v_10._m0[_109] = v_8.sample(v_9, (float2(int2(_109 - 8 * (_109 / 8), _109 / 8)) / float2(8.0)), level(0.0)); +} + +static inline __attribute__((always_inline)) +float4 _98(float4 _121, texture2d v_8, sampler v_9, device _7& v_10) +{ + for (int _123 = 0; _123 < 64; _123++) + { + _108(_123, v_8, v_9, v_10); + } + return _121; +} + +fragment main0_out main0(main0_in in [[stage_in]], device _7& v_10 [[buffer(0)]], texture2d v_8 [[texture(0)]], sampler v_9 [[sampler(0)]]) +{ + main0_out out = {}; + float4 _97 = _98(in.m_2, v_8, v_9, v_10); + out.m_3 = _97; + return out; +} + diff --git a/reference/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag b/reference/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag new file mode 100644 index 00000000000..b64ccabe6bc --- /dev/null +++ b/reference/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 o1 [[color(1)]]; + float4 o3 [[color(3)]]; + float4 o6 [[color(6)]]; + float4 o7 [[color(7)]]; +}; + +fragment main0_out main0() +{ + float4 o0; + float4 o2; + float4 o4; + float4 o5; + float gl_FragDepth; + int gl_FragStencilRefARB; + main0_out out = {}; + o0 = float4(0.0, 0.0, 0.0, 1.0); + out.o1 = float4(1.0, 0.0, 0.0, 1.0); + o2 = float4(0.0, 1.0, 0.0, 1.0); + out.o3 = float4(0.0, 0.0, 1.0, 1.0); + o4 = float4(1.0, 0.0, 1.0, 0.5); + o5 = float4(0.25); + out.o6 = float4(0.75); + out.o7 = float4(1.0); + gl_FragDepth = 0.89999997615814208984375; + gl_FragStencilRefARB = uint(127); + return out; +} + diff --git 
a/reference/shaders-msl/asm/frag/empty-struct.asm.frag b/reference/shaders-msl/asm/frag/empty-struct.asm.frag index 0a56f1f153e..e30c5864b26 100644 --- a/reference/shaders-msl/asm/frag/empty-struct.asm.frag +++ b/reference/shaders-msl/asm/frag/empty-struct.asm.frag @@ -7,14 +7,14 @@ using namespace metal; struct EmptyStructTest { - int empty_struct_member; }; - +static inline __attribute__((always_inline)) float GetValue(thread const EmptyStructTest& self) { return 0.0; } +static inline __attribute__((always_inline)) float GetValue_1(EmptyStructTest self) { return 0.0; @@ -22,9 +22,8 @@ float GetValue_1(EmptyStructTest self) fragment void main0() { - EmptyStructTest _23 = EmptyStructTest{ 0 }; EmptyStructTest emptyStruct; float value = GetValue(emptyStruct); - value = GetValue_1(_23); + value = GetValue_1(EmptyStructTest{ }); } diff --git a/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag b/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag index 1f92b341a80..e926bc5f3e8 100644 --- a/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag +++ b/reference/shaders-msl/asm/frag/extract-packed-from-composite.asm.frag @@ -28,6 +28,7 @@ struct main0_out float4 _entryPointOutput [[color(0)]]; }; +static inline __attribute__((always_inline)) float4 _main(thread const float4& pos, constant buf& v_11) { int _46 = int(pos.x) % 16; diff --git a/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag b/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag index 1a6314c8199..40fe5c5d812 100644 --- a/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag +++ b/reference/shaders-msl/asm/frag/function-overload-alias.asm.frag @@ -10,21 +10,25 @@ struct main0_out float4 FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) float4 foo(thread const float4& foo_1) { return foo_1 + float4(1.0); } +static inline __attribute__((always_inline)) float4 foo(thread const float3& foo_1) { return 
foo_1.xyzz + float4(1.0); } +static inline __attribute__((always_inline)) float4 foo_1(thread const float4& foo_2) { return foo_2 + float4(2.0); } +static inline __attribute__((always_inline)) float4 foo(thread const float2& foo_2) { return foo_2.xyxy + float4(2.0); diff --git a/reference/shaders-msl/asm/frag/inf-nan-constant.asm.frag b/reference/shaders-msl/asm/frag/inf-nan-constant.asm.frag index 8537dac19a1..067719896b8 100644 --- a/reference/shaders-msl/asm/frag/inf-nan-constant.asm.frag +++ b/reference/shaders-msl/asm/frag/inf-nan-constant.asm.frag @@ -11,7 +11,7 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - out.FragColor = float3(as_type(0x7f800000u), as_type(0xff800000u), as_type(0x7fc00000u)); + out.FragColor = float3(as_type(0x7f800000u /* inf */), as_type(0xff800000u /* -inf */), as_type(0x7fc00000u /* nan */)); return out; } diff --git a/reference/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag b/reference/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag index 41472adac94..6407b32b2e8 100644 --- a/reference/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag +++ b/reference/shaders-msl/asm/frag/interpolation-qualifiers-struct.asm.frag @@ -21,26 +21,26 @@ struct main0_out struct main0_in { - float2 Input_v0 [[user(locn0)]]; - float2 Input_v1 [[user(locn1), center_no_perspective]]; - float3 Input_v2 [[user(locn2), centroid_perspective]]; - float4 Input_v3 [[user(locn3), centroid_no_perspective]]; - float Input_v4 [[user(locn4), sample_perspective]]; - float Input_v5 [[user(locn5), sample_no_perspective]]; - float Input_v6 [[user(locn6), flat]]; + float2 inp_v0 [[user(locn0)]]; + float2 inp_v1 [[user(locn1), center_no_perspective]]; + float3 inp_v2 [[user(locn2), centroid_perspective]]; + float4 inp_v3 [[user(locn3), centroid_no_perspective]]; + float inp_v4 [[user(locn4), sample_perspective]]; + float inp_v5 [[user(locn5), sample_no_perspective]]; + float inp_v6 [[user(locn6), flat]]; }; 
fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; Input inp = {}; - inp.v0 = in.Input_v0; - inp.v1 = in.Input_v1; - inp.v2 = in.Input_v2; - inp.v3 = in.Input_v3; - inp.v4 = in.Input_v4; - inp.v5 = in.Input_v5; - inp.v6 = in.Input_v6; + inp.v0 = in.inp_v0; + inp.v1 = in.inp_v1; + inp.v2 = in.inp_v2; + inp.v3 = in.inp_v3; + inp.v4 = in.inp_v4; + inp.v5 = in.inp_v5; + inp.v6 = in.inp_v6; out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); return out; } diff --git a/reference/shaders-msl/asm/frag/line-directive.line.asm.frag b/reference/shaders-msl/asm/frag/line-directive.line.asm.frag index e1d2a9d7b9d..38fbd27ba3a 100644 --- a/reference/shaders-msl/asm/frag/line-directive.line.asm.frag +++ b/reference/shaders-msl/asm/frag/line-directive.line.asm.frag @@ -16,6 +16,7 @@ struct main0_in }; #line 6 "test.frag" +static inline __attribute__((always_inline)) void func(thread float& FragColor, thread float& vColor) { #line 8 "test.frag" diff --git a/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag index 48f3317d2e2..41193536e69 100644 --- a/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag +++ b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -1,13 +1,52 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float _46[16] = { 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }; -constant float4 _76[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; -constant float4 _90[4] = { float4(20.0), float4(30.0), float4(50.0), float4(60.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _46 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }); +constant spvUnsafeArray _76 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); +constant spvUnsafeArray _90 = spvUnsafeArray({ float4(20.0), float4(30.0), float4(50.0), float4(60.0) }); struct main0_out { @@ -19,23 +58,10 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { - float4 foobar[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; - float4 baz[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; + spvUnsafeArray foobar = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); + spvUnsafeArray baz = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); main0_out out = {}; out.FragColor = _46[in.index]; if (in.index < 10) @@ -59,7 +85,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) foobar[1].z = 20.0; } out.FragColor += foobar[in.index & 3].z; - spvArrayCopyFromConstant1(baz, _90); + baz = _90; out.FragColor += baz[in.index & 3].z; return out; } diff --git a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag index 9d5d7fb1d69..f8104f81657 100644 --- a/reference/shaders-msl/asm/frag/op-constant-null.asm.frag +++ b/reference/shaders-msl/asm/frag/op-constant-null.asm.frag @@ -1,15 +1,56 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct D { float4 a; float b; }; -constant float4 _14[4] = { float4(0.0), float4(0.0), float4(0.0), float4(0.0) }; +constant spvUnsafeArray _14 = spvUnsafeArray({ float4(0.0), float4(0.0), float4(0.0), float4(0.0) }); struct main0_out { diff --git a/reference/shaders-msl/asm/frag/pass-by-value.asm.frag b/reference/shaders-msl/asm/frag/pass-by-value.asm.frag index 6ed945ecff0..46648f6d309 100644 --- a/reference/shaders-msl/asm/frag/pass-by-value.asm.frag +++ b/reference/shaders-msl/asm/frag/pass-by-value.asm.frag @@ -15,6 +15,7 @@ struct main0_out float FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) float add_value(float v, float w) { return v + w; diff --git a/reference/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag b/reference/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag new file mode 100644 index 00000000000..02ec4751bb1 --- /dev/null +++ b/reference/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag @@ -0,0 +1,187 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _13 +{ + float4 x; + float4 y; + float4 z; + spvUnsafeArray u; + spvUnsafeArray v; + spvUnsafeArray w; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + interpolant foo [[user(locn0)]]; + interpolant bar [[user(locn1)]]; + interpolant baz [[user(locn2)]]; + int sid [[user(locn3)]]; + interpolant a_0 [[user(locn4)]]; + interpolant a_1 [[user(locn5)]]; + interpolant b_0 [[user(locn6)]]; + interpolant b_1 [[user(locn7)]]; + interpolant c_0 [[user(locn8)]]; + interpolant c_1 [[user(locn9)]]; + interpolant s_x [[user(locn10)]]; + interpolant s_y [[user(locn11)]]; + interpolant s_z [[user(locn12)]]; + interpolant s_u_0 [[user(locn13)]]; + interpolant s_u_1 [[user(locn14)]]; + interpolant s_v_0 [[user(locn15)]]; + interpolant s_v_1 [[user(locn16)]]; + interpolant s_w_0 [[user(locn17)]]; + interpolant s_w_1 [[user(locn18)]]; + interpolant s_w_2 [[user(locn19)]]; +}; + +static inline __attribute__((always_inline)) +void func(thread float4& FragColor, thread float2 baz, thread spvUnsafeArray& a, thread _13& s, thread main0_in& in) +{ + float2 _237 = FragColor.xy + baz; + FragColor = float4(_237.x, _237.y, FragColor.z, FragColor.w); + FragColor.x += in.baz.interpolate_at_centroid().x; + FragColor.y += in.baz.interpolate_at_sample(3).y; + FragColor.z += 
in.baz.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).y; + float2 _262 = FragColor.xy + in.a_1.interpolate_at_centroid(); + FragColor = float4(_262.x, _262.y, FragColor.z, FragColor.w); + float2 _269 = FragColor.xy + in.a_0.interpolate_at_sample(2); + FragColor = float4(_269.x, _269.y, FragColor.z, FragColor.w); + float2 _276 = FragColor.xy + in.a_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + FragColor = float4(_276.x, _276.y, FragColor.z, FragColor.w); + FragColor += s.z; + float2 _288 = FragColor.xy + in.s_z.interpolate_at_centroid().yy; + FragColor = float4(_288.x, _288.y, FragColor.z, FragColor.w); + float2 _296 = FragColor.yz + in.s_z.interpolate_at_sample(3).xy; + FragColor = float4(FragColor.x, _296.x, _296.y, FragColor.w); + float2 _304 = FragColor.zw + in.s_z.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).wx; + FragColor = float4(FragColor.x, FragColor.y, _304.x, _304.y); + FragColor += s.u[0]; + FragColor += in.s_u_1.interpolate_at_centroid(); + FragColor += in.s_u_0.interpolate_at_sample(2); + FragColor += in.s_u_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); +} + +fragment main0_out main0(main0_in in [[stage_in]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + spvUnsafeArray a = {}; + _13 s = {}; + spvUnsafeArray b = {}; + spvUnsafeArray c = {}; + a[0] = in.a_0.interpolate_at_center(); + a[1] = in.a_1.interpolate_at_center(); + s.x = in.s_x.interpolate_at_center(); + s.y = in.s_y.interpolate_at_centroid(); + s.z = in.s_z.interpolate_at_sample(gl_SampleID); + s.u[0] = in.s_u_0.interpolate_at_centroid(); + s.u[1] = in.s_u_1.interpolate_at_centroid(); + s.v[0] = in.s_v_0.interpolate_at_sample(gl_SampleID); + s.v[1] = in.s_v_1.interpolate_at_sample(gl_SampleID); + s.w[0] = in.s_w_0.interpolate_at_center(); + s.w[1] = 
in.s_w_1.interpolate_at_center(); + s.w[2] = in.s_w_2.interpolate_at_center(); + b[0] = in.b_0.interpolate_at_centroid(); + b[1] = in.b_1.interpolate_at_centroid(); + c[0] = in.c_0.interpolate_at_sample(gl_SampleID); + c[1] = in.c_1.interpolate_at_sample(gl_SampleID); + out.FragColor = in.foo.interpolate_at_center(); + out.FragColor += in.foo.interpolate_at_centroid(); + out.FragColor += in.foo.interpolate_at_sample(in.sid); + out.FragColor += in.foo.interpolate_at_offset(float2(0.100000001490116119384765625) + 0.4375); + float3 _65 = out.FragColor.xyz + in.bar.interpolate_at_centroid(); + out.FragColor = float4(_65.x, _65.y, _65.z, out.FragColor.w); + float3 _71 = out.FragColor.xyz + in.bar.interpolate_at_centroid(); + out.FragColor = float4(_71.x, _71.y, _71.z, out.FragColor.w); + float3 _78 = out.FragColor.xyz + in.bar.interpolate_at_sample(in.sid); + out.FragColor = float4(_78.x, _78.y, _78.z, out.FragColor.w); + float3 _84 = out.FragColor.xyz + in.bar.interpolate_at_offset(float2(-0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_84.x, _84.y, _84.z, out.FragColor.w); + float2 _91 = out.FragColor.xy + b[0]; + out.FragColor = float4(_91.x, _91.y, out.FragColor.z, out.FragColor.w); + float2 _98 = out.FragColor.xy + in.b_1.interpolate_at_centroid(); + out.FragColor = float4(_98.x, _98.y, out.FragColor.z, out.FragColor.w); + float2 _105 = out.FragColor.xy + in.b_0.interpolate_at_sample(2); + out.FragColor = float4(_105.x, _105.y, out.FragColor.z, out.FragColor.w); + float2 _112 = out.FragColor.xy + in.b_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_112.x, _112.y, out.FragColor.z, out.FragColor.w); + float2 _119 = out.FragColor.xy + c[0]; + out.FragColor = float4(_119.x, _119.y, out.FragColor.z, out.FragColor.w); + float2 _127 = out.FragColor.xy + in.c_1.interpolate_at_centroid().xy; + out.FragColor = float4(_127.x, _127.y, out.FragColor.z, out.FragColor.w); + 
float2 _135 = out.FragColor.xy + in.c_0.interpolate_at_sample(2).yx; + out.FragColor = float4(_135.x, _135.y, out.FragColor.z, out.FragColor.w); + float2 _143 = out.FragColor.xy + in.c_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375).xx; + out.FragColor = float4(_143.x, _143.y, out.FragColor.z, out.FragColor.w); + out.FragColor += s.x; + out.FragColor += in.s_x.interpolate_at_centroid(); + out.FragColor += in.s_x.interpolate_at_sample(in.sid); + out.FragColor += in.s_x.interpolate_at_offset(float2(0.100000001490116119384765625) + 0.4375); + out.FragColor += s.y; + out.FragColor += in.s_y.interpolate_at_centroid(); + out.FragColor += in.s_y.interpolate_at_sample(in.sid); + out.FragColor += in.s_y.interpolate_at_offset(float2(-0.100000001490116119384765625) + 0.4375); + float2 _184 = out.FragColor.xy + s.v[0]; + out.FragColor = float4(_184.x, _184.y, out.FragColor.z, out.FragColor.w); + float2 _191 = out.FragColor.xy + in.s_v_1.interpolate_at_centroid(); + out.FragColor = float4(_191.x, _191.y, out.FragColor.z, out.FragColor.w); + float2 _198 = out.FragColor.xy + in.s_v_0.interpolate_at_sample(2); + out.FragColor = float4(_198.x, _198.y, out.FragColor.z, out.FragColor.w); + float2 _205 = out.FragColor.xy + in.s_v_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + out.FragColor = float4(_205.x, _205.y, out.FragColor.z, out.FragColor.w); + out.FragColor.x += s.w[0]; + out.FragColor.x += in.s_w_1.interpolate_at_centroid(); + out.FragColor.x += in.s_w_0.interpolate_at_sample(2); + out.FragColor.x += in.s_w_1.interpolate_at_offset(float2(-0.100000001490116119384765625, 0.100000001490116119384765625) + 0.4375); + func(out.FragColor, in.baz.interpolate_at_sample(gl_SampleID), a, s, in); + return out; +} + diff --git a/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag b/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag index 
628d5c7c1b4..2ea037407f2 100644 --- a/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag +++ b/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag @@ -1,44 +1,70 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -struct myType -{ - float data; -}; - -constant myType _21[5] = { myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 } }; - -struct main0_out +template +struct spvUnsafeArray { - float4 o_color [[color(0)]]; + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } }; // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +struct myType { - for (uint i = 0; i < N; dst[i] = src[i], i++); -} + float data; +}; -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +struct main0_out { - for (uint i = 0; i < N; dst[i] = src[i], i++); -} + float4 o_color [[color(0)]]; +}; fragment main0_out main0(float4 gl_FragCoord [[position]]) { + spvUnsafeArray _21 = spvUnsafeArray({ myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 }, myType{ 1.0 }, myType{ 0.0 } }); + main0_out out = {}; float2 uv = gl_FragCoord.xy; int index = int(mod(uv.x, 4.0)); diff --git a/reference/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag b/reference/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag index 1bafc6953ba..d59013daaf8 100644 --- a/reference/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag +++ b/reference/shaders-msl/asm/frag/storage-class-output-initializer.asm.frag @@ -1,9 +1,50 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; -constant float4 _20[2] = { float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _20 = spvUnsafeArray({ float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }); struct main0_out { @@ -15,7 +56,7 @@ struct main0_out fragment main0_out main0() { main0_out out = {}; - float4 FragColors[2] = { float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }; + spvUnsafeArray FragColors = spvUnsafeArray({ float4(1.0, 2.0, 3.0, 4.0), float4(10.0) }); out.FragColor = float4(5.0); out.FragColors_0 = FragColors[0]; out.FragColors_1 = FragColors[1]; diff --git a/reference/shaders-msl/asm/frag/switch-different-sizes.asm.frag b/reference/shaders-msl/asm/frag/switch-different-sizes.asm.frag new file mode 100644 index 00000000000..1ee9eebf005 --- /dev/null +++ b/reference/shaders-msl/asm/frag/switch-different-sizes.asm.frag @@ -0,0 +1,78 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + int sw0 = 42; + int result = 0; + switch (sw0) + { + case -42: + { + result = 42; + } + case 420: + { + result = 420; + } + case -1234: + { + result = 420; + break; + } + } + char sw1 = char(10); + switch (sw1) + { + case -42: + { + result = 42; + } + case 42: + { + result = 420; + } + case -123: + { + result = 512; + break; + } + } + short sw2 = short(10); + switch (sw2) + { + case -42: + { + result = 42; + } + case 42: + { + result = 420; + } + case -1234: + { + result = 512; + break; + } + } + 
short sw3 = short(10); + switch (sw3) + { + case -42: + { + result = 42; + } + case 42: + { + result = 420; + } + case -1234: + { + result = 512; + break; + } + } +} + diff --git a/reference/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag b/reference/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag new file mode 100644 index 00000000000..5ec002e7806 --- /dev/null +++ b/reference/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + long sw = 42l; + int result = 0; + switch (sw) + { + case -42l: + { + result = 42; + } + case 420l: + { + result = 420; + } + case -34359738368l: + { + result = 420; + break; + } + } +} + diff --git a/reference/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag b/reference/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag new file mode 100644 index 00000000000..2bf44c207b8 --- /dev/null +++ b/reference/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + ulong sw = 42ul; + int result = 0; + switch (sw) + { + case 42ul: + { + result = 42; + } + case 420ul: + { + result = 420; + } + case 343597383680ul: + { + result = 420; + break; + } + } +} + diff --git a/reference/shaders-msl/asm/frag/texture-atomics.asm.frag b/reference/shaders-msl/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..ab5be649849 --- /dev/null +++ b/reference/shaders-msl/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + spvUnsafeArray _m0; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _102 = float3(_100.x, _100.y, _70.z); + _102.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102)) + { + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + 
_73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/reference/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..ab5be649849 --- /dev/null +++ b/reference/shaders-msl/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,121 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + spvUnsafeArray _m0; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(2)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _102 = float3(_100.x, _100.y, _70.z); + _102.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102)) + { + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + 
_73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz)); + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag b/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag index ce2b95196e3..e512bdca497 100644 --- a/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag +++ b/reference/shaders-msl/asm/frag/unknown-depth-state.asm.frag @@ -15,12 +15,14 @@ struct main0_in float3 vUV [[user(locn0)]]; }; -float sample_combined(thread float3& vUV, thread depth2d uShadow, thread const sampler uShadowSmplr) +static inline __attribute__((always_inline)) +float sample_combined(thread float3& vUV, depth2d uShadow, sampler uShadowSmplr) { return uShadow.sample_compare(uShadowSmplr, vUV.xy, vUV.z); } -float sample_separate(thread float3& vUV, thread depth2d uTexture, thread sampler uSampler) +static inline __attribute__((always_inline)) +float 
sample_separate(thread float3& vUV, depth2d uTexture, sampler uSampler) { return uTexture.sample_compare(uSampler, vUV.xy, vUV.z); } diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag index 8df57c55b0d..624408c4d6f 100644 --- a/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag +++ b/reference/shaders-msl/asm/frag/unord-relational-op.asm.frag @@ -29,25 +29,26 @@ fragment main0_out main0(main0_in in [[stage_in]]) float t0 = a; float t1 = b; bool c1 = (isunordered(a, b) || a == b); - bool c2 = (isunordered(a, b) || a != b); + c1 = a != b; + bool c2 = a != b; bool c3 = (isunordered(a, b) || a < b); bool c4 = (isunordered(a, b) || a > b); bool c5 = (isunordered(a, b) || a <= b); bool c6 = (isunordered(a, b) || a >= b); bool2 c7 = (isunordered(in.c, in.d) || in.c == in.d); - bool2 c8 = (isunordered(in.c, in.d) || in.c != in.d); + bool2 c8 = in.c != in.d; bool2 c9 = (isunordered(in.c, in.d) || in.c < in.d); bool2 c10 = (isunordered(in.c, in.d) || in.c > in.d); bool2 c11 = (isunordered(in.c, in.d) || in.c <= in.d); bool2 c12 = (isunordered(in.c, in.d) || in.c >= in.d); bool3 c13 = (isunordered(in.e, in.f) || in.e == in.f); - bool3 c14 = (isunordered(in.e, in.f) || in.e != in.f); + bool3 c14 = in.e != in.f; bool3 c15 = (isunordered(in.e, in.f) || in.e < in.f); bool3 c16 = (isunordered(in.e, in.f) || in.e > in.f); bool3 c17 = (isunordered(in.e, in.f) || in.e <= in.f); bool3 c18 = (isunordered(in.e, in.f) || in.e >= in.f); bool4 c19 = (isunordered(in.g, in.h) || in.g == in.h); - bool4 c20 = (isunordered(in.g, in.h) || in.g != in.h); + bool4 c20 = in.g != in.h; bool4 c21 = (isunordered(in.g, in.h) || in.g < in.h); bool4 c22 = (isunordered(in.g, in.h) || in.g > in.h); bool4 c23 = (isunordered(in.g, in.h) || in.g <= in.h); diff --git a/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag new file 
mode 100644 index 00000000000..4848280635b --- /dev/null +++ b/reference/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag @@ -0,0 +1,59 @@ +#include +#include + +using namespace metal; + +constant float a_tmp [[function_constant(1)]]; +constant float a = is_function_constant_defined(a_tmp) ? a_tmp : 1.0; +constant float b_tmp [[function_constant(2)]]; +constant float b = is_function_constant_defined(b_tmp) ? b_tmp : 2.0; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 c [[user(locn2)]]; + float2 d [[user(locn3)]]; + float3 e [[user(locn4)]]; + float3 f [[user(locn5)]]; + float4 g [[user(locn6)]]; + float4 h [[user(locn7)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float t0 = a; + float t1 = b; + bool c1 = a == b; + c1 = a != b; + bool c2 = a != b; + bool c3 = a < b; + bool c4 = a > b; + bool c5 = a <= b; + bool c6 = a >= b; + bool2 c7 = in.c == in.d; + bool2 c8 = in.c != in.d; + bool2 c9 = in.c < in.d; + bool2 c10 = in.c > in.d; + bool2 c11 = in.c <= in.d; + bool2 c12 = in.c >= in.d; + bool3 c13 = in.e == in.f; + bool3 c14 = in.e != in.f; + bool3 c15 = in.e < in.f; + bool3 c16 = in.e > in.f; + bool3 c17 = in.e <= in.f; + bool3 c18 = in.e >= in.f; + bool4 c19 = in.g == in.h; + bool4 c20 = in.g != in.h; + bool4 c21 = in.g < in.h; + bool4 c22 = in.g > in.h; + bool4 c23 = in.g <= in.h; + bool4 c24 = in.g >= in.h; + out.FragColor = float4(t0 + t1); + return out; +} + diff --git a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag index e55d9aeada7..9e73be477a9 100644 --- a/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/shaders-msl/asm/frag/vector-shuffle-oom.asm.frag @@ -128,7 +128,7 @@ struct main0_out fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buffer(1)]], constant _18& _19 [[buffer(2)]], texture2d _8 [[texture(0)]], texture2d _12 
[[texture(1)]], texture2d _14 [[texture(2)]], sampler _9 [[sampler(0)]], sampler _13 [[sampler(1)]], sampler _15 [[sampler(2)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - _28 _77 = _74; + _28 _77; _77._m0 = float4(0.0); float2 _82 = gl_FragCoord.xy * _19._m23.xy; float4 _88 = _7._m2 * _7._m0.xyxy; @@ -144,11 +144,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _129 = _109; } - float3 _130 = _129 * 0.5; - float3 _133 = float4(0.0).xyz + _130; + float3 _133 = float4(0.0).xyz + (_129 * 0.5); float4 _134 = float4(_133.x, _133.y, _133.z, float4(0.0).w); - _28 _135 = _77; - _135._m0 = _134; + _77._m0 = _134; float2 _144 = fast::clamp(_82 + (float3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _156 = float3(_11._m5) * fast::clamp(_8.sample(_9, _144, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _160 = _12.sample(_13, _144, level(0.0)); @@ -161,11 +159,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _176 = _156; } - float3 _177 = _176 * 0.5; - float3 _180 = _134.xyz + _177; + float3 _180 = _134.xyz + (_176 * 0.5); float4 _181 = float4(_180.x, _180.y, _180.z, _134.w); - _28 _182 = _135; - _182._m0 = _181; + _77._m0 = _181; float2 _191 = fast::clamp(_82 + (float3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); float3 _203 = float3(_11._m5) * fast::clamp(_8.sample(_9, _191, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _207 = _12.sample(_13, _191, level(0.0)); @@ -178,11 +174,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _223 = _203; } - float3 _224 = _223 * 0.75; - float3 _227 = _181.xyz + _224; + float3 _227 = _181.xyz + (_223 * 0.75); float4 _228 = float4(_227.x, _227.y, _227.z, _181.w); - _28 _229 = _182; - _229._m0 = _228; + _77._m0 = _228; float2 _238 = fast::clamp(_82 + (float3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _250 = float3(_11._m5) * fast::clamp(_8.sample(_9, _238, level(0.0)).w * _7._m1, 0.0, 1.0); 
float4 _254 = _12.sample(_13, _238, level(0.0)); @@ -195,11 +189,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _270 = _250; } - float3 _271 = _270 * 0.5; - float3 _274 = _228.xyz + _271; + float3 _274 = _228.xyz + (_270 * 0.5); float4 _275 = float4(_274.x, _274.y, _274.z, _228.w); - _28 _276 = _229; - _276._m0 = _275; + _77._m0 = _275; float2 _285 = fast::clamp(_82 + (float3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _297 = float3(_11._m5) * fast::clamp(_8.sample(_9, _285, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _301 = _12.sample(_13, _285, level(0.0)); @@ -212,11 +204,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _317 = _297; } - float3 _318 = _317 * 0.5; - float3 _321 = _275.xyz + _318; + float3 _321 = _275.xyz + (_317 * 0.5); float4 _322 = float4(_321.x, _321.y, _321.z, _275.w); - _28 _323 = _276; - _323._m0 = _322; + _77._m0 = _322; float2 _332 = fast::clamp(_82 + (float3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); float3 _344 = float3(_11._m5) * fast::clamp(_8.sample(_9, _332, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _348 = _12.sample(_13, _332, level(0.0)); @@ -229,11 +219,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _364 = _344; } - float3 _365 = _364 * 0.75; - float3 _368 = _322.xyz + _365; + float3 _368 = _322.xyz + (_364 * 0.75); float4 _369 = float4(_368.x, _368.y, _368.z, _322.w); - _28 _370 = _323; - _370._m0 = _369; + _77._m0 = _369; float2 _379 = fast::clamp(_82 + (float3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw); float3 _391 = float3(_11._m5) * fast::clamp(_8.sample(_9, _379, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _395 = _12.sample(_13, _379, level(0.0)); @@ -246,11 +234,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _411 = _391; } - float3 _412 = _411 * 1.0; - float3 _415 = _369.xyz + _412; + float3 _415 = _369.xyz + (_411 * 1.0); float4 _416 = 
float4(_415.x, _415.y, _415.z, _369.w); - _28 _417 = _370; - _417._m0 = _416; + _77._m0 = _416; float2 _426 = fast::clamp(_82 + (float3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); float3 _438 = float3(_11._m5) * fast::clamp(_8.sample(_9, _426, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _442 = _12.sample(_13, _426, level(0.0)); @@ -263,11 +249,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _458 = _438; } - float3 _459 = _458 * 0.75; - float3 _462 = _416.xyz + _459; + float3 _462 = _416.xyz + (_458 * 0.75); float4 _463 = float4(_462.x, _462.y, _462.z, _416.w); - _28 _464 = _417; - _464._m0 = _463; + _77._m0 = _463; float2 _473 = fast::clamp(_82 + (float3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _485 = float3(_11._m5) * fast::clamp(_8.sample(_9, _473, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _489 = _12.sample(_13, _473, level(0.0)); @@ -280,11 +264,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _505 = _485; } - float3 _506 = _505 * 0.5; - float3 _509 = _463.xyz + _506; + float3 _509 = _463.xyz + (_505 * 0.5); float4 _510 = float4(_509.x, _509.y, _509.z, _463.w); - _28 _511 = _464; - _511._m0 = _510; + _77._m0 = _510; float2 _520 = fast::clamp(_82 + (float3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _532 = float3(_11._m5) * fast::clamp(_8.sample(_9, _520, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _536 = _12.sample(_13, _520, level(0.0)); @@ -297,11 +279,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _552 = _532; } - float3 _553 = _552 * 0.5; - float3 _556 = _510.xyz + _553; + float3 _556 = _510.xyz + (_552 * 0.5); float4 _557 = float4(_556.x, _556.y, _556.z, _510.w); - _28 _558 = _511; - _558._m0 = _557; + _77._m0 = _557; float2 _567 = fast::clamp(_82 + (float3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); float3 _579 = float3(_11._m5) * fast::clamp(_8.sample(_9, _567, level(0.0)).w * _7._m1, 0.0, 1.0); float4 
_583 = _12.sample(_13, _567, level(0.0)); @@ -314,11 +294,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _599 = _579; } - float3 _600 = _599 * 0.75; - float3 _603 = _557.xyz + _600; + float3 _603 = _557.xyz + (_599 * 0.75); float4 _604 = float4(_603.x, _603.y, _603.z, _557.w); - _28 _605 = _558; - _605._m0 = _604; + _77._m0 = _604; float2 _614 = fast::clamp(_82 + (float3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _626 = float3(_11._m5) * fast::clamp(_8.sample(_9, _614, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _630 = _12.sample(_13, _614, level(0.0)); @@ -331,11 +309,9 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff { _646 = _626; } - float3 _647 = _646 * 0.5; - float3 _650 = _604.xyz + _647; + float3 _650 = _604.xyz + (_646 * 0.5); float4 _651 = float4(_650.x, _650.y, _650.z, _604.w); - _28 _652 = _605; - _652._m0 = _651; + _77._m0 = _651; float2 _661 = fast::clamp(_82 + (float3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); float3 _673 = float3(_11._m5) * fast::clamp(_8.sample(_9, _661, level(0.0)).w * _7._m1, 0.0, 1.0); float4 _677 = _12.sample(_13, _661, level(0.0)); @@ -350,14 +326,11 @@ fragment main0_out main0(constant _6& _7 [[buffer(0)]], constant _10& _11 [[buff } float3 _697 = _651.xyz + (_693 * 0.5); float4 _698 = float4(_697.x, _697.y, _697.z, _651.w); - _28 _699 = _652; - _699._m0 = _698; + _77._m0 = _698; float3 _702 = _698.xyz / float3(((((((((((((0.0 + 0.5) + 0.5) + 0.75) + 0.5) + 0.5) + 0.75) + 1.0) + 0.75) + 0.5) + 0.5) + 0.75) + 0.5) + 0.5); - _28 _704 = _699; - _704._m0 = float4(_702.x, _702.y, _702.z, _698.w); - _28 _705 = _704; - _705._m0.w = 1.0; - out.m_5 = _705._m0; + _77._m0 = float4(_702.x, _702.y, _702.z, _698.w); + _77._m0.w = 1.0; + out.m_5 = _77._m0; return out; } diff --git a/reference/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc b/reference/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc new file mode 100644 
index 00000000000..79395a4bbb2 --- /dev/null +++ b/reference/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct TessLevels +{ + float inner0; + float inner1; + float outer0; + float outer1; + float outer2; + float outer3; +}; + +kernel void main0(const device TessLevels& sb_levels [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(sb_levels.inner0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(sb_levels.outer0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(sb_levels.outer1); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(sb_levels.outer2); +} + diff --git a/reference/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese b/reference/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese index 83ef729321e..7fd48b4f7d0 100644 --- a/reference/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese +++ b/reference/shaders-msl/asm/tese/unnamed-builtin-array.asm.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,14 +51,23 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w), 0.0, 1.0); + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = 
patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + out.gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0]) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), ((gl_TessCoord.y * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); return out; } diff --git a/reference/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert b/reference/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert new file mode 100644 index 00000000000..ae42798e313 --- /dev/null +++ b/reference/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VSOut +{ + float4 pos; + float2 clip; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +struct main0_in +{ + float4 pos [[attribute(0)]]; +}; + +static inline __attribute__((always_inline)) +VSOut _main(thread const float4& pos) +{ + VSOut vout; + vout.pos = pos; + vout.clip = pos.xy; + return vout; +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 pos = in.pos; + float4 param = pos; + VSOut flattenTemp = _main(param); + out.gl_Position = flattenTemp.pos; + out.gl_ClipDistance[0] = flattenTemp.clip.x; + out.gl_ClipDistance[1] = flattenTemp.clip.y; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert b/reference/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert new file mode 100644 index 00000000000..c1e59b9c146 --- /dev/null +++ 
b/reference/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert @@ -0,0 +1,45 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct VSOut +{ + float4 pos; + float2 clip; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +struct main0_in +{ + float4 pos [[attribute(0)]]; +}; + +static inline __attribute__((always_inline)) +VSOut _main(thread const float4& pos) +{ + VSOut vout; + vout.pos = pos; + vout.clip = pos.xy; + return vout; +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 pos = in.pos; + float4 param = pos; + VSOut flattenTemp = _main(param); + out.gl_Position = flattenTemp.pos; + out.gl_ClipDistance[0] = flattenTemp.clip.x; + out.gl_ClipDistance[1] = flattenTemp.clip.y; + return out; +} + diff --git a/reference/shaders-msl/asm/vert/extract-transposed-matrix-from-struct.asm.vert b/reference/shaders-msl/asm/vert/extract-transposed-matrix-from-struct.asm.vert index 667d363a389..b94687a52bc 100644 --- a/reference/shaders-msl/asm/vert/extract-transposed-matrix-from-struct.asm.vert +++ b/reference/shaders-msl/asm/vert/extract-transposed-matrix-from-struct.asm.vert @@ -39,6 +39,7 @@ struct main0_in float3 PosL [[attribute(0)]]; }; +static inline __attribute__((always_inline)) V2F _VS(thread const float3& PosL, thread const uint& instanceID, const device gInstanceData& gInstanceData_1) { InstanceData instData; diff --git a/reference/shaders-msl/asm/vert/fake-builtin-input.asm.vert b/reference/shaders-msl/asm/vert/fake-builtin-input.asm.vert index f9fcbc85c30..3079ae9bcbb 100644 --- a/reference/shaders-msl/asm/vert/fake-builtin-input.asm.vert +++ b/reference/shaders-msl/asm/vert/fake-builtin-input.asm.vert @@ -5,6 +5,7 @@ using namespace metal; struct main0_out { + half4 out_var_SV_Target [[user(locn0)]]; float4 gl_Position [[position]]; }; diff --git 
a/reference/shaders-msl/asm/vert/invariant.msl21.asm.vert b/reference/shaders-msl/asm/vert/invariant.msl21.asm.vert index 1e3a86531c8..d74c43b3f05 100644 --- a/reference/shaders-msl/asm/vert/invariant.msl21.asm.vert +++ b/reference/shaders-msl/asm/vert/invariant.msl21.asm.vert @@ -10,6 +10,7 @@ struct main0_out float4 gl_Position [[position, invariant]]; }; +static inline __attribute__((always_inline)) float4 _main() { return float4(1.0); diff --git a/reference/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert b/reference/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert new file mode 100644 index 00000000000..1926ff9e14e --- /dev/null +++ b/reference/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _9 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _9.umatrix * float4(_9.uquad[int(gl_VertexIndex)].x, _9.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_9.flags.flags[0] != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert b/reference/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert new file mode 100644 index 00000000000..ee206385746 --- /dev/null +++ b/reference/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint2 flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 
a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _9 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _9.umatrix * float4(_9.uquad[int(gl_VertexIndex)].x, _9.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_9.flags.flags[0].x != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert b/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert index 05a69e48a03..074a47a652c 100644 --- a/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert +++ b/reference/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert @@ -1,19 +1,34 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + #include #include using namespace metal; +template struct SpvHalfTypeSelector; +template <> struct SpvHalfTypeSelector { public: using H = half; }; +template struct SpvHalfTypeSelector> { using H = vec; }; +template::H> +[[clang::optnone]] F spvQuantizeToF16(F fval) +{ + H hval = H(fval); + hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval)); + return F(hval); +} + constant int _7_tmp [[function_constant(201)]]; constant int _7 = is_function_constant_defined(_7_tmp) ? _7_tmp : -10; constant int _20 = (_7 + 2); constant uint _8_tmp [[function_constant(202)]]; constant uint _8 = is_function_constant_defined(_8_tmp) ? _8_tmp : 100u; constant uint _25 = (_8 % 5u); -constant int4 _30 = int4(20, 30, _20, _20); -constant int2 _32 = int2(_30.y, _30.x); -constant int _33 = _30.y; +constant int _30 = _7 - (-3) * (_7 / (-3)); +constant int4 _32 = int4(20, 30, _20, _30); +constant int2 _34 = int2(_32.y, _32.x); +constant int _35 = _32.y; constant float _9_tmp [[function_constant(200)]]; constant float _9 = is_function_constant_defined(_9_tmp) ? 
_9_tmp : 3.141590118408203125; +constant float _41 = spvQuantizeToF16(_9); struct main0_out { @@ -27,11 +42,11 @@ vertex main0_out main0() float4 pos = float4(0.0); pos.y += float(_20); pos.z += float(_25); - pos += float4(_30); - float2 _56 = pos.xy + float2(_32); - pos = float4(_56.x, _56.y, pos.z, pos.w); + pos += float4(_32); + float2 _59 = pos.xy + float2(_34); + pos = float4(_59.x, _59.y, pos.z, pos.w); out.gl_Position = pos; - out.m_4 = _33; + out.m_4 = _35; return out; } diff --git a/reference/shaders-msl/asm/vert/uint-vertex-id-instance-id.asm.vert b/reference/shaders-msl/asm/vert/uint-vertex-id-instance-id.asm.vert index 89ca17f98b3..30df905e6ab 100644 --- a/reference/shaders-msl/asm/vert/uint-vertex-id-instance-id.asm.vert +++ b/reference/shaders-msl/asm/vert/uint-vertex-id-instance-id.asm.vert @@ -10,6 +10,7 @@ struct main0_out float4 gl_Position [[position]]; }; +static inline __attribute__((always_inline)) float4 _main(thread const uint& vid, thread const uint& iid) { return float4(float(vid + iid)); diff --git a/reference/shaders-msl/comp/access-private-workgroup-in-function.comp b/reference/shaders-msl/comp/access-private-workgroup-in-function.comp index 17acda96780..85185e6df6a 100644 --- a/reference/shaders-msl/comp/access-private-workgroup-in-function.comp +++ b/reference/shaders-msl/comp/access-private-workgroup-in-function.comp @@ -5,11 +5,15 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) void set_f(thread int& f) { f = 40; } +static inline __attribute__((always_inline)) void set_shared_u(threadgroup int& u) { u = 50; diff --git a/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp b/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp index f7757cd19f8..18cfd68c199 100644 --- a/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp +++ 
b/reference/shaders-msl/comp/argument-buffers-discrete.msl2.argument.discrete.comp @@ -23,6 +23,8 @@ struct SSBO2 float4 v; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct spvDescriptorSetBuffer0 { const device SSBO0* ssbo0 [[id(0)]]; diff --git a/reference/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp b/reference/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp new file mode 100644 index 00000000000..c23a9d1d006 --- /dev/null +++ b/reference/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +kernel void main0(texture2d uImage [[texture(0)]], texture2d uImageRead [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int2 coord = int2(gl_GlobalInvocationID.xy); + uImage.write(uImageRead.read(uint2(coord)), uint2(coord)); +} + diff --git a/reference/shaders-msl/comp/array-length.comp b/reference/shaders-msl/comp/array-length.comp index 8406d1e50b0..9ac8c827e62 100644 --- a/reference/shaders-msl/comp/array-length.comp +++ b/reference/shaders-msl/comp/array-length.comp @@ -16,6 +16,9 @@ struct SSBO1 float bz[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) uint get_size(device SSBO& v_14, constant uint& v_14BufferSize, device SSBO1* (&ssbos)[2], constant uint* ssbosBufferSize) { return uint(int((v_14BufferSize - 16) / 16) + int((ssbosBufferSize[1] - 0) / 4)); diff --git a/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp b/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp index 73bd3f6d7b3..aa1f4424d12 100644 --- a/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp +++ b/reference/shaders-msl/comp/array-length.msl2.argument.discrete.comp @@ -27,6 +27,8 @@ struct SSBO3 float bz[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + struct 
spvDescriptorSetBuffer0 { device SSBO* v_16 [[id(0)]]; @@ -39,6 +41,7 @@ struct spvDescriptorSetBuffer1 constant uint* spvBufferSizeConstants [[id(2)]]; }; +static inline __attribute__((always_inline)) uint get_size(device SSBO& v_16, constant uint& v_16BufferSize, device SSBO1* constant (&ssbos)[2], constant uint* ssbosBufferSize, device SSBO2& v_38, constant uint& v_38BufferSize, device SSBO3* (&ssbos2)[2], constant uint* ssbos2BufferSize) { uint len = uint(int((v_16BufferSize - 16) / 16)); diff --git a/reference/shaders-msl/comp/atomic.comp b/reference/shaders-msl/comp/atomic.comp index 43e6a8f0380..fca72bfcfe9 100644 --- a/reference/shaders-msl/comp/atomic.comp +++ b/reference/shaders-msl/comp/atomic.comp @@ -12,59 +12,61 @@ struct SSBO int i32; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& ssbo [[buffer(0)]]) { threadgroup uint shared_u32; threadgroup int shared_i32; - uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); - uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _22 = 
atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); uint _32; do { _32 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); - int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); - int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u); + int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, 
memory_order_relaxed); + int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); int _52; do { _52 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); + } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10); shared_u32 = 10u; shared_i32 = 10; - uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); - uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, 
memory_order_relaxed); uint _64; do { _64 = 10u; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); - int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); - int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u); + int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); int _72; do { _72 = 10; - } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, 
memory_order_relaxed, memory_order_relaxed) && _72 == 10); + } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10); } diff --git a/reference/shaders-msl/comp/barriers.comp b/reference/shaders-msl/comp/barriers.comp index 560fd8b53f6..82813906f68 100644 --- a/reference/shaders-msl/comp/barriers.comp +++ b/reference/shaders-msl/comp/barriers.comp @@ -7,56 +7,67 @@ using namespace metal; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u); +static inline __attribute__((always_inline)) void barrier_shared() { threadgroup_barrier(mem_flags::mem_threadgroup); } +static inline __attribute__((always_inline)) void full_barrier() { threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void image_barrier() { threadgroup_barrier(mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void buffer_barrier() { threadgroup_barrier(mem_flags::mem_device); } +static inline __attribute__((always_inline)) void group_barrier() { threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void barrier_shared_exec() { threadgroup_barrier(mem_flags::mem_threadgroup); } +static inline __attribute__((always_inline)) void full_barrier_exec() { threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void image_barrier_exec() { threadgroup_barrier(mem_flags::mem_texture); } +static inline __attribute__((always_inline)) void buffer_barrier_exec() { threadgroup_barrier(mem_flags::mem_device); } +static inline __attribute__((always_inline)) void group_barrier_exec() { threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); } +static inline 
__attribute__((always_inline)) void exec_barrier() { threadgroup_barrier(mem_flags::mem_threadgroup); diff --git a/reference/shaders-msl/comp/basic.comp b/reference/shaders-msl/comp/basic.comp index 6410894ba08..42518f0d1dd 100644 --- a/reference/shaders-msl/comp/basic.comp +++ b/reference/shaders-msl/comp/basic.comp @@ -21,13 +21,15 @@ struct SSBO3 uint counter; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]], device SSBO3& _48 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; float4 idata = _23.in_data[ident]; if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) { - uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); + uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); _45.out_data[_52] = idata; } } diff --git a/reference/shaders-msl/comp/basic.dispatchbase.comp b/reference/shaders-msl/comp/basic.dispatchbase.comp new file mode 100644 index 00000000000..92d517cffb0 --- /dev/null +++ b/reference/shaders-msl/comp/basic.dispatchbase.comp @@ -0,0 +1,41 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 in_data[1]; +}; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +struct SSBO3 +{ + uint counter; +}; + +constant uint _59_tmp [[function_constant(10)]]; +constant uint _59 = is_function_constant_defined(_59_tmp) ? 
_59_tmp : 1u; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(_59, 1u, 1u); + +kernel void main0(const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 spvDispatchBase [[grid_origin]]) +{ + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; + gl_WorkGroupID += spvDispatchBase; + uint ident = gl_GlobalInvocationID.x; + uint workgroup = gl_WorkGroupID.x; + float4 idata = _27.in_data[ident]; + if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) + { + uint _56 = atomic_fetch_add_explicit((device atomic_uint*)&_52.counter, 1u, memory_order_relaxed); + _49.out_data[_56] = idata; + } +} + diff --git a/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp b/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp new file mode 100644 index 00000000000..87b0b442911 --- /dev/null +++ b/reference/shaders-msl/comp/basic.dispatchbase.msl11.comp @@ -0,0 +1,39 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 in_data[1]; +}; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +struct SSBO3 +{ + uint counter; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant uint3& spvDispatchBase [[buffer(29)]], const device SSBO& _27 [[buffer(0)]], device SSBO2& _49 [[buffer(1)]], device SSBO3& _52 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + gl_GlobalInvocationID += spvDispatchBase * gl_WorkGroupSize; + gl_WorkGroupID += spvDispatchBase; + uint ident = gl_GlobalInvocationID.x; + uint workgroup = gl_WorkGroupID.x; + float4 idata = _27.in_data[ident]; + if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) + { + uint _56 = atomic_fetch_add_explicit((device 
atomic_uint*)&_52.counter, 1u, memory_order_relaxed); + _49.out_data[_56] = idata; + } +} + diff --git a/reference/shaders-msl/comp/basic.inline-block.msl2.comp b/reference/shaders-msl/comp/basic.inline-block.msl2.comp new file mode 100644 index 00000000000..337b1b73362 --- /dev/null +++ b/reference/shaders-msl/comp/basic.inline-block.msl2.comp @@ -0,0 +1,54 @@ +#include +#include + +using namespace metal; + +typedef packed_float4 packed_float4x4[4]; + +struct Baz +{ + int f; + int g; +}; + +struct X +{ + int x; + int y; + float z; +}; + +struct Foo +{ + int a; + int b; + packed_float4x4 c; + X x[2]; +}; + +struct Bar +{ + int d; + int e; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u); + +struct spvDescriptorSetBuffer0 +{ + constant Bar* m_38 [[id(0)]]; + Foo m_32 [[id(1)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + device Baz* baz [[id(0)]][3]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint3 coords = gl_GlobalInvocationID; + spvDescriptorSet1.baz[coords.x]->f = spvDescriptorSet0.m_32.a + (*spvDescriptorSet0.m_38).d; + spvDescriptorSet1.baz[coords.x]->g = spvDescriptorSet0.m_32.b * (*spvDescriptorSet0.m_38).e; +} + diff --git a/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp b/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp deleted file mode 100644 index 59eb961f123..00000000000 --- a/reference/shaders-msl/comp/bitcast-16bit-2.invalid.comp +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO1 -{ - short4 outputs[1]; -}; - -struct SSBO0 -{ - int4 inputs[1]; -}; - -struct UBO -{ - half4 const0; -}; - -kernel void main0(device SSBO1& _21 [[buffer(0)]], device SSBO0& _29 [[buffer(1)]], constant UBO& _40 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint ident = 
gl_GlobalInvocationID.x; - short2 _47 = as_type(_29.inputs[ident].x) + as_type(_40.const0.xy); - _21.outputs[ident] = short4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w); - short2 _66 = short2(as_type(uint(_29.inputs[ident].y)) - as_type(_40.const0.zw)); - _21.outputs[ident] = short4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y); -} - diff --git a/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000000..f79a8b52068 --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct SSBO; + +struct UBO +{ + uint2 b; +}; + +struct SSBO +{ + packed_float3 a1; + float a2; +}; + +kernel void main0(constant UBO& _10 [[buffer(0)]]) +{ + (reinterpret_cast(as_type(_10.b)))->a1 = float3(1.0, 2.0, 3.0); + uint2 v2 = as_type(reinterpret_cast(reinterpret_cast(as_type(_10.b + uint2(32u))))); + float3 v3 = float3((reinterpret_cast(as_type(v2)))->a1); + (reinterpret_cast(as_type(v2)))->a1 = v3 + float3(1.0); +} + diff --git a/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp new file mode 100644 index 00000000000..35b7af540ca --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -0,0 +1,67 @@ +#include +#include + +using namespace metal; + +struct t21; + +struct t24 +{ + int4 m0[2]; + int m1; + ulong2 m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t21 +{ + int4 m0[2]; + int m1; + ulong2 m2[2]; + device t21* m3; + float2x4 m4; +}; + +struct t35 +{ + int m0[32]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(constant t24& u24 
[[buffer(0)]], constant t35& u35 [[buffer(1)]], texture2d v295 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int v8 = 0; + v8 |= (u24.m0[0].x - 0); + v8 |= (u24.m0[u35.m0[1]].x - 1); + v8 |= (u24.m1 - 2); + v8 |= int(u24.m4[0u][0] - 3.0); + v8 |= int(u24.m4[1u][0] - 5.0); + v8 |= int(u24.m4[0u][1] - 4.0); + v8 |= int(u24.m4[1u][1] - 6.0); + v8 |= (((device t21*)u24.m2[0].x)->m0[0].x - 3); + v8 |= (((device t21*)u24.m2[0].x)->m0[u35.m0[1]].x - 4); + v8 |= (((device t21*)u24.m2[0].x)->m1 - 5); + v8 |= int(((device t21*)u24.m2[0].x)->m4[0u][0] - 6.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[1u][0] - 8.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[0u][1] - 7.0); + v8 |= int(((device t21*)u24.m2[0].x)->m4[1u][1] - 9.0); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m0[0].x - 6); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m0[u35.m0[1]].x - 7); + v8 |= (((device t21*)u24.m2[u35.m0[1]].x)->m1 - 8); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][0] - 9.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][0] - 11.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[0u][1] - 10.0); + v8 |= int(((device t21*)u24.m2[u35.m0[1]].x)->m4[1u][1] - 12.0); + v8 |= (u24.m3->m0[0].x - 9); + v8 |= (u24.m3->m0[u35.m0[1]].x - 10); + v8 |= (u24.m3->m1 - 11); + v8 |= int(u24.m3->m4[0u][0] - 12.0); + v8 |= int(u24.m3->m4[1u][0] - 14.0); + v8 |= int(u24.m3->m4[0u][1] - 13.0); + v8 |= int(u24.m3->m4[1u][1] - 15.0); + uint4 v284 = select(uint4(1u, 0u, 0u, 1u), uint4(0u), bool4(v8 != 0)); + v295.write(v284, uint2(int2(gl_GlobalInvocationID.xy))); +} + diff --git a/reference/shaders-msl/comp/buffer_device_address.msl2.comp b/reference/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000000..f0f5ea9a650 --- /dev/null +++ b/reference/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,43 @@ +#include +#include + +using namespace metal; + +struct Position; +struct PositionReferences; + +struct Position +{ + 
float2 positions[1]; +}; + +struct Registers +{ + device PositionReferences* references; + float fract_time; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); + +kernel void main0(constant Registers& registers [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + uint2 local_offset = gl_GlobalInvocationID.xy; + uint local_index = ((local_offset.y * 8u) * gl_NumWorkGroups.x) + local_offset.x; + uint slice = gl_WorkGroupID.z; + device Position* __restrict positions = registers.references->buffers[slice]; + float offset = 6.283125400543212890625 * fract(registers.fract_time + (float(slice) * 0.100000001490116119384765625)); + float2 pos = float2(local_offset); + pos.x += (0.20000000298023223876953125 * sin((2.2000000476837158203125 * pos.x) + offset)); + pos.y += (0.20000000298023223876953125 * sin((2.25 * pos.y) + (2.0 * offset))); + pos.x += (0.20000000298023223876953125 * cos((1.7999999523162841796875 * pos.y) + (3.0 * offset))); + pos.y += (0.20000000298023223876953125 * cos((2.849999904632568359375 * pos.x) + (4.0 * offset))); + pos.x += (0.5 * sin(offset)); + pos.y += (0.5 * sin(offset + 0.300000011920928955078125)); + positions->positions[local_index] = (pos / ((float2(8.0) * float2(gl_NumWorkGroups.xy)) - float2(1.0))) - float2(0.5); +} + diff --git a/reference/shaders-msl/comp/cfg-preserve-parameter.comp b/reference/shaders-msl/comp/cfg-preserve-parameter.comp index d65beee5d2d..ce1bef3f8e6 100644 --- a/reference/shaders-msl/comp/cfg-preserve-parameter.comp +++ b/reference/shaders-msl/comp/cfg-preserve-parameter.comp @@ -5,6 +5,7 @@ using namespace metal; +static inline __attribute__((always_inline)) void out_test_0(thread const int& cond, thread int& i) { if (cond == 0) @@ -17,6 +18,7 @@ void out_test_0(thread const int& cond, thread int& 
i) } } +static inline __attribute__((always_inline)) void out_test_1(thread const int& cond, thread int& i) { switch (cond) @@ -34,6 +36,7 @@ void out_test_1(thread const int& cond, thread int& i) } } +static inline __attribute__((always_inline)) void inout_test_0(thread const int& cond, thread int& i) { if (cond == 0) @@ -42,6 +45,7 @@ void inout_test_0(thread const int& cond, thread int& i) } } +static inline __attribute__((always_inline)) void inout_test_1(thread const int& cond, thread int& i) { switch (cond) diff --git a/reference/shaders-msl/comp/coherent-block.comp b/reference/shaders-msl/comp/coherent-block.comp index bec9b218c7b..58bbacb7f0c 100644 --- a/reference/shaders-msl/comp/coherent-block.comp +++ b/reference/shaders-msl/comp/coherent-block.comp @@ -8,7 +8,9 @@ struct SSBO float4 value; }; -kernel void main0(device SSBO& _10 [[buffer(0)]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(volatile device SSBO& _10 [[buffer(0)]]) { _10.value = float4(20.0); } diff --git a/reference/shaders-msl/comp/coherent-image.comp b/reference/shaders-msl/comp/coherent-image.comp index 0fe044fb9ae..5090484464d 100644 --- a/reference/shaders-msl/comp/coherent-image.comp +++ b/reference/shaders-msl/comp/coherent-image.comp @@ -8,7 +8,9 @@ struct SSBO int4 value; }; -kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d uImage [[texture(0)]]) { _10.value = uImage.read(uint2(int2(10))); } diff --git a/reference/shaders-msl/comp/complex-composite-constant-array.comp b/reference/shaders-msl/comp/complex-composite-constant-array.comp new file mode 100644 index 00000000000..2f5549f5fb2 --- /dev/null +++ b/reference/shaders-msl/comp/complex-composite-constant-array.comp @@ -0,0 +1,65 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored 
"-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct SSBO +{ + float4x4 a; + uint index; +}; + +constant spvUnsafeArray _32 = spvUnsafeArray({ float4x4(float4(1.0, 0.0, 0.0, 0.0), float4(0.0, 1.0, 0.0, 0.0), float4(0.0, 0.0, 1.0, 0.0), float4(0.0, 0.0, 0.0, 1.0)), float4x4(float4(2.0, 0.0, 0.0, 0.0), float4(0.0, 2.0, 0.0, 0.0), float4(0.0, 0.0, 2.0, 0.0), float4(0.0, 0.0, 0.0, 2.0)) }); + +static inline __attribute__((always_inline)) +void write_global(device SSBO& v_14) +{ + v_14.a = _32[v_14.index]; +} + +kernel void main0(device SSBO& v_14 [[buffer(0)]]) +{ + write_global(v_14); +} + diff --git a/reference/shaders-msl/comp/complex-type-alias.comp b/reference/shaders-msl/comp/complex-type-alias.comp index 9101f89b743..fc0d57500b7 100644 --- a/reference/shaders-msl/comp/complex-type-alias.comp +++ b/reference/shaders-msl/comp/complex-type-alias.comp @@ -44,6 +44,7 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); +static inline __attribute__((always_inline)) void Zero(thread Foo0& v) { v.a = 0.0; diff --git a/reference/shaders-msl/comp/composite-array-initialization.comp b/reference/shaders-msl/comp/composite-array-initialization.comp index ac10e750120..c6c17b1f392 100644 --- 
a/reference/shaders-msl/comp/composite-array-initialization.comp +++ b/reference/shaders-msl/comp/composite-array-initialization.comp @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Data { float a; @@ -27,21 +66,9 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); -constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} +constant spvUnsafeArray _25 = spvUnsafeArray({ Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }); +static inline __attribute__((always_inline)) Data combine(thread const Data& a, thread const Data& b) { return Data{ a.a + b.a, a.b + b.b }; @@ -49,10 +76,10 @@ Data combine(thread const Data& a, thread const Data& b) kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - Data data[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; - Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; - Data data2[2]; - spvArrayCopyFromStack1(data2, _31); + spvUnsafeArray data = spvUnsafeArray({ Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }); + spvUnsafeArray _31 = spvUnsafeArray({ Data{ X, 2.0 }, Data{ 3.0, 5.0 } }); + spvUnsafeArray data2; + data2 = _31; Data param = data[gl_LocalInvocationID.x]; Data param_1 = data2[gl_LocalInvocationID.x]; Data _73 = combine(param, param_1); diff --git a/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp new file mode 100644 index 00000000000..1d451a95928 --- /dev/null +++ b/reference/shaders-msl/comp/composite-array-initialization.force-native-array.comp @@ -0,0 +1,158 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i 
< A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct Data +{ + float a; + float b; +}; + +constant float X_tmp [[function_constant(0)]]; +constant float X = is_function_constant_defined(X_tmp) ? 
X_tmp : 4.0; + +struct Data_1 +{ + float a; + float b; +}; + +struct SSBO +{ + Data_1 outdata[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); + +constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; + +static inline __attribute__((always_inline)) +Data combine(thread const Data& a, thread const Data& b) +{ + return Data{ a.a + b.a, a.b + b.b }; +} + +kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + Data data[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; + Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; + Data data2[2]; + spvArrayCopyFromStackToStack1(data2, _31); + Data param = data[gl_LocalInvocationID.x]; + Data param_1 = data2[gl_LocalInvocationID.x]; + Data _73 = combine(param, param_1); + _53.outdata[gl_WorkGroupID.x].a = _73.a; + _53.outdata[gl_WorkGroupID.x].b = _73.b; +} + diff --git a/reference/shaders-msl/comp/composite-construct.comp b/reference/shaders-msl/comp/composite-construct.comp index 4b5ea37e98f..aada82fc92c 100644 --- a/reference/shaders-msl/comp/composite-construct.comp +++ b/reference/shaders-msl/comp/composite-construct.comp @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct SSBO0 { float4 as[1]; @@ -21,26 +60,15 @@ struct Composite float4 b; }; -constant float4 _43[2] = { float4(20.0), float4(40.0) }; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} +constant spvUnsafeArray _43 = spvUnsafeArray({ float4(20.0), float4(40.0) }); kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { - float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }; - float4 values[2]; - spvArrayCopyFromStack1(values, _37); + spvUnsafeArray _37 = spvUnsafeArray({ _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }); + spvUnsafeArray values; + values = _37; Composite c = Composite{ values[0], _43[1] }; _16.as[0] = values[gl_LocalInvocationIndex]; _32.bs[1] = c.b; diff --git 
a/reference/shaders-msl/comp/copy-array-of-arrays.comp b/reference/shaders-msl/comp/copy-array-of-arrays.comp index 0fa6c6bfd31..21fb9b367c5 100644 --- a/reference/shaders-msl/comp/copy-array-of-arrays.comp +++ b/reference/shaders-msl/comp/copy-array-of-arrays.comp @@ -1,87 +1,79 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -struct BUF -{ - int a; - float b; - float c; -}; - -constant float _16[2] = { 1.0, 2.0 }; -constant float _19[2] = { 3.0, 4.0 }; -constant float _20[2][2] = { { 1.0, 2.0 }, { 3.0, 4.0 } }; -constant float _21[2][2][2] = { { { 1.0, 2.0 }, { 3.0, 4.0 } }, { { 1.0, 2.0 }, { 3.0, 4.0 } } }; - -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +struct spvUnsafeArray { - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromStack2(thread T (&dst)[A][B], thread const T (&src)[A][B]) -{ - for (uint i = 0; i < A; i++) + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread { - spvArrayCopyFromStack1(dst[i], src[i]); + return elements[pos]; } -} - -template -void spvArrayCopyFromConstant2(thread T (&dst)[A][B], constant T (&src)[A][B]) -{ - for (uint i = 0; i < A; i++) + constexpr const thread T& operator [] (size_t pos) const thread { - spvArrayCopyFromConstant1(dst[i], src[i]); + return elements[pos]; } -} - -template -void spvArrayCopyFromStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C]) -{ - for (uint i = 0; i < A; i++) + + device T& operator [] (size_t pos) device { - spvArrayCopyFromStack2(dst[i], src[i]); + return elements[pos]; } -} - -template -void spvArrayCopyFromConstant3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C]) -{ - for (uint i = 0; i < A; i++) + constexpr const device T& operator [] (size_t pos) const device { - spvArrayCopyFromConstant2(dst[i], src[i]); + return elements[pos]; } -} + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct BUF +{ + int a; + float b; + float c; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 2.0 }); +constant spvUnsafeArray _19 = spvUnsafeArray({ 3.0, 4.0 }); +constant spvUnsafeArray, 2> _20 = spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0 }), spvUnsafeArray({ 3.0, 4.0 }) }); +constant spvUnsafeArray, 2>, 2> _21 = spvUnsafeArray, 2>, 2>({ spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0 }), spvUnsafeArray({ 3.0, 4.0 }) }), spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0 }), spvUnsafeArray({ 3.0, 4.0 }) }) }); kernel void main0(device BUF& o [[buffer(0)]]) { - float c[2][2][2]; - spvArrayCopyFromConstant3(c, _21); + spvUnsafeArray, 2>, 2> c; + c = _21; o.a = int(c[1][1][1]); - float 
_43[2] = { o.b, o.c }; - float _48[2] = { o.b, o.b }; - float _49[2][2] = { { _43[0], _43[1] }, { _48[0], _48[1] } }; - float _54[2] = { o.c, o.c }; - float _59[2] = { o.c, o.b }; - float _60[2][2] = { { _54[0], _54[1] }, { _59[0], _59[1] } }; - float _61[2][2][2] = { { { _49[0][0], _49[0][1] }, { _49[1][0], _49[1][1] } }, { { _60[0][0], _60[0][1] }, { _60[1][0], _60[1][1] } } }; - float d[2][2][2]; - spvArrayCopyFromStack3(d, _61); - float e[2][2][2]; - spvArrayCopyFromStack3(e, d); + spvUnsafeArray _43 = spvUnsafeArray({ o.b, o.c }); + spvUnsafeArray _48 = spvUnsafeArray({ o.b, o.b }); + spvUnsafeArray, 2> _49 = spvUnsafeArray, 2>({ _43, _48 }); + spvUnsafeArray _54 = spvUnsafeArray({ o.c, o.c }); + spvUnsafeArray _59 = spvUnsafeArray({ o.c, o.b }); + spvUnsafeArray, 2> _60 = spvUnsafeArray, 2>({ _54, _59 }); + spvUnsafeArray, 2>, 2> _61 = spvUnsafeArray, 2>, 2>({ _49, _60 }); + spvUnsafeArray, 2>, 2> d; + d = _61; + spvUnsafeArray, 2>, 2> e; + e = d; o.b = e[1][0][1]; } diff --git a/reference/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/reference/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp new file mode 100644 index 00000000000..0eacb269b2a --- /dev/null +++ b/reference/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp @@ -0,0 +1,364 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T 
(&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToStack2(thread T (&dst)[A][B], constant T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup2(threadgroup T (&dst)[A][B], constant T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToStack2(thread T (&dst)[A][B], thread const T 
(&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup2(threadgroup T (&dst)[A][B], thread const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack2(thread T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup2(threadgroup T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToDevice2(device T (&dst)[A][B], device const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToDevice1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToDevice2(device T (&dst)[A][B], constant T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToDevice1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToDevice2(device T (&dst)[A][B], thread const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToDevice1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice2(device T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToDevice1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToStack2(thread T (&dst)[A][B], device const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToStack1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup2(threadgroup T (&dst)[A][B], device const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + 
spvArrayCopyFromDeviceToThreadGroup1(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToStack3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup3(threadgroup T (&dst)[A][B][C], constant T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToThreadGroup2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup3(threadgroup T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToThreadGroup2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack3(thread T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup3(threadgroup T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToThreadGroup2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToDevice3(device T (&dst)[A][B][C], device const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToDevice2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromConstantToDevice3(device T (&dst)[A][B][C], constant T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToDevice2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromStackToDevice3(device T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + 
spvArrayCopyFromStackToDevice2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice3(device T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToDevice2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToStack3(thread T (&dst)[A][B][C], device const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToStack2(dst[i], src[i]); + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup3(threadgroup T (&dst)[A][B][C], device const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromDeviceToThreadGroup2(dst[i], src[i]); + } +} + +struct BUF +{ + int a; + float b; + float c; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +constant float _16[2] = { 1.0, 2.0 }; +constant float _19[2] = { 3.0, 4.0 }; +constant float _20[2][2] = { { 1.0, 2.0 }, { 3.0, 4.0 } }; +constant float _21[2][2][2] = { { { 1.0, 2.0 }, { 3.0, 4.0 } }, { { 1.0, 2.0 }, { 3.0, 4.0 } } }; + +kernel void main0(device BUF& o [[buffer(0)]]) +{ + float c[2][2][2]; + spvArrayCopyFromConstantToStack3(c, _21); + o.a = int(c[1][1][1]); + float _43[2] = { o.b, o.c }; + float _48[2] = { o.b, o.b }; + float _49[2][2] = { { _43[0], _43[1] }, { _48[0], _48[1] } }; + float _54[2] = { o.c, o.c }; + float _59[2] = { o.c, o.b }; + float _60[2][2] = { { _54[0], _54[1] }, { _59[0], _59[1] } }; + float _61[2][2][2] = { { { _49[0][0], _49[0][1] }, { _49[1][0], _49[1][1] } }, { { _60[0][0], _60[0][1] }, { _60[1][0], _60[1][1] } } }; + float d[2][2][2]; + spvArrayCopyFromStackToStack3(d, _61); + float e[2][2][2]; + spvArrayCopyFromStackToStack3(e, d); + o.b = e[1][0][1]; +} + diff --git a/reference/shaders-msl/comp/culling.comp b/reference/shaders-msl/comp/culling.comp index 32acf599e77..13578363b7a 100644 --- a/reference/shaders-msl/comp/culling.comp +++ b/reference/shaders-msl/comp/culling.comp @@ -29,7 +29,7 @@ kernel 
void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff float idata = _22.in_data[ident]; if (idata > 12.0) { - uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed); + uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed); _38.out_data[_45] = idata; } } diff --git a/reference/shaders-msl/comp/defer-parens.comp b/reference/shaders-msl/comp/defer-parens.comp index 76dce777340..9a567fa6ebe 100644 --- a/reference/shaders-msl/comp/defer-parens.comp +++ b/reference/shaders-msl/comp/defer-parens.comp @@ -9,6 +9,8 @@ struct SSBO int index; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _13 [[buffer(0)]]) { float4 d = _13.data; diff --git a/reference/shaders-msl/comp/dowhile.comp b/reference/shaders-msl/comp/dowhile.comp index 3482fb355b4..2b4de9ebb73 100644 --- a/reference/shaders-msl/comp/dowhile.comp +++ b/reference/shaders-msl/comp/dowhile.comp @@ -14,6 +14,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _28 [[buffer(0)]], device SSBO2& _52 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp b/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp index f9608de34d7..856efeabaf0 100644 --- a/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp +++ b/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp @@ -5,17 +5,6 @@ using namespace metal; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -28,6 +17,17 @@ template inline constexpr thread T&& 
spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -65,66 +65,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) { constant uint& fooSwzl = spvSwizzleConstants[0]; diff --git a/reference/shaders-msl/comp/functions.comp b/reference/shaders-msl/comp/functions.comp index d8f6e55a467..b1072608787 100644 --- a/reference/shaders-msl/comp/functions.comp +++ b/reference/shaders-msl/comp/functions.comp @@ -5,6 +5,7 @@ using namespace metal; +static inline __attribute__((always_inline)) void myfunc(threadgroup int (&foo)[1337]) { foo[0] = 13; diff --git a/reference/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp b/reference/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp index 1b525c1f90e..f6b8845343a 100644 --- a/reference/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp +++ b/reference/shaders-msl/comp/global-invocation-id-writable-ssbo-in-function.comp @@ -5,19 +5,20 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + +static inline __attribute__((always_inline)) float getB(device myBlock& myStorage, thread uint3& gl_GlobalInvocationID) { return myStorage.b[gl_GlobalInvocationID.x]; diff --git a/reference/shaders-msl/comp/global-invocation-id.comp b/reference/shaders-msl/comp/global-invocation-id.comp index fe0212ec3ff..333485a256a 100644 --- 
a/reference/shaders-msl/comp/global-invocation-id.comp +++ b/reference/shaders-msl/comp/global-invocation-id.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp b/reference/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp new file mode 100644 index 00000000000..24a6c04841b --- /dev/null +++ b/reference/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct SSBO +{ + float4 outdata; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +struct spvDescriptorSetBuffer0 +{ + texture2d uImage [[id(0)]]; + device atomic_uint* uImage_atomic [[id(1)]]; + device SSBO* m_31 [[id(2)]]; + texture2d uTexture [[id(3)]]; + sampler uTextureSmplr [[id(4)]]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), spvDescriptorSet0.uImage)], 10u, memory_order_relaxed); + uint ret = _26; + (*spvDescriptorSet0.m_31).outdata = spvDescriptorSet0.uTexture.sample(spvDescriptorSet0.uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(ret)); +} + diff --git a/reference/shaders-msl/comp/image-atomic-automatic-bindings.comp b/reference/shaders-msl/comp/image-atomic-automatic-bindings.comp new file mode 100644 index 00000000000..60d5421e56f --- /dev/null +++ b/reference/shaders-msl/comp/image-atomic-automatic-bindings.comp @@ -0,0 +1,29 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct SSBO +{ + float4 outdata; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device SSBO& _31 [[buffer(1)]], texture2d uImage [[texture(0)]], device atomic_uint* uImage_atomic [[buffer(0)]], texture2d uTexture [[texture(1)]], sampler uTextureSmplr [[sampler(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint _26 = atomic_fetch_add_explicit((device atomic_uint*)&uImage_atomic[spvImage2DAtomicCoord(int2(gl_GlobalInvocationID.xy), uImage)], 10u, memory_order_relaxed); + uint ret = _26; + _31.outdata = uTexture.sample(uTextureSmplr, float2(gl_GlobalInvocationID.xy), level(0.0)) + float4(float(ret)); +} + diff --git a/reference/shaders-msl/comp/image-cube-array-load-store.comp b/reference/shaders-msl/comp/image-cube-array-load-store.comp index ef67a326f59..c0b83c46c8c 100644 --- a/reference/shaders-msl/comp/image-cube-array-load-store.comp +++ b/reference/shaders-msl/comp/image-cube-array-load-store.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texturecube_array uImageIn [[texture(0)]], texturecube_array uImageOut [[texture(1)]]) { int3 coord = int3(9, 7, 11); diff --git a/reference/shaders-msl/comp/image.comp b/reference/shaders-msl/comp/image.comp index f3bc1455db5..e7c9c763a34 100644 --- a/reference/shaders-msl/comp/image.comp +++ b/reference/shaders-msl/comp/image.comp @@ -3,6 +3,8 @@ using namespace metal; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(texture2d uImageIn [[texture(0)]], texture2d uImageOut [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 v = 
uImageIn.read(uint2((int2(gl_GlobalInvocationID.xy) + int2(uImageIn.get_width(), uImageIn.get_height())))); diff --git a/reference/shaders-msl/comp/insert.comp b/reference/shaders-msl/comp/insert.comp index 0f56a651534..c4611ba2225 100644 --- a/reference/shaders-msl/comp/insert.comp +++ b/reference/shaders-msl/comp/insert.comp @@ -8,6 +8,8 @@ struct SSBO float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { float4 v; @@ -16,6 +18,6 @@ kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_GlobalInvocationID [[ v.z = 70.0; v.w = 90.0; _27.out_data[gl_GlobalInvocationID.x] = v; - _27.out_data[gl_GlobalInvocationID.x].y = 20.0; + ((device float*)&_27.out_data[gl_GlobalInvocationID.x])[1u] = 20.0; } diff --git a/reference/shaders-msl/comp/int64.invalid.msl22.comp b/reference/shaders-msl/comp/int64.invalid.msl22.comp deleted file mode 100644 index 6eb4a8a8d98..00000000000 --- a/reference/shaders-msl/comp/int64.invalid.msl22.comp +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include - -using namespace metal; - -struct M0 -{ - long v; - long2 b[2]; - ulong c; - ulong d[5]; -}; - -struct SSBO0_Type -{ - long4 a; - M0 m0; -}; - -struct SSBO1_Type -{ - ulong4 b; - M0 m0; -}; - -struct SSBO2_Type -{ - long a[4]; - long2 b[4]; -}; - -struct SSBO3_Type -{ - long a[4]; - long2 b[4]; -}; - -struct SSBO -{ - int s32; - uint u32; -}; - -kernel void main0(device SSBO& _96 [[buffer(0)]]) -{ - SSBO0_Type ssbo_0; - ssbo_0.a += long4(10l, 20l, 30l, 40l); - SSBO1_Type ssbo_1; - ssbo_1.b += ulong4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul); - ssbo_0.a += long4(20l); - ssbo_0.a = abs(ssbo_0.a + long4(ssbo_1.b)); - ssbo_0.a += long4(1l); - ssbo_1.b += ulong4(long4(1l)); - ssbo_0.a -= long4(1l); - ssbo_1.b -= ulong4(long4(1l)); - SSBO2_Type ssbo_2; - ssbo_2.a[0] += 1l; - SSBO3_Type ssbo_3; - ssbo_3.a[0] += 2l; 
- _96.s32 = int(uint(((ulong(ssbo_0.a.x) + ssbo_1.b.y) + ulong(ssbo_2.a[1])) + ulong(ssbo_3.a[2]))); - _96.u32 = uint(((ulong(ssbo_0.a.y) + ssbo_1.b.z) + ulong(ssbo_2.a[0])) + ulong(ssbo_3.a[1])); -} - diff --git a/reference/shaders-msl/comp/inverse.comp b/reference/shaders-msl/comp/inverse.comp index f2f499b91eb..0a1d298b0da 100644 --- a/reference/shaders-msl/comp/inverse.comp +++ b/reference/shaders-msl/comp/inverse.comp @@ -5,34 +5,23 @@ using namespace metal; -struct MatrixOut -{ - float2x2 m2out; - float3x3 m3out; - float4x4 m4out; -}; - -struct MatrixIn -{ - float2x2 m2in; - float3x3 m3in; - float4x4 m4in; -}; - // Returns the determinant of a 2x2 matrix. -inline float spvDet2x2(float a1, float a2, float b1, float b2) +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the determinant of a 3x3 matrix. -inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) float4x4 spvInverse4x4(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -68,6 +57,7 @@ float4x4 spvInverse4x4(float4x4 m) // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. 
+static inline __attribute__((always_inline)) float3x3 spvInverse3x3(float3x3 m) { float3x3 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -95,6 +85,7 @@ float3x3 spvInverse3x3(float3x3 m) // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. +static inline __attribute__((always_inline)) float2x2 spvInverse2x2(float2x2 m) { float2x2 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -114,6 +105,22 @@ float2x2 spvInverse2x2(float2x2 m) return (det != 0.0f) ? (adj * (1.0f / det)) : m; } +struct MatrixOut +{ + float2x2 m2out; + float3x3 m3out; + float4x4 m4out; +}; + +struct MatrixIn +{ + float2x2 m2in; + float3x3 m3in; + float4x4 m4in; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device MatrixOut& _15 [[buffer(0)]], const device MatrixIn& _20 [[buffer(1)]]) { _15.m2out = spvInverse2x2(_20.m2in); diff --git a/reference/shaders-msl/comp/local-invocation-id.comp b/reference/shaders-msl/comp/local-invocation-id.comp index 772e5e0d867..45059905881 100644 --- a/reference/shaders-msl/comp/local-invocation-id.comp +++ b/reference/shaders-msl/comp/local-invocation-id.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/shaders-msl/comp/local-invocation-index.comp b/reference/shaders-msl/comp/local-invocation-index.comp index 41adbdca5cf..67426dd3f6b 100644 --- a/reference/shaders-msl/comp/local-invocation-index.comp +++ 
b/reference/shaders-msl/comp/local-invocation-index.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b[1]; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b[1]; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp new file mode 100644 index 00000000000..3de0ef44f0f --- /dev/null +++ b/reference/shaders-msl/comp/mat3-row-maj-read-write-const.comp @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct model_t +{ + float3x3 mtx_rm; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device model_t& model [[buffer(0)]]) +{ + float3x3 mtx_cm = transpose(model.mtx_rm); + float3x3 mtx1 = mtx_cm * float3x3(float3(4.0, -3.0, 1.0), float3(-7.0, 7.0, -7.0), float3(-5.0, 6.0, -8.0)); + if (mtx1[0].x != 0.0) + { + model.mtx_rm = transpose(float3x3(float3(-5.0, -3.0, -5.0), float3(-2.0, 2.0, -5.0), float3(6.0, 3.0, -8.0))); + } +} + diff --git a/reference/shaders-msl/comp/mat3.comp b/reference/shaders-msl/comp/mat3.comp index c2d9a7c8382..fcb8f7a60fc 100644 --- a/reference/shaders-msl/comp/mat3.comp +++ b/reference/shaders-msl/comp/mat3.comp @@ -8,6 +8,8 @@ struct SSBO2 float3x3 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/mod.comp b/reference/shaders-msl/comp/mod.comp index e0d290259c6..9c43d7321bd 100644 --- a/reference/shaders-msl/comp/mod.comp +++ 
b/reference/shaders-msl/comp/mod.comp @@ -5,6 +5,13 @@ using namespace metal; +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct SSBO { float4 in_data[1]; @@ -15,12 +22,7 @@ struct SSBO2 float4 out_data[1]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _33 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/shaders-msl/comp/modf.comp b/reference/shaders-msl/comp/modf.comp index ef50a021354..5a5ac3dbd75 100644 --- a/reference/shaders-msl/comp/modf.comp +++ b/reference/shaders-msl/comp/modf.comp @@ -13,6 +13,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _35 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/outer-product.comp b/reference/shaders-msl/comp/outer-product.comp index 8e32db392ea..e589642dbda 100644 --- a/reference/shaders-msl/comp/outer-product.comp +++ b/reference/shaders-msl/comp/outer-product.comp @@ -23,6 +23,8 @@ struct ReadSSBO float4 v4; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]]) { _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y); diff --git a/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp b/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp new file mode 100644 index 
00000000000..14723cbe80f --- /dev/null +++ b/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp @@ -0,0 +1,142 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO_A +{ + float data[1]; +}; + +struct UBO_C +{ + float4 data[1024]; +}; + +struct Registers +{ + float reg; +}; + +struct SSBO_B +{ + uint2 data[1]; +}; + +struct UBO_D +{ + uint4 data[1024]; +}; + +struct SSBO_BRO +{ + uint2 data[1]; +}; + +struct SSBO_As +{ + float data[1]; +}; + +struct UBO_Cs +{ + float4 data[1024]; +}; + +struct SSBO_Bs +{ + uint2 data[1024]; +}; + +struct UBO_Ds +{ + uint4 data[1024]; +}; + +struct SSBO_BsRO +{ + uint2 data[1024]; +}; + +struct SSBO_E +{ + float data[1]; +}; + +struct UBO_G +{ + float4 data[1024]; +}; + +struct SSBO_F +{ + uint2 data[1]; +}; + +struct UBO_H +{ + uint4 data[1024]; +}; + +struct SSBO_I +{ + uint2 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct spvDescriptorSetBuffer0 +{ + device SSBO_A* ssbo_a [[id(0)]]; + constant UBO_C* ubo_c [[id(1)]]; + device SSBO_As* ssbo_as [[id(2)]][4]; + constant UBO_Cs* ubo_cs [[id(6)]][4]; +}; + +static inline __attribute__((always_inline)) +void func0(device SSBO_A& ssbo_a, thread uint3& gl_GlobalInvocationID, constant UBO_C& ubo_c, thread uint3& gl_WorkGroupID, constant Registers& v_42, device SSBO_B& ssbo_b, constant UBO_D& ubo_d, const device SSBO_BRO& ssbo_b_readonly) +{ + ssbo_a.data[gl_GlobalInvocationID.x] = ubo_c.data[gl_WorkGroupID.x].x + v_42.reg; + ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; +} + +static inline __attribute__((always_inline)) +void func1(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_As* const device (&ssbo_as)[4], constant UBO_Cs* const device (&ubo_cs)[4]) +{ + 
ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_cs[gl_WorkGroupID.x]->data[0].x; +} + +static inline __attribute__((always_inline)) +void func2(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_Bs* const device (&ssbo_bs)[4], constant UBO_Ds* const device (&ubo_ds)[4], const device SSBO_BsRO* const device (&ssbo_bs_readonly)[4]) +{ + ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_ds[gl_WorkGroupID.x]->data[0].xy + ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x]; +} + +static inline __attribute__((always_inline)) +void func3(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_E& ssbo_e, constant UBO_G& ubo_g, device SSBO_F& ssbo_f, constant UBO_H& ubo_h, const device SSBO_I& ssbo_i) +{ + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x].x; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x]; +} + +kernel void main0(const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& v_42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0; + constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1; + device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0; + constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1; + const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0; + device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a); + constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c); + const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a); + const device auto& ssbo_bs = (device SSBO_Bs* const device 
(&)[4])spvDescriptorSet0.ssbo_as; + const device auto& ubo_ds = (constant UBO_Ds* const device (&)[4])spvDescriptorSet0.ubo_cs; + const device auto& ssbo_bs_readonly = (const device SSBO_BsRO* const device (&)[4])spvDescriptorSet0.ssbo_as; + func0((*spvDescriptorSet0.ssbo_a), gl_GlobalInvocationID, (*spvDescriptorSet0.ubo_c), gl_WorkGroupID, v_42, ssbo_b, ubo_d, ssbo_b_readonly); + func1(gl_GlobalInvocationID, gl_WorkGroupID, spvDescriptorSet0.ssbo_as, spvDescriptorSet0.ubo_cs); + func2(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_bs, ubo_ds, ssbo_bs_readonly); + func3(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_e, ubo_g, ssbo_f, ubo_h, ssbo_i); +} + diff --git a/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp b/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp new file mode 100644 index 00000000000..587f1ee8e0d --- /dev/null +++ b/reference/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp @@ -0,0 +1,142 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO_A +{ + float data[1]; +}; + +struct UBO_C +{ + float4 data[1024]; +}; + +struct Registers +{ + float reg; +}; + +struct SSBO_B +{ + uint2 data[1]; +}; + +struct UBO_D +{ + uint4 data[1024]; +}; + +struct SSBO_BRO +{ + uint2 data[1]; +}; + +struct SSBO_As +{ + float data[1]; +}; + +struct UBO_Cs +{ + float4 data[1024]; +}; + +struct SSBO_Bs +{ + uint2 data[1024]; +}; + +struct UBO_Ds +{ + uint4 data[1024]; +}; + +struct SSBO_BsRO +{ + uint2 data[1024]; +}; + +struct SSBO_E +{ + float data[1]; +}; + +struct UBO_G +{ + float4 data[1024]; +}; + +struct SSBO_F +{ + uint2 data[1]; +}; + +struct UBO_H +{ + uint4 data[1024]; +}; + +struct SSBO_I +{ + uint2 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +struct spvDescriptorSetBuffer0 +{ + device SSBO_A* ssbo_a [[id(0)]]; + constant UBO_C* ubo_c [[id(1)]]; + 
device SSBO_As* ssbo_as [[id(2)]][4]; + constant UBO_Cs* ubo_cs [[id(6)]][4]; +}; + +static inline __attribute__((always_inline)) +void func0(device SSBO_A& ssbo_a, thread uint3& gl_GlobalInvocationID, constant UBO_C& ubo_c, thread uint3& gl_WorkGroupID, constant Registers& v_42, device SSBO_B& ssbo_b, constant UBO_D& ubo_d, const device SSBO_BRO& ssbo_b_readonly) +{ + ssbo_a.data[gl_GlobalInvocationID.x] = ubo_c.data[gl_WorkGroupID.x].x + v_42.reg; + ssbo_b.data[gl_GlobalInvocationID.x] = ubo_d.data[gl_WorkGroupID.y].xy + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; +} + +static inline __attribute__((always_inline)) +void func1(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_As* constant (&ssbo_as)[4], constant UBO_Cs* constant (&ubo_cs)[4]) +{ + ssbo_as[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_cs[gl_WorkGroupID.x]->data[0].x; +} + +static inline __attribute__((always_inline)) +void func2(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_Bs* constant (&ssbo_bs)[4], constant UBO_Ds* constant (&ubo_ds)[4], const device SSBO_BsRO* constant (&ssbo_bs_readonly)[4]) +{ + ssbo_bs[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x] = ubo_ds[gl_WorkGroupID.x]->data[0].xy + ssbo_bs_readonly[gl_WorkGroupID.x]->data[gl_GlobalInvocationID.x]; +} + +static inline __attribute__((always_inline)) +void func3(thread uint3& gl_GlobalInvocationID, thread uint3& gl_WorkGroupID, device SSBO_E& ssbo_e, constant UBO_G& ubo_g, device SSBO_F& ssbo_f, constant UBO_H& ubo_h, const device SSBO_I& ssbo_i) +{ + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x].x; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y].xy + ssbo_i.data[gl_GlobalInvocationID.x]; +} + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant Registers& v_42 [[buffer(1)]], device void* spvBufferAliasSet2Binding0 [[buffer(2)]], constant void* spvBufferAliasSet2Binding1 
[[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + device auto& ssbo_e = *(device SSBO_E*)spvBufferAliasSet2Binding0; + constant auto& ubo_g = *(constant UBO_G*)spvBufferAliasSet2Binding1; + device auto& ssbo_f = *(device SSBO_F*)spvBufferAliasSet2Binding0; + constant auto& ubo_h = *(constant UBO_H*)spvBufferAliasSet2Binding1; + const device auto& ssbo_i = *(const device SSBO_I*)spvBufferAliasSet2Binding0; + device auto& ssbo_b = (device SSBO_B&)(*spvDescriptorSet0.ssbo_a); + constant auto& ubo_d = (constant UBO_D&)(*spvDescriptorSet0.ubo_c); + const device auto& ssbo_b_readonly = (const device SSBO_BRO&)(*spvDescriptorSet0.ssbo_a); + constant auto& ssbo_bs = (device SSBO_Bs* constant (&)[4])spvDescriptorSet0.ssbo_as; + constant auto& ubo_ds = (constant UBO_Ds* constant (&)[4])spvDescriptorSet0.ubo_cs; + constant auto& ssbo_bs_readonly = (const device SSBO_BsRO* constant (&)[4])spvDescriptorSet0.ssbo_as; + func0((*spvDescriptorSet0.ssbo_a), gl_GlobalInvocationID, (*spvDescriptorSet0.ubo_c), gl_WorkGroupID, v_42, ssbo_b, ubo_d, ssbo_b_readonly); + func1(gl_GlobalInvocationID, gl_WorkGroupID, spvDescriptorSet0.ssbo_as, spvDescriptorSet0.ubo_cs); + func2(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_bs, ubo_ds, ssbo_bs_readonly); + func3(gl_GlobalInvocationID, gl_WorkGroupID, ssbo_e, ubo_g, ssbo_f, ubo_h, ssbo_i); +} + diff --git a/reference/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp b/reference/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp new file mode 100644 index 00000000000..dde7f47b085 --- /dev/null +++ b/reference/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp @@ -0,0 +1,71 @@ +#include +#include +#if __METAL_VERSION__ >= 230 +#include +using namespace metal::raytracing; +#endif + +using namespace metal; + +struct Params +{ + uint ray_flags; + uint cull_mask; + char _m2_pad[8]; + packed_float3 origin; + float tmin; + packed_float3 dir; + 
float tmax; + float thit; +}; + +kernel void main0(constant Params& _18 [[buffer(1)]], raytracing::acceleration_structure AS0 [[buffer(0)]], raytracing::acceleration_structure AS1 [[buffer(2)]]) +{ + raytracing::intersection_query q; + q.reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS0, intersection_params()); + raytracing::intersection_query q2[2]; + q2[1].reset(ray(_18.origin, _18.dir, _18.tmin, _18.tmax), AS1, intersection_params()); + bool _63 = q.next(); + bool res = _63; + q2[0].abort(); + q.commit_bounding_box_intersection(_18.thit); + q2[1].commit_triangle_intersection(); + float _71 = q.get_ray_min_distance(); + float fval = _71; + float3 _74 = q.get_world_space_ray_direction(); + float3 fvals = _74; + float3 _75 = q.get_world_space_ray_origin(); + fvals = _75; + uint _80 = uint(q2[1].get_committed_intersection_type()); + uint type = _80; + uint _83 = uint(q2[0].get_candidate_intersection_type()) - 1; + type = _83; + bool _85 = q2[1].is_candidate_non_opaque_bounding_box(); + res = _85; + float _87 = q2[1].get_committed_distance(); + fval = _87; + float _89 = q2[1].get_candidate_triangle_distance(); + fval = _89; + int _92 = q.get_committed_user_instance_id(); + int ival = _92; + int _94 = q2[0].get_candidate_instance_id(); + ival = _94; + int _96 = q2[1].get_candidate_geometry_id(); + ival = _96; + int _97 = q.get_committed_primitive_id(); + ival = _97; + float2 _100 = q2[0].get_candidate_triangle_barycentric_coord(); + fvals.x = _100.x; + fvals.y = _100.y; + bool _107 = q.is_committed_triangle_front_facing(); + res = _107; + float3 _108 = q.get_candidate_ray_direction(); + fvals = _108; + float3 _110 = q2[0].get_committed_ray_origin(); + fvals = _110; + float4x3 _114 = q.get_candidate_object_to_world_transform(); + float4x3 matrices = _114; + float4x3 _116 = q2[1].get_committed_world_to_object_transform(); + matrices = _116; +} + diff --git a/reference/shaders-msl/comp/read-write-only.comp b/reference/shaders-msl/comp/read-write-only.comp index 
7547b417d8f..0cf8d8e3215 100644 --- a/reference/shaders-msl/comp/read-write-only.comp +++ b/reference/shaders-msl/comp/read-write-only.comp @@ -21,6 +21,8 @@ struct SSBO1 float4 data3; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], const device SSBO0& _15 [[buffer(1)]], device SSBO1& _21 [[buffer(2)]]) { _10.data4 = _15.data0 + _21.data2; diff --git a/reference/shaders-msl/comp/rmw-matrix.comp b/reference/shaders-msl/comp/rmw-matrix.comp index 150db7ede98..b53a3a75c27 100644 --- a/reference/shaders-msl/comp/rmw-matrix.comp +++ b/reference/shaders-msl/comp/rmw-matrix.comp @@ -13,6 +13,8 @@ struct SSBO float4x4 c1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _11 [[buffer(0)]]) { _11.a *= _11.a1; diff --git a/reference/shaders-msl/comp/rmw-opt.comp b/reference/shaders-msl/comp/rmw-opt.comp index 060f9f9c717..229154fc219 100644 --- a/reference/shaders-msl/comp/rmw-opt.comp +++ b/reference/shaders-msl/comp/rmw-opt.comp @@ -8,6 +8,8 @@ struct SSBO int a; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.a += 10; diff --git a/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp index 312a6f9453a..9bf87817747 100644 --- a/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -10,12 +10,16 @@ struct SSBO float c; float d; float e; + float f; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _9 [[buffer(0)]]) { _9.c = abs(_9.a - _9.b); _9.d = abs(_9.a); _9.e = sign(_9.a); + _9.f = abs((_9.a - 1.0) - (_9.b - 2.0)); } diff --git a/reference/shaders-msl/comp/shared-array-of-arrays.comp b/reference/shaders-msl/comp/shared-array-of-arrays.comp 
index 7acb0ab8573..8b532368959 100644 --- a/reference/shaders-msl/comp/shared-array-of-arrays.comp +++ b/reference/shaders-msl/comp/shared-array-of-arrays.comp @@ -12,6 +12,7 @@ struct SSBO constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 4u, 1u); +static inline __attribute__((always_inline)) void work(threadgroup float (&foo)[4][4], thread uint3& gl_LocalInvocationID, thread uint& gl_LocalInvocationIndex, device SSBO& v_67, thread uint3& gl_GlobalInvocationID) { foo[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = float(gl_LocalInvocationIndex); diff --git a/reference/shaders-msl/comp/shared-matrix-array-of-array.comp b/reference/shaders-msl/comp/shared-matrix-array-of-array.comp new file mode 100644 index 00000000000..173b31cde62 --- /dev/null +++ b/reference/shaders-msl/comp/shared-matrix-array-of-array.comp @@ -0,0 +1,1286 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data 
spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + 
spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + 
+ spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator 
matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) 
threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const 
object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + 
threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data 
spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant 
matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < 
Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + 
{ + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& 
operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + 
spvStorage_float4x3 a[2]; + float b; + spvUnsafeArray c; +}; + +struct S2 +{ + int4 a; + spvUnsafeArray, 1>, 3> b; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_float(thread const float& a, thread const float& b) +{ + return abs(a - b) < 0.0500000007450580596923828125; +} + +static inline __attribute__((always_inline)) +bool compare_vec3(thread const float3& a, thread const float3& b) +{ + float param = a.x; + float param_1 = b.x; + bool _85 = compare_float(param, param_1); + bool _95; + if (_85) + { + float param_2 = a.y; + float param_3 = b.y; + _95 = compare_float(param_2, param_3); + } + else + { + _95 = _85; + } + bool _106; + if (_95) + { + float param_4 = a.z; + float param_5 = b.z; + _106 = compare_float(param_4, param_5); + } + else + { + _106 = _95; + } + return _106; +} + +static inline __attribute__((always_inline)) +bool compare_mat4x3(thread const float4x3& a, thread const float4x3& b) +{ + float3 param = a[0]; + float3 param_1 = b[0]; + bool _116 = compare_vec3(param, param_1); + bool _127; + if (_116) + { + float3 param_2 = a[1]; + float3 param_3 = b[1]; + _127 = compare_vec3(param_2, param_3); + } + else + { + _127 = _116; + } + bool _138; + if (_127) + { + float3 param_4 = a[2]; + float3 param_5 = b[2]; + _138 = compare_vec3(param_4, param_5); + } + else + { + _138 = _127; + } + bool _149; + if (_138) + { + float3 param_6 = a[3]; + float3 param_7 = b[3]; + _149 = compare_vec3(param_6, param_7); + } + else + { + _149 = _138; + } + return _149; +} + +static inline __attribute__((always_inline)) +bool compare_vec2(thread const float2& a, thread const float2& b) +{ + float param = a.x; + float param_1 = b.x; + bool _65 = compare_float(param, param_1); + bool _76; + if (_65) + { + float param_2 = a.y; + float param_3 = b.y; + _76 = compare_float(param_2, param_3); + } + else + { + _76 = _65; + } + return _76; +} + +static inline 
__attribute__((always_inline)) +bool compare_ivec4(thread const int4& a, thread const int4& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bool(thread const bool& a, thread const bool& b) +{ + return a == b; +} + +kernel void main0(device block& _383 [[buffer(0)]]) +{ + threadgroup S1 s1; + threadgroup S2 s2; + s1.a[0] = spvStorage_float4x3(float4x3(float3(0.0, 2.0, -8.0), float3(6.0, 7.0, 5.0), float3(-6.0, 1.0, 9.0), float3(-4.0, -3.0, 4.0))); + s1.a[1] = spvStorage_float4x3(float4x3(float3(4.0, 9.0, -9.0), float3(-8.0, -9.0, 8.0), float3(0.0, 4.0, -4.0), float3(7.0, 2.0, -1.0))); + s1.b = 7.0; + s1.c[0] = float2(-5.0, -4.0); + s1.c[1] = float2(3.0, -5.0); + s1.c[2] = float2(-3.0, -1.0); + s2.a = int4(1, 0, -3, 1); + s2.b[0][0][0] = short(true); + s2.b[0][0][1] = short(false); + s2.b[0][0][2] = short(false); + s2.b[1][0][0] = short(true); + s2.b[1][0][1] = short(false); + s2.b[1][0][2] = short(true); + s2.b[2][0][0] = short(false); + s2.b[2][0][1] = short(true); + s2.b[2][0][2] = short(true); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _242; + if (allOk) + { + float4x3 param = float4x3(float3(0.0, 2.0, -8.0), float3(6.0, 7.0, 5.0), float3(-6.0, 1.0, 9.0), float3(-4.0, -3.0, 4.0)); + float4x3 param_1 = float4x3(s1.a[0]); + _242 = compare_mat4x3(param, param_1); + } + else + { + _242 = allOk; + } + allOk = _242; + bool _251; + if (allOk) + { + float4x3 param_2 = float4x3(float3(4.0, 9.0, -9.0), float3(-8.0, -9.0, 8.0), float3(0.0, 4.0, -4.0), float3(7.0, 2.0, -1.0)); + float4x3 param_3 = float4x3(s1.a[1]); + _251 = compare_mat4x3(param_2, param_3); + } + else + { + _251 = allOk; + } + allOk = _251; + bool _260; + if (allOk) + { + float param_4 = 7.0; + float param_5 = s1.b; + _260 = compare_float(param_4, param_5); + } + else + { + _260 = allOk; + } + allOk = _260; + bool _269; + if 
(allOk) + { + float2 param_6 = float2(-5.0, -4.0); + float2 param_7 = s1.c[0]; + _269 = compare_vec2(param_6, param_7); + } + else + { + _269 = allOk; + } + allOk = _269; + bool _278; + if (allOk) + { + float2 param_8 = float2(3.0, -5.0); + float2 param_9 = s1.c[1]; + _278 = compare_vec2(param_8, param_9); + } + else + { + _278 = allOk; + } + allOk = _278; + bool _287; + if (allOk) + { + float2 param_10 = float2(-3.0, -1.0); + float2 param_11 = s1.c[2]; + _287 = compare_vec2(param_10, param_11); + } + else + { + _287 = allOk; + } + allOk = _287; + bool _296; + if (allOk) + { + int4 param_12 = int4(1, 0, -3, 1); + int4 param_13 = s2.a; + _296 = compare_ivec4(param_12, param_13); + } + else + { + _296 = allOk; + } + allOk = _296; + bool _305; + if (allOk) + { + bool param_14 = true; + bool param_15 = bool(s2.b[0][0][0]); + _305 = compare_bool(param_14, param_15); + } + else + { + _305 = allOk; + } + allOk = _305; + bool _314; + if (allOk) + { + bool param_16 = false; + bool param_17 = bool(s2.b[0][0][1]); + _314 = compare_bool(param_16, param_17); + } + else + { + _314 = allOk; + } + allOk = _314; + bool _323; + if (allOk) + { + bool param_18 = false; + bool param_19 = bool(s2.b[0][0][2]); + _323 = compare_bool(param_18, param_19); + } + else + { + _323 = allOk; + } + allOk = _323; + bool _332; + if (allOk) + { + bool param_20 = true; + bool param_21 = bool(s2.b[1][0][0]); + _332 = compare_bool(param_20, param_21); + } + else + { + _332 = allOk; + } + allOk = _332; + bool _341; + if (allOk) + { + bool param_22 = false; + bool param_23 = bool(s2.b[1][0][1]); + _341 = compare_bool(param_22, param_23); + } + else + { + _341 = allOk; + } + allOk = _341; + bool _350; + if (allOk) + { + bool param_24 = true; + bool param_25 = bool(s2.b[1][0][2]); + _350 = compare_bool(param_24, param_25); + } + else + { + _350 = allOk; + } + allOk = _350; + bool _359; + if (allOk) + { + bool param_26 = false; + bool param_27 = bool(s2.b[2][0][0]); + _359 = compare_bool(param_26, param_27); 
+ } + else + { + _359 = allOk; + } + allOk = _359; + bool _368; + if (allOk) + { + bool param_28 = true; + bool param_29 = bool(s2.b[2][0][1]); + _368 = compare_bool(param_28, param_29); + } + else + { + _368 = allOk; + } + allOk = _368; + bool _377; + if (allOk) + { + bool param_30 = true; + bool param_31 = bool(s2.b[2][0][2]); + _377 = compare_bool(param_30, param_31); + } + else + { + _377 = allOk; + } + allOk = _377; + if (allOk) + { + _383.passed++; + } +} + diff --git a/reference/shaders-msl/comp/shared-matrix-cast.comp b/reference/shaders-msl/comp/shared-matrix-cast.comp new file mode 100644 index 00000000000..c764c1fdbd5 --- /dev/null +++ b/reference/shaders-msl/comp/shared-matrix-cast.comp @@ -0,0 +1,1065 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant 
spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef 
__HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; 
i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i 
= 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + 
columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return 
*this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data 
matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { 
+ for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for 
(size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = 
m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data 
spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i 
< Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; 
+typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + float4 a; + spvStorage_float3x2 b; + short4 c; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_float(thread const float& a, thread const float& b) +{ + return abs(a - b) < 0.0500000007450580596923828125; +} + +static inline __attribute__((always_inline)) +bool compare_vec4(thread const float4& a, thread const float4& b) +{ + float param = a.x; + float param_1 = b.x; + bool _78 = compare_float(param, param_1); + bool _88; + if (_78) + { + float param_2 = a.y; + float param_3 = b.y; + _88 = compare_float(param_2, param_3); + } + else + { + _88 = _78; + } + bool _99; + if (_88) + { + float param_4 = a.z; + float param_5 = b.z; + _99 = compare_float(param_4, param_5); + } + else + { + _99 = _88; + } + bool _110; + if (_99) + { + float param_6 = a.w; + float param_7 = b.w; + _110 = compare_float(param_6, param_7); + } + else + { + _110 = _99; + } + return _110; +} + +static inline __attribute__((always_inline)) +bool compare_vec2(thread const float2& a, thread const float2& b) +{ + float param = a.x; + float param_1 = b.x; + bool _58 = compare_float(param, param_1); + bool _69; + if (_58) + { + float param_2 = a.y; + float param_3 = b.y; + _69 = compare_float(param_2, param_3); + } + else + { + _69 = _58; + } + return _69; +} + +static inline __attribute__((always_inline)) +bool compare_mat3x2(thread const float3x2& a, thread const float3x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _121 = compare_vec2(param, param_1); + bool _132; + if (_121) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _132 = compare_vec2(param_2, param_3); + } + else + { + _132 = _121; + } + bool _143; + if (_132) + { + float2 param_4 = a[2]; + float2 param_5 = b[2]; + _143 = 
compare_vec2(param_4, param_5); + } + else + { + _143 = _132; + } + return _143; +} + +static inline __attribute__((always_inline)) +bool compare_bvec4(thread const bool4& a, thread const bool4& b) +{ + return all(a == b); +} + +kernel void main0(device block& _212 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a = float4(1.0, -5.0, -9.0, -5.0); + s1.b = spvStorage_float3x2(float3x2(float2(1.0, -7.0), float2(1.0, 2.0), float2(8.0, 7.0))); + s1.c = short4(bool4(false, true, false, false)); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _188; + if (allOk) + { + float4 param = float4(1.0, -5.0, -9.0, -5.0); + float4 param_1 = s1.a; + _188 = compare_vec4(param, param_1); + } + else + { + _188 = allOk; + } + allOk = _188; + bool _197; + if (allOk) + { + float3x2 param_2 = float3x2(float2(1.0, -7.0), float2(1.0, 2.0), float2(8.0, 7.0)); + float3x2 param_3 = float3x2(s1.b); + _197 = compare_mat3x2(param_2, param_3); + } + else + { + _197 = allOk; + } + allOk = _197; + bool _206; + if (allOk) + { + bool4 param_4 = bool4(false, true, false, false); + bool4 param_5 = bool4(s1.c); + _206 = compare_bvec4(param_4, param_5); + } + else + { + _206 = allOk; + } + allOk = _206; + if (allOk) + { + _212.passed++; + } +} + diff --git a/reference/shaders-msl/comp/shared-matrix-nested-struct-array.comp b/reference/shaders-msl/comp/shared-matrix-nested-struct-array.comp new file mode 100644 index 00000000000..db5ed440f88 --- /dev/null +++ b/reference/shaders-msl/comp/shared-matrix-nested-struct-array.comp @@ -0,0 +1,1316 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data 
spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + 
spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + 
+ spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator 
matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) 
threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const 
object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + 
threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data 
spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant 
matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < 
Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + 
{ + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& 
operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { + return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct sA +{ + 
spvStorage_float2x3 mA; +}; + +struct sB +{ + spvStorage_float2x2 mA; + spvStorage_float3x2 mB; + uint3 mC; +}; + +struct sC +{ + sA mA; + sB mB; +}; + +struct sD +{ + sC mA; +}; + +struct sE +{ + spvStorage_float3x2 mA; + spvStorage_float4x3 mB; +}; + +struct sF +{ + sE mA; +}; + +struct sG +{ + sF mA; +}; + +struct sH +{ + spvUnsafeArray mA; +}; + +struct S1 +{ + sD a; + sG b; + spvUnsafeArray c; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_float(thread const float& a, thread const float& b) +{ + return abs(a - b) < 0.0500000007450580596923828125; +} + +static inline __attribute__((always_inline)) +bool compare_vec3(thread const float3& a, thread const float3& b) +{ + float param = a.x; + float param_1 = b.x; + bool _106 = compare_float(param, param_1); + bool _116; + if (_106) + { + float param_2 = a.y; + float param_3 = b.y; + _116 = compare_float(param_2, param_3); + } + else + { + _116 = _106; + } + bool _127; + if (_116) + { + float param_4 = a.z; + float param_5 = b.z; + _127 = compare_float(param_4, param_5); + } + else + { + _127 = _116; + } + return _127; +} + +static inline __attribute__((always_inline)) +bool compare_mat2x3(thread const float2x3& a, thread const float2x3& b) +{ + float3 param = a[0]; + float3 param_1 = b[0]; + bool _158 = compare_vec3(param, param_1); + bool _168; + if (_158) + { + float3 param_2 = a[1]; + float3 param_3 = b[1]; + _168 = compare_vec3(param_2, param_3); + } + else + { + _168 = _158; + } + return _168; +} + +static inline __attribute__((always_inline)) +bool compare_vec2(thread const float2& a, thread const float2& b) +{ + float param = a.x; + float param_1 = b.x; + bool _86 = compare_float(param, param_1); + bool _97; + if (_86) + { + float param_2 = a.y; + float param_3 = b.y; + _97 = compare_float(param_2, param_3); + } + else + { + _97 = _86; + } + return _97; +} + +static inline 
__attribute__((always_inline)) +bool compare_mat2(thread const float2x2& a, thread const float2x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _138 = compare_vec2(param, param_1); + bool _149; + if (_138) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _149 = compare_vec2(param_2, param_3); + } + else + { + _149 = _138; + } + return _149; +} + +static inline __attribute__((always_inline)) +bool compare_mat3x2(thread const float3x2& a, thread const float3x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _177 = compare_vec2(param, param_1); + bool _187; + if (_177) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _187 = compare_vec2(param_2, param_3); + } + else + { + _187 = _177; + } + bool _198; + if (_187) + { + float2 param_4 = a[2]; + float2 param_5 = b[2]; + _198 = compare_vec2(param_4, param_5); + } + else + { + _198 = _187; + } + return _198; +} + +static inline __attribute__((always_inline)) +bool compare_uvec3(thread const uint3& a, thread const uint3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_mat4x3(thread const float4x3& a, thread const float4x3& b) +{ + float3 param = a[0]; + float3 param_1 = b[0]; + bool _207 = compare_vec3(param, param_1); + bool _217; + if (_207) + { + float3 param_2 = a[1]; + float3 param_3 = b[1]; + _217 = compare_vec3(param_2, param_3); + } + else + { + _217 = _207; + } + bool _227; + if (_217) + { + float3 param_4 = a[2]; + float3 param_5 = b[2]; + _227 = compare_vec3(param_4, param_5); + } + else + { + _227 = _217; + } + bool _238; + if (_227) + { + float3 param_6 = a[3]; + float3 param_7 = b[3]; + _238 = compare_vec3(param_6, param_7); + } + else + { + _238 = _227; + } + return _238; +} + +static inline __attribute__((always_inline)) +bool compare_bvec3(thread const bool3& a, thread const bool3& b) +{ + return all(a == b); +} + +kernel void main0(device block& _424 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a.mA.mA.mA = 
spvStorage_float2x3(float2x3(float3(6.0, 8.0, 8.0), float3(0.0, -4.0, -5.0))); + s1.a.mA.mB.mA = spvStorage_float2x2(float2x2(float2(9.0, -4.0), float2(-6.0, -1.0))); + s1.a.mA.mB.mB = spvStorage_float3x2(float3x2(float2(-1.0, -2.0), float2(1.0, 6.0), float2(5.0, 7.0))); + s1.a.mA.mB.mC = uint3(3u, 1u, 5u); + s1.b.mA.mA.mA = spvStorage_float3x2(float3x2(float2(8.0, 3.0), float2(0.0, 2.0), float2(1.0, 8.0))); + s1.b.mA.mA.mB = spvStorage_float4x3(float4x3(float3(0.0, 9.0, -1.0), float3(-1.0, -7.0, 7.0), float3(-4.0, -3.0, 1.0), float3(-4.0, -9.0, 1.0))); + s1.c[0].mA[0] = short3(bool3(true, false, false)); + s1.c[0].mA[1] = short3(bool3(true, false, false)); + s1.c[1].mA[0] = short3(bool3(false)); + s1.c[1].mA[1] = short3(bool3(false)); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _337; + if (allOk) + { + float2x3 param = float2x3(float3(6.0, 8.0, 8.0), float3(0.0, -4.0, -5.0)); + float2x3 param_1 = float2x3(s1.a.mA.mA.mA); + _337 = compare_mat2x3(param, param_1); + } + else + { + _337 = allOk; + } + allOk = _337; + bool _346; + if (allOk) + { + float2x2 param_2 = float2x2(float2(9.0, -4.0), float2(-6.0, -1.0)); + float2x2 param_3 = float2x2(s1.a.mA.mB.mA); + _346 = compare_mat2(param_2, param_3); + } + else + { + _346 = allOk; + } + allOk = _346; + bool _355; + if (allOk) + { + float3x2 param_4 = float3x2(float2(-1.0, -2.0), float2(1.0, 6.0), float2(5.0, 7.0)); + float3x2 param_5 = float3x2(s1.a.mA.mB.mB); + _355 = compare_mat3x2(param_4, param_5); + } + else + { + _355 = allOk; + } + allOk = _355; + bool _364; + if (allOk) + { + uint3 param_6 = uint3(3u, 1u, 5u); + uint3 param_7 = s1.a.mA.mB.mC; + _364 = compare_uvec3(param_6, param_7); + } + else + { + _364 = allOk; + } + allOk = _364; + bool _373; + if (allOk) + { + float3x2 param_8 = float3x2(float2(8.0, 3.0), float2(0.0, 2.0), float2(1.0, 8.0)); + float3x2 param_9 = 
float3x2(s1.b.mA.mA.mA); + _373 = compare_mat3x2(param_8, param_9); + } + else + { + _373 = allOk; + } + allOk = _373; + bool _382; + if (allOk) + { + float4x3 param_10 = float4x3(float3(0.0, 9.0, -1.0), float3(-1.0, -7.0, 7.0), float3(-4.0, -3.0, 1.0), float3(-4.0, -9.0, 1.0)); + float4x3 param_11 = float4x3(s1.b.mA.mA.mB); + _382 = compare_mat4x3(param_10, param_11); + } + else + { + _382 = allOk; + } + allOk = _382; + bool _391; + if (allOk) + { + bool3 param_12 = bool3(true, false, false); + bool3 param_13 = bool3(s1.c[0].mA[0]); + _391 = compare_bvec3(param_12, param_13); + } + else + { + _391 = allOk; + } + allOk = _391; + bool _400; + if (allOk) + { + bool3 param_14 = bool3(true, false, false); + bool3 param_15 = bool3(s1.c[0].mA[1]); + _400 = compare_bvec3(param_14, param_15); + } + else + { + _400 = allOk; + } + allOk = _400; + bool _409; + if (allOk) + { + bool3 param_16 = bool3(false); + bool3 param_17 = bool3(s1.c[1].mA[0]); + _409 = compare_bvec3(param_16, param_17); + } + else + { + _409 = allOk; + } + allOk = _409; + bool _418; + if (allOk) + { + bool3 param_18 = bool3(false); + bool3 param_19 = bool3(s1.c[1].mA[1]); + _418 = compare_bvec3(param_18, param_19); + } + else + { + _418 = allOk; + } + allOk = _418; + if (allOk) + { + _424.passed++; + } +} + diff --git a/reference/shaders-msl/comp/shared-matrix-nested-struct.comp b/reference/shaders-msl/comp/shared-matrix-nested-struct.comp new file mode 100644 index 00000000000..2526c6c93b7 --- /dev/null +++ b/reference/shaders-msl/comp/shared-matrix-nested-struct.comp @@ -0,0 +1,1473 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +struct spvStorageMatrix +{ + vec columns[Cols]; + + spvStorageMatrix() thread = default; + thread spvStorageMatrix& operator=(initializer_list> cols) thread + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + 
spvStorageMatrix(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const thread matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const thread spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const constant matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const constant spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const device matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const device spvStorageMatrix& m) thread = default; + + spvStorageMatrix(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) thread = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const 
threadgroup_imageblock spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const ray_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) thread = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) thread = default; + thread spvStorageMatrix& operator=(const object_data matrix& m) thread + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + thread spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) thread = default; + #endif + + operator matrix() const thread + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const thread + { + return columns[idx]; + } + thread vec& operator[](size_t idx) thread + { + return columns[idx]; + } + + spvStorageMatrix() constant = default; + + spvStorageMatrix(const thread matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const constant matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + 
spvStorageMatrix(const constant spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const device matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) constant = default; + + spvStorageMatrix(const threadgroup matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) constant = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) constant = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) constant + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) constant = default; + #endif + + operator matrix() const constant + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const constant + { + return columns[idx]; + } + + spvStorageMatrix() device = default; + device spvStorageMatrix& operator=(initializer_list> cols) device + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const thread matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + 
columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const thread spvStorageMatrix& m) device = default; + + spvStorageMatrix(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const constant matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const constant spvStorageMatrix& m) device = default; + + spvStorageMatrix(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const device matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const device spvStorageMatrix& m) device = default; + + spvStorageMatrix(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) device = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const threadgroup_imageblock 
spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const ray_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) device = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) device = default; + device spvStorageMatrix& operator=(const object_data matrix& m) device + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + device spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) device = default; + #endif + + operator matrix() const device + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const device + { + return columns[idx]; + } + device vec& operator[](size_t idx) device + { + return columns[idx]; + } + + spvStorageMatrix() threadgroup = default; + threadgroup spvStorageMatrix& operator=(initializer_list> cols) threadgroup + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const thread matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const thread 
spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const constant matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const device matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const 
threadgroup_imageblock spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup = default; + threadgroup spvStorageMatrix& operator=(const object_data matrix& m) threadgroup + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup = default; + #endif + + operator matrix() const threadgroup + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup + { + return columns[idx]; + } + threadgroup vec& operator[](size_t idx) threadgroup + { + return columns[idx]; + } + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix() threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(initializer_list> cols) threadgroup_imageblock + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock 
spvStorageMatrix& operator=(const thread matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const thread spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const constant matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const constant spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const device matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const device spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) threadgroup_imageblock = default; + + spvStorageMatrix(const threadgroup_imageblock matrix& m) 
threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) threadgroup_imageblock = default; + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + threadgroup_imageblock spvStorageMatrix& operator=(const object_data matrix& m) threadgroup_imageblock + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + threadgroup_imageblock spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) threadgroup_imageblock = default; + #endif + + operator matrix() const threadgroup_imageblock + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const threadgroup_imageblock + { + return columns[idx]; + } + threadgroup_imageblock vec& operator[](size_t 
idx) threadgroup_imageblock + { + return columns[idx]; + } + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix() ray_data = default; + ray_data spvStorageMatrix& operator=(initializer_list> cols) ray_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const thread matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const constant matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const device matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) ray_data = default; + + spvStorageMatrix(const threadgroup matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup matrix& m) 
ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) ray_data = default; + #endif + + spvStorageMatrix(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const ray_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) ray_data = default; + + #ifdef __HAVE_MESH__ + spvStorageMatrix(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) ray_data = default; + ray_data spvStorageMatrix& operator=(const object_data matrix& m) ray_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + ray_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) ray_data = default; + #endif + + operator matrix() const ray_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const ray_data + { + return columns[idx]; + } + ray_data vec& operator[](size_t idx) ray_data + { + return 
columns[idx]; + } + #endif + + #ifdef __HAVE_MESH__ + spvStorageMatrix() object_data = default; + object_data spvStorageMatrix& operator=(initializer_list> cols) object_data + { + size_t i; + thread vec* col; + for (i = 0, col = cols.begin(); i < Cols; ++i, ++col) + columns[i] = *col; + return *this; + } + + spvStorageMatrix(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const thread spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const thread matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const thread spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const constant spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const constant matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const constant spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const device spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const device matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const device spvStorageMatrix& m) object_data = default; + + spvStorageMatrix(const threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const 
threadgroup matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup spvStorageMatrix& m) object_data = default; + + #ifdef __HAVE_IMAGEBLOCKS__ + spvStorageMatrix(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const threadgroup_imageblock matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const threadgroup_imageblock spvStorageMatrix& m) object_data = default; + #endif + + #ifdef __HAVE_RAYTRACING__ + spvStorageMatrix(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const ray_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const ray_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const ray_data spvStorageMatrix& m) object_data = default; + #endif + + spvStorageMatrix(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + } + spvStorageMatrix(const object_data spvStorageMatrix& m) object_data = default; + object_data spvStorageMatrix& operator=(const object_data matrix& m) object_data + { + for (size_t i = 0; i < Cols; ++i) + columns[i] = m.columns[i]; + return *this; + } + object_data spvStorageMatrix& operator=(const object_data spvStorageMatrix& m) object_data = default; + + operator matrix() const object_data + { + matrix m; + for (int i = 0; i < Cols; ++i) + m.columns[i] = columns[i]; + return m; + } + + vec operator[](size_t idx) const object_data + { 
+ return columns[idx]; + } + object_data vec& operator[](size_t idx) object_data + { + return columns[idx]; + } + #endif + +}; + +template +matrix transpose(spvStorageMatrix m) +{ + return transpose(matrix(m)); +} + +typedef spvStorageMatrix spvStorage_half2x2; +typedef spvStorageMatrix spvStorage_half2x3; +typedef spvStorageMatrix spvStorage_half2x4; +typedef spvStorageMatrix spvStorage_half3x2; +typedef spvStorageMatrix spvStorage_half3x3; +typedef spvStorageMatrix spvStorage_half3x4; +typedef spvStorageMatrix spvStorage_half4x2; +typedef spvStorageMatrix spvStorage_half4x3; +typedef spvStorageMatrix spvStorage_half4x4; +typedef spvStorageMatrix spvStorage_float2x2; +typedef spvStorageMatrix spvStorage_float2x3; +typedef spvStorageMatrix spvStorage_float2x4; +typedef spvStorageMatrix spvStorage_float3x2; +typedef spvStorageMatrix spvStorage_float3x3; +typedef spvStorageMatrix spvStorage_float3x4; +typedef spvStorageMatrix spvStorage_float4x2; +typedef spvStorageMatrix spvStorage_float4x3; +typedef spvStorageMatrix spvStorage_float4x4; + +struct S1 +{ + uint a; + float4 b; +}; + +struct sA +{ + spvStorage_float4x4 mA; + short3 mB; + short4 mC; +}; + +struct sB +{ + short2 mA; +}; + +struct sC +{ + float mA; + uint4 mB; + float mC; +}; + +struct sD +{ + sA mA; + sB mB; + sC mC; +}; + +struct sE +{ + sD mA; +}; + +struct sF +{ + uint3 mA; + short mB; +}; + +struct sG +{ + sF mA; + spvStorage_float3x2 mB; +}; + +struct sH +{ + sG mA; + float2 mB; +}; + +struct sI +{ + spvStorage_float2x2 mA; + short3 mB; + short4 mC; +}; + +struct sJ +{ + sI mA; + short3 mB; +}; + +struct sK +{ + short2 mA; + sJ mB; + int2 mC; +}; + +struct S2 +{ + sE a; + int3 b; + sH c; + sK d; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_uint(thread const uint& a, thread const uint& b) +{ + return a == b; +} + +static inline __attribute__((always_inline)) +bool 
compare_float(thread const float& a, thread const float& b) +{ + return abs(a - b) < 0.0500000007450580596923828125; +} + +static inline __attribute__((always_inline)) +bool compare_vec4(thread const float4& a, thread const float4& b) +{ + float param = a.x; + float param_1 = b.x; + bool _147 = compare_float(param, param_1); + bool _157; + if (_147) + { + float param_2 = a.y; + float param_3 = b.y; + _157 = compare_float(param_2, param_3); + } + else + { + _157 = _147; + } + bool _168; + if (_157) + { + float param_4 = a.z; + float param_5 = b.z; + _168 = compare_float(param_4, param_5); + } + else + { + _168 = _157; + } + bool _179; + if (_168) + { + float param_6 = a.w; + float param_7 = b.w; + _179 = compare_float(param_6, param_7); + } + else + { + _179 = _168; + } + return _179; +} + +static inline __attribute__((always_inline)) +bool compare_mat4(thread const float4x4& a, thread const float4x4& b) +{ + float4 param = a[0]; + float4 param_1 = b[0]; + bool _239 = compare_vec4(param, param_1); + bool _249; + if (_239) + { + float4 param_2 = a[1]; + float4 param_3 = b[1]; + _249 = compare_vec4(param_2, param_3); + } + else + { + _249 = _239; + } + bool _259; + if (_249) + { + float4 param_4 = a[2]; + float4 param_5 = b[2]; + _259 = compare_vec4(param_4, param_5); + } + else + { + _259 = _249; + } + bool _270; + if (_259) + { + float4 param_6 = a[3]; + float4 param_7 = b[3]; + _270 = compare_vec4(param_6, param_7); + } + else + { + _270 = _259; + } + return _270; +} + +static inline __attribute__((always_inline)) +bool compare_bvec3(thread const bool3& a, thread const bool3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bvec4(thread const bool4& a, thread const bool4& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bvec2(thread const bool2& a, thread const bool2& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_uvec4(thread const 
uint4& a, thread const uint4& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_ivec3(thread const int3& a, thread const int3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_uvec3(thread const uint3& a, thread const uint3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bool(thread const bool& a, thread const bool& b) +{ + return a == b; +} + +static inline __attribute__((always_inline)) +bool compare_vec2(thread const float2& a, thread const float2& b) +{ + float param = a.x; + float param_1 = b.x; + bool _127 = compare_float(param, param_1); + bool _138; + if (_127) + { + float param_2 = a.y; + float param_3 = b.y; + _138 = compare_float(param_2, param_3); + } + else + { + _138 = _127; + } + return _138; +} + +static inline __attribute__((always_inline)) +bool compare_mat3x2(thread const float3x2& a, thread const float3x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _209 = compare_vec2(param, param_1); + bool _219; + if (_209) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _219 = compare_vec2(param_2, param_3); + } + else + { + _219 = _209; + } + bool _230; + if (_219) + { + float2 param_4 = a[2]; + float2 param_5 = b[2]; + _230 = compare_vec2(param_4, param_5); + } + else + { + _230 = _219; + } + return _230; +} + +static inline __attribute__((always_inline)) +bool compare_mat2(thread const float2x2& a, thread const float2x2& b) +{ + float2 param = a[0]; + float2 param_1 = b[0]; + bool _189 = compare_vec2(param, param_1); + bool _200; + if (_189) + { + float2 param_2 = a[1]; + float2 param_3 = b[1]; + _200 = compare_vec2(param_2, param_3); + } + else + { + _200 = _189; + } + return _200; +} + +static inline __attribute__((always_inline)) +bool compare_ivec2(thread const int2& a, thread const int2& b) +{ + return all(a == b); +} + +kernel void main0(device block& _612 [[buffer(0)]]) +{ + threadgroup S1 s1; 
+ threadgroup S2 s2; + s1.a = 0u; + s1.b = float4(8.0, 8.0, 0.0, -4.0); + s2.a.mA.mA.mA = spvStorage_float4x4(float4x4(float4(-5.0, 9.0, -4.0, -6.0), float4(-1.0, -1.0, -2.0, 1.0), float4(6.0, 5.0, 7.0, -2.0), float4(-4.0, -9.0, 8.0, 3.0))); + s2.a.mA.mA.mB = short3(bool3(true, false, false)); + s2.a.mA.mA.mC = short4(bool4(true, true, true, false)); + s2.a.mA.mB.mA = short2(bool2(true)); + s2.a.mA.mC.mA = 7.0; + s2.a.mA.mC.mB = uint4(8u, 6u, 2u, 0u); + s2.a.mA.mC.mC = -9.0; + s2.b = int3(1, -4, 0); + s2.c.mA.mA.mA = uint3(4u, 9u, 1u); + s2.c.mA.mA.mB = short(false); + s2.c.mA.mB = spvStorage_float3x2(float3x2(float2(3.0, -5.0), float2(-1.0, -5.0), float2(-1.0, -9.0))); + s2.c.mB = float2(-6.0, -9.0); + s2.d.mA = short2(bool2(true, false)); + s2.d.mB.mA.mA = spvStorage_float2x2(float2x2(float2(-2.0, 3.0), float2(7.0, 2.0))); + s2.d.mB.mA.mB = short3(bool3(false)); + s2.d.mB.mA.mC = short4(bool4(false, false, false, true)); + s2.d.mB.mB = short3(bool3(true, false, false)); + s2.d.mC = int2(-9, 0); + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _435; + if (allOk) + { + uint param = 0u; + uint param_1 = s1.a; + _435 = compare_uint(param, param_1); + } + else + { + _435 = allOk; + } + allOk = _435; + bool _444; + if (allOk) + { + float4 param_2 = float4(8.0, 8.0, 0.0, -4.0); + float4 param_3 = s1.b; + _444 = compare_vec4(param_2, param_3); + } + else + { + _444 = allOk; + } + allOk = _444; + bool _453; + if (allOk) + { + float4x4 param_4 = float4x4(float4(-5.0, 9.0, -4.0, -6.0), float4(-1.0, -1.0, -2.0, 1.0), float4(6.0, 5.0, 7.0, -2.0), float4(-4.0, -9.0, 8.0, 3.0)); + float4x4 param_5 = float4x4(s2.a.mA.mA.mA); + _453 = compare_mat4(param_4, param_5); + } + else + { + _453 = allOk; + } + allOk = _453; + bool _462; + if (allOk) + { + bool3 param_6 = bool3(true, false, false); + bool3 param_7 = bool3(s2.a.mA.mA.mB); + _462 = 
compare_bvec3(param_6, param_7); + } + else + { + _462 = allOk; + } + allOk = _462; + bool _471; + if (allOk) + { + bool4 param_8 = bool4(true, true, true, false); + bool4 param_9 = bool4(s2.a.mA.mA.mC); + _471 = compare_bvec4(param_8, param_9); + } + else + { + _471 = allOk; + } + allOk = _471; + bool _480; + if (allOk) + { + bool2 param_10 = bool2(true); + bool2 param_11 = bool2(s2.a.mA.mB.mA); + _480 = compare_bvec2(param_10, param_11); + } + else + { + _480 = allOk; + } + allOk = _480; + bool _489; + if (allOk) + { + float param_12 = 7.0; + float param_13 = s2.a.mA.mC.mA; + _489 = compare_float(param_12, param_13); + } + else + { + _489 = allOk; + } + allOk = _489; + bool _498; + if (allOk) + { + uint4 param_14 = uint4(8u, 6u, 2u, 0u); + uint4 param_15 = s2.a.mA.mC.mB; + _498 = compare_uvec4(param_14, param_15); + } + else + { + _498 = allOk; + } + allOk = _498; + bool _507; + if (allOk) + { + float param_16 = -9.0; + float param_17 = s2.a.mA.mC.mC; + _507 = compare_float(param_16, param_17); + } + else + { + _507 = allOk; + } + allOk = _507; + bool _516; + if (allOk) + { + int3 param_18 = int3(1, -4, 0); + int3 param_19 = s2.b; + _516 = compare_ivec3(param_18, param_19); + } + else + { + _516 = allOk; + } + allOk = _516; + bool _525; + if (allOk) + { + uint3 param_20 = uint3(4u, 9u, 1u); + uint3 param_21 = s2.c.mA.mA.mA; + _525 = compare_uvec3(param_20, param_21); + } + else + { + _525 = allOk; + } + allOk = _525; + bool _534; + if (allOk) + { + bool param_22 = false; + bool param_23 = bool(s2.c.mA.mA.mB); + _534 = compare_bool(param_22, param_23); + } + else + { + _534 = allOk; + } + allOk = _534; + bool _543; + if (allOk) + { + float3x2 param_24 = float3x2(float2(3.0, -5.0), float2(-1.0, -5.0), float2(-1.0, -9.0)); + float3x2 param_25 = float3x2(s2.c.mA.mB); + _543 = compare_mat3x2(param_24, param_25); + } + else + { + _543 = allOk; + } + allOk = _543; + bool _552; + if (allOk) + { + float2 param_26 = float2(-6.0, -9.0); + float2 param_27 = s2.c.mB; + _552 = 
compare_vec2(param_26, param_27); + } + else + { + _552 = allOk; + } + allOk = _552; + bool _561; + if (allOk) + { + bool2 param_28 = bool2(true, false); + bool2 param_29 = bool2(s2.d.mA); + _561 = compare_bvec2(param_28, param_29); + } + else + { + _561 = allOk; + } + allOk = _561; + bool _570; + if (allOk) + { + float2x2 param_30 = float2x2(float2(-2.0, 3.0), float2(7.0, 2.0)); + float2x2 param_31 = float2x2(s2.d.mB.mA.mA); + _570 = compare_mat2(param_30, param_31); + } + else + { + _570 = allOk; + } + allOk = _570; + bool _579; + if (allOk) + { + bool3 param_32 = bool3(false); + bool3 param_33 = bool3(s2.d.mB.mA.mB); + _579 = compare_bvec3(param_32, param_33); + } + else + { + _579 = allOk; + } + allOk = _579; + bool _588; + if (allOk) + { + bool4 param_34 = bool4(false, false, false, true); + bool4 param_35 = bool4(s2.d.mB.mA.mC); + _588 = compare_bvec4(param_34, param_35); + } + else + { + _588 = allOk; + } + allOk = _588; + bool _597; + if (allOk) + { + bool3 param_36 = bool3(true, false, false); + bool3 param_37 = bool3(s2.d.mB.mB); + _597 = compare_bvec3(param_36, param_37); + } + else + { + _597 = allOk; + } + allOk = _597; + bool _606; + if (allOk) + { + int2 param_38 = int2(-9, 0); + int2 param_39 = s2.d.mC; + _606 = compare_ivec2(param_38, param_39); + } + else + { + _606 = allOk; + } + allOk = _606; + if (allOk) + { + _612.passed++; + } +} + diff --git a/reference/shaders-msl/comp/shared-struct-bool-cast.comp b/reference/shaders-msl/comp/shared-struct-bool-cast.comp new file mode 100644 index 00000000000..806cb0a86cb --- /dev/null +++ b/reference/shaders-msl/comp/shared-struct-bool-cast.comp @@ -0,0 +1,110 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct S1 +{ + int3 a; + uint2 b; + short4 c; + uint d; +}; + +struct block +{ + uint passed; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) +bool compare_ivec3(thread 
const int3& a, thread const int3& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_uvec2(thread const uint2& a, thread const uint2& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_bvec4(thread const bool4& a, thread const bool4& b) +{ + return all(a == b); +} + +static inline __attribute__((always_inline)) +bool compare_uint(thread const uint& a, thread const uint& b) +{ + return a == b; +} + +kernel void main0(device block& _132 [[buffer(0)]]) +{ + threadgroup S1 s1; + s1.a = int3(6, 8, 8); + s1.b = uint2(4u); + s1.c = short4(bool4(false, false, false, true)); + s1.d = 6u; + threadgroup_barrier(mem_flags::mem_threadgroup); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); + bool allOk = true; + bool _99; + if (allOk) + { + int3 param = int3(6, 8, 8); + int3 param_1 = s1.a; + _99 = compare_ivec3(param, param_1); + } + else + { + _99 = allOk; + } + allOk = _99; + bool _108; + if (allOk) + { + uint2 param_2 = uint2(4u); + uint2 param_3 = s1.b; + _108 = compare_uvec2(param_2, param_3); + } + else + { + _108 = allOk; + } + allOk = _108; + bool _117; + if (allOk) + { + bool4 param_4 = bool4(false, false, false, true); + bool4 param_5 = bool4(s1.c); + _117 = compare_bvec4(param_4, param_5); + } + else + { + _117 = allOk; + } + allOk = _117; + bool _126; + if (allOk) + { + uint param_6 = 6u; + uint param_7 = s1.d; + _126 = compare_uint(param_6, param_7); + } + else + { + _126 = allOk; + } + allOk = _126; + if (allOk) + { + _132.passed++; + } +} + diff --git a/reference/shaders-msl/comp/spec-constant-op-member-array.comp b/reference/shaders-msl/comp/spec-constant-op-member-array.comp index d3c8b7dc4a3..8f54f0528dc 100644 --- a/reference/shaders-msl/comp/spec-constant-op-member-array.comp +++ b/reference/shaders-msl/comp/spec-constant-op-member-array.comp @@ -40,6 +40,7 @@ struct SSBO constant int e_tmp [[function_constant(3)]]; constant 
int e = is_function_constant_defined(e_tmp) ? e_tmp : 400; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); kernel void main0(device SSBO& _22 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { diff --git a/reference/shaders-msl/comp/spec-constant-work-group-size.comp b/reference/shaders-msl/comp/spec-constant-work-group-size.comp index bb796ab95d7..de30edec155 100644 --- a/reference/shaders-msl/comp/spec-constant-work-group-size.comp +++ b/reference/shaders-msl/comp/spec-constant-work-group-size.comp @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + #ifndef SPIRV_CROSS_CONSTANT_ID_1 #define SPIRV_CROSS_CONSTANT_ID_1 2 #endif @@ -27,7 +68,7 @@ constant int _32 = (1 - a); kernel void main0(device SSBO& _17 [[buffer(0)]]) { - int spec_const_array_size[b]; + spvUnsafeArray spec_const_array_size; spec_const_array_size[a] = a; _17.v[_30] = b + spec_const_array_size[_32]; } diff --git a/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp b/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp index 6988febfacc..5593629c064 100644 --- 
a/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp +++ b/reference/shaders-msl/comp/storage-buffer-std140-vector-array.comp @@ -1,14 +1,55 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Sub { - float f[2]; - float2 f2[2]; - float3 f3[2]; - float4 f4[2]; + spvUnsafeArray f; + spvUnsafeArray f2; + spvUnsafeArray f3; + spvUnsafeArray f4; }; struct Sub_1 @@ -24,6 +65,8 @@ struct SSBO Sub_1 sub[2]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { Sub foo; @@ -39,15 +82,15 @@ kernel void main0(device SSBO& _27 [[buffer(0)]], uint3 gl_WorkGroupID [[threadg foo.f2[gl_GlobalInvocationID.x] += float2(2.0); foo.f3[gl_GlobalInvocationID.x] += float3(3.0); foo.f4[gl_GlobalInvocationID.x] += float4(4.0); - _27.sub[gl_WorkGroupID.x].f[0].x = foo.f[0]; - _27.sub[gl_WorkGroupID.x].f[1].x = foo.f[1]; - _27.sub[gl_WorkGroupID.x].f2[0].xy = foo.f2[0]; - _27.sub[gl_WorkGroupID.x].f2[1].xy = foo.f2[1]; + (device float&)_27.sub[gl_WorkGroupID.x].f[0] = foo.f[0]; + 
(device float&)_27.sub[gl_WorkGroupID.x].f[1] = foo.f[1]; + (device float2&)_27.sub[gl_WorkGroupID.x].f2[0] = foo.f2[0]; + (device float2&)_27.sub[gl_WorkGroupID.x].f2[1] = foo.f2[1]; _27.sub[gl_WorkGroupID.x].f3[0] = foo.f3[0]; _27.sub[gl_WorkGroupID.x].f3[1] = foo.f3[1]; _27.sub[gl_WorkGroupID.x].f4[0] = foo.f4[0]; _27.sub[gl_WorkGroupID.x].f4[1] = foo.f4[1]; - _27.sub[0].f[0].x += 5.0; - _27.sub[0].f2[1].xy += float2(5.0); + (device float&)_27.sub[0].f[0] = _27.sub[0].f[0].x + 5.0; + (device float2&)_27.sub[0].f2[1] = _27.sub[0].f2[1].xy + float2(5.0); } diff --git a/reference/shaders-msl/comp/struct-layout.comp b/reference/shaders-msl/comp/struct-layout.comp index b6ee59f1693..8f2ab2d60ca 100644 --- a/reference/shaders-msl/comp/struct-layout.comp +++ b/reference/shaders-msl/comp/struct-layout.comp @@ -18,6 +18,8 @@ struct SSBO Foo in_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _23 [[buffer(0)]], const device SSBO& _30 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git a/reference/shaders-msl/comp/struct-nested.comp b/reference/shaders-msl/comp/struct-nested.comp index 86229e3417f..e3d04bedaa6 100644 --- a/reference/shaders-msl/comp/struct-nested.comp +++ b/reference/shaders-msl/comp/struct-nested.comp @@ -28,6 +28,8 @@ struct dstbuffer s2_1 test[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device dstbuffer& _19 [[buffer(0)]]) { s2 testVal; diff --git a/reference/shaders-msl/comp/struct-packing.comp b/reference/shaders-msl/comp/struct-packing.comp index 35cf1b22cb4..dc1654399d3 100644 --- a/reference/shaders-msl/comp/struct-packing.comp +++ b/reference/shaders-msl/comp/struct-packing.comp @@ -3,12 +3,11 @@ using namespace metal; -typedef packed_float2 packed_rm_float2x3[3]; - struct S0 { float2 a[1]; float b; + char _m0_final_padding[4]; }; struct S1 @@ -21,6 +20,7 @@ struct S2 { 
float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3 @@ -45,6 +45,7 @@ struct Content S3 m3; float m4; S4 m3s[8]; + char _m0_final_padding[8]; }; struct SSBO1 @@ -58,17 +59,17 @@ struct SSBO1 float3x2 m3; float2x2 m4; float2x2 m5[9]; - packed_rm_float2x3 m6[4][2]; - char _m10_pad[8]; - float3x2 m7; - char _m11_pad[8]; + float3x2 m6[4][2]; + float2x3 m7; float array[1]; }; struct S0_1 { - float4 a[1]; + float2 a[1]; + char _m1_pad[8]; float b; + char _m0_final_padding[12]; }; struct S1_1 @@ -81,6 +82,7 @@ struct S2_1 { float3 a[1]; float b; + char _m0_final_padding[12]; }; struct S3_1 @@ -92,6 +94,7 @@ struct S3_1 struct S4_1 { float2 c; + char _m0_final_padding[8]; }; struct Content_1 @@ -104,8 +107,8 @@ struct Content_1 S2_1 m2; S3_1 m3; float m4; - char _m8_pad[12]; - /* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8]; + char _m8_pad[8]; + S4_1 m3s[8]; }; struct SSBO0 @@ -116,16 +119,18 @@ struct SSBO0 float4 array[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]]) { Content_1 _60 = ssbo_140.content; - ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy; + ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0]; ssbo_430.content.m0s[0].b = _60.m0s[0].b; ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a); ssbo_430.content.m1s[0].b = _60.m1s[0].b; ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0]; ssbo_430.content.m2s[0].b = _60.m2s[0].b; - ssbo_430.content.m0.a[0] = _60.m0.a[0].xy; + ssbo_430.content.m0.a[0] = _60.m0.a[0]; ssbo_430.content.m0.b = _60.m0.b; ssbo_430.content.m1.a = float3(_60.m1.a); ssbo_430.content.m1.b = _60.m1.b; @@ -142,6 +147,6 @@ kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [ ssbo_430.content.m3s[5].c = _60.m3s[5].c; ssbo_430.content.m3s[6].c = _60.m3s[6].c; ssbo_430.content.m3s[7].c = _60.m3s[7].c; - ssbo_430.content.m1.a = ssbo_430.content.m3.a * 
float3x2(float2(ssbo_430.m6[1][1][0]), float2(ssbo_430.m6[1][1][1]), float2(ssbo_430.m6[1][1][2])); + ssbo_430.content.m1.a = ssbo_430.content.m3.a * ssbo_430.m6[1][1]; } diff --git a/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp new file mode 100644 index 00000000000..754f7357d4f --- /dev/null +++ b/reference/shaders-msl/comp/threadgroup-boolean-workaround.comp @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO +{ + float4 values[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(4u, 1u, 1u); + +static inline __attribute__((always_inline)) +void in_function(threadgroup short4 (&foo)[4], thread uint& gl_LocalInvocationIndex, device SSBO& v_23, thread uint3& gl_GlobalInvocationID) +{ + foo[gl_LocalInvocationIndex] = short4(v_23.values[gl_GlobalInvocationID.x] != float4(10.0)); + threadgroup_barrier(mem_flags::mem_threadgroup); + v_23.values[gl_GlobalInvocationID.x] = select(float4(40.0), float4(30.0), bool4(foo[gl_LocalInvocationIndex ^ 3u])); +} + +kernel void main0(device SSBO& v_23 [[buffer(0)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + threadgroup short4 foo[4]; + in_function(foo, gl_LocalInvocationIndex, v_23, gl_GlobalInvocationID); +} + diff --git a/reference/shaders-msl/comp/torture-loop.comp b/reference/shaders-msl/comp/torture-loop.comp index 1b65a3afaba..e92e71d61dd 100644 --- a/reference/shaders-msl/comp/torture-loop.comp +++ b/reference/shaders-msl/comp/torture-loop.comp @@ -14,6 +14,8 @@ struct SSBO2 float4 out_data[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(const device SSBO& _24 [[buffer(0)]], device SSBO2& _89 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ident = gl_GlobalInvocationID.x; diff --git 
a/reference/shaders-msl/comp/type-alias.comp b/reference/shaders-msl/comp/type-alias.comp index 25a49f59f0f..e3ac031668f 100644 --- a/reference/shaders-msl/comp/type-alias.comp +++ b/reference/shaders-msl/comp/type-alias.comp @@ -40,11 +40,15 @@ struct SSBO2 float4 outputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +static inline __attribute__((always_inline)) float4 overload(thread const S0& s0) { return s0.a; } +static inline __attribute__((always_inline)) float4 overload(thread const S1& s1) { return s1.a; diff --git a/reference/shaders-msl/comp/type_casting_i64.msl22.comp b/reference/shaders-msl/comp/type_casting_i64.msl22.comp new file mode 100644 index 00000000000..6820b077a1a --- /dev/null +++ b/reference/shaders-msl/comp/type_casting_i64.msl22.comp @@ -0,0 +1,27 @@ +#include +#include + +using namespace metal; + +struct dst_buff_t +{ + int m0[1]; +}; + +struct src_buff_t +{ + int m0[1]; +}; + +constant int base_val_tmp [[function_constant(0)]]; +constant int base_val = is_function_constant_defined(base_val_tmp) ? base_val_tmp : 0; +constant long shift_val_tmp [[function_constant(1)]]; +constant long shift_val = is_function_constant_defined(shift_val_tmp) ? 
shift_val_tmp : 0l; +constant int offset = (base_val >> int(shift_val)); +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + +kernel void main0(device dst_buff_t& dst_buff [[buffer(0)]], device src_buff_t& src_buff [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + dst_buff.m0[gl_GlobalInvocationID.x] = src_buff.m0[gl_GlobalInvocationID.x] + offset; +} + diff --git a/reference/shaders-msl/comp/udiv.comp b/reference/shaders-msl/comp/udiv.comp index 32874ad7879..7f7315b882a 100644 --- a/reference/shaders-msl/comp/udiv.comp +++ b/reference/shaders-msl/comp/udiv.comp @@ -13,6 +13,8 @@ struct SSBO uint inputs[1]; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBO2& _10 [[buffer(0)]], device SSBO& _23 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { _10.outputs[gl_GlobalInvocationID.x] = _23.inputs[gl_GlobalInvocationID.x] / 29u; diff --git a/reference/shaders-msl/comp/writable-ssbo.comp b/reference/shaders-msl/comp/writable-ssbo.comp index 9dc53b6dd5d..310cda7fef9 100644 --- a/reference/shaders-msl/comp/writable-ssbo.comp +++ b/reference/shaders-msl/comp/writable-ssbo.comp @@ -5,19 +5,19 @@ using namespace metal; -struct myBlock -{ - int a; - float b; -}; - // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() template -Tx mod(Tx x, Ty y) +inline Tx mod(Tx x, Ty y) { return x - y * floor(x / y); } +struct myBlock +{ + int a; + float b; +}; + kernel void main0(device myBlock& myStorage [[buffer(0)]]) { myStorage.a = (myStorage.a + 1) % 256; diff --git a/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp b/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp index a37fe519a55..cea12980c67 100644 --- a/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp +++ b/reference/shaders-msl/desktop-only/comp/extended-arithmetic.desktop.comp @@ -91,6 +91,8 @@ struct ResType_7 
int4 _m1; }; +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); + kernel void main0(device SSBOUint& u [[buffer(0)]], device SSBOInt& i [[buffer(1)]]) { ResType _25; diff --git a/reference/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc b/reference/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc index a5e30b6de1a..01fceeb6c7b 100644 --- a/reference/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc +++ b/reference/shaders-msl/desktop-only/tesc/arrayed-output.desktop.sso.tesc @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float3 vVertex; @@ -10,7 +51,7 @@ struct main0_out struct main0_patchOut { - float3 vPatch[2]; + spvUnsafeArray vPatch; }; struct main0_in @@ -28,7 +69,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ if (gl_InvocationID >= 4) return; gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput + gl_in[gl_InvocationID ^ 1].vInput; - threadgroup_barrier(mem_flags::mem_device); + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); if (gl_InvocationID == 0) { patchOut.vPatch[0] = float3(10.0); 
diff --git a/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..6aca0157833 --- /dev/null +++ b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc @@ -0,0 +1,47 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + float3 vFoo; +}; + +struct main0_in +{ + uint3 m_78; + ushort2 m_82; + float4 gl_Position; +}; + +static inline __attribute__((always_inline)) +void set_position(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device main0_in* thread & gl_in) +{ + gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 1]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 1; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); + set_position(gl_out, gl_InvocationID, gl_in); +} + diff --git a/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc index 9acece62c61..054b4e74188 100644 --- a/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc +++ b/reference/shaders-msl/desktop-only/tesc/basic.desktop.sso.tesc @@ -20,6 +20,7 @@ struct main0_in float4 gl_Position [[attribute(0)]]; }; +static inline __attribute__((always_inline)) void set_position(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup main0_in* thread & gl_in) { gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; diff --git a/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..184a4a6f9b3 --- /dev/null +++ b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc @@ -0,0 +1,36 @@ +#include +#include + +using namespace metal; + +struct Boo +{ + float3 a; + uint3 b; +}; + +struct main0_out +{ + Boo vVertex; +}; + +struct main0_in +{ + Boo vInput; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = 
min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].vVertex = gl_in[gl_InvocationID].vInput; + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(2.0); +} + diff --git a/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc index cd4d8d80e52..f5fd60a9f71 100644 --- a/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc +++ b/reference/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.tesc @@ -16,8 +16,8 @@ struct main0_out struct main0_in { - float3 Boo_a [[attribute(0)]]; - float3 Boo_b [[attribute(1)]]; + float3 vInput_a [[attribute(0)]]; + float3 vInput_b [[attribute(1)]]; }; kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) @@ -28,10 +28,8 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ threadgroup_barrier(mem_flags::mem_threadgroup); if (gl_InvocationID >= 4) return; - Boo vInput_24; - vInput_24.a = gl_in[gl_InvocationID].Boo_a; - vInput_24.b = gl_in[gl_InvocationID].Boo_b; - gl_out[gl_InvocationID].vVertex = vInput_24; + Boo _25 = Boo{ gl_in[gl_InvocationID].vInput_a, gl_in[gl_InvocationID].vInput_b }; + gl_out[gl_InvocationID].vVertex = _25; spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(2.0); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.0); diff --git a/reference/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert b/reference/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert new file mode 100644 index 00000000000..a414c98542c --- /dev/null +++ b/reference/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_Position = float4(10.0); + out.gl_ClipDistance[0] = 1.0; + out.gl_ClipDistance[1] = 4.0; + return out; +} + diff --git a/reference/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert b/reference/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert index a414c98542c..2d98929051b 100644 --- a/reference/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert +++ b/reference/shaders-msl/desktop-only/vert/clip-cull-distance.desktop.vert @@ -7,6 +7,8 @@ struct main0_out { float4 gl_Position [[position]]; float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; }; vertex main0_out main0() @@ -15,6 +17,8 @@ vertex main0_out main0() out.gl_Position = float4(10.0); out.gl_ClipDistance[0] = 1.0; out.gl_ClipDistance[1] = 4.0; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; return out; } diff --git a/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert b/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert new file mode 100644 index 00000000000..b3c8b6bb278 --- /dev/null +++ 
b/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], uint3 spvDispatchBase [[grid_origin]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + uint gl_BaseVertex = spvDispatchBase.x; + uint gl_BaseInstance = spvDispatchBase.y; + out.gl_Position = float4(float(int(gl_BaseVertex)), float(int(gl_BaseInstance)), 0.0, 1.0); +} + diff --git a/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert b/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert index 1d203ba98bc..a32c1948f88 100644 --- a/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert +++ b/reference/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.vert @@ -11,7 +11,7 @@ struct main0_out vertex main0_out main0(uint gl_BaseVertex [[base_vertex]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; - out.gl_Position = float4(float(gl_BaseVertex), float(gl_BaseInstance), 0.0, 1.0); + out.gl_Position = float4(float(int(gl_BaseVertex)), float(int(gl_BaseInstance)), 0.0, 1.0); return out; } diff --git a/reference/shaders-msl/flatten/rowmajor.flatten.vert b/reference/shaders-msl/flatten/rowmajor.flatten.vert index b5df8b064f5..3ea30e65c92 100644 --- a/reference/shaders-msl/flatten/rowmajor.flatten.vert +++ b/reference/shaders-msl/flatten/rowmajor.flatten.vert @@ -1,5 +1,3 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - #include #include @@ -9,7 +7,7 @@ struct UBO { float4x4 uMVPR; float4x4 uMVPC; - float2x4 uMVP; + float4x4 uMVP; }; struct main0_out @@ -22,16 +20,10 @@ struct main0_in float4 
aVertex [[attribute(0)]]; }; -// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization. -float2x4 spvConvertFromRowMajor2x4(float2x4 m) -{ - return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], m[1][3])); -} - vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]) { main0_out out = {}; - float2 v = in.aVertex * spvConvertFromRowMajor2x4(_18.uMVP); + float2 v = float4x2(_18.uMVP[0].xy, _18.uMVP[1].xy, _18.uMVP[2].xy, _18.uMVP[3].xy) * in.aVertex; out.gl_Position = (_18.uMVPR * in.aVertex) + (in.aVertex * _18.uMVPC); return out; } diff --git a/reference/shaders-msl/flatten/struct.flatten.vert b/reference/shaders-msl/flatten/struct.flatten.vert index 954f9255c49..f79a794cde3 100644 --- a/reference/shaders-msl/flatten/struct.flatten.vert +++ b/reference/shaders-msl/flatten/struct.flatten.vert @@ -34,7 +34,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.gl_Position = _18.uMVP * in.aVertex; out.vColor = float4(0.0); float3 L = in.aVertex.xyz - float3(_18.light.Position); - out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_18.light.Color * fast::clamp(1.0 - (length(L) / _18.light.Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(L))); return out; } diff --git a/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag b/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag index 1cf97277105..fd0a3def314 100644 --- a/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag +++ b/reference/shaders-msl/frag/argument-buffers.msl2.argument.frag @@ -62,7 +62,8 @@ struct main0_in float2 vUV [[user(locn0)]]; }; -float4 sample_in_function2(thread texture2d uTexture, thread const sampler uTextureSmplr, thread float2& vUV, thread const array, 4> uTexture2, thread const array uSampler, thread const array, 2> uTextures, 
thread const array uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers) +static inline __attribute__((always_inline)) +float4 sample_in_function2(texture2d uTexture, sampler uTextureSmplr, thread float2& vUV, constant array, 4>& uTexture2, constant array& uSampler, constant array, 2>& uTextures, constant array& uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers) { float4 ret = uTexture.sample(uTextureSmplr, vUV); ret += uTexture2[2].sample(uSampler[1], vUV); @@ -73,7 +74,8 @@ float4 sample_in_function2(thread texture2d uTexture, thread const sample return ret; } -float4 sample_in_function(thread texture2d uTexture, thread const sampler uTextureSmplr, thread float2& vUV, thread const array, 4> uTexture2, thread const array uSampler, thread const array, 2> uTextures, thread const array uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers, constant UBO& v_90, constant UBOs* constant (&ubos)[4]) +static inline __attribute__((always_inline)) +float4 sample_in_function(texture2d uTexture, sampler uTextureSmplr, thread float2& vUV, constant array, 4>& uTexture2, constant array& uSampler, constant array, 2>& uTextures, constant array& uTexturesSmplr, device SSBO& v_60, const device SSBOs* constant (&ssbos)[2], constant Push& registers, constant UBO& v_90, constant UBOs* constant (&ubos)[4]) { float4 ret = sample_in_function2(uTexture, uTextureSmplr, vUV, uTexture2, uSampler, uTextures, uTexturesSmplr, v_60, ssbos, registers); ret += v_90.ubo; diff --git a/reference/shaders-msl/frag/array-component-io.frag b/reference/shaders-msl/frag/array-component-io.frag new file mode 100644 index 00000000000..9b4c5b5204f --- /dev/null +++ b/reference/shaders-msl/frag/array-component-io.frag @@ -0,0 +1,99 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + 
+using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 m_location_0 [[color(0)]]; + float4 m_location_1 [[color(1)]]; + float4 m_location_2 [[color(2)]]; +}; + +struct main0_in +{ + float InC_0 [[user(locn0_1), flat]]; + float InA_0 [[user(locn1), flat]]; + float InC_1 [[user(locn1_1), flat]]; + float2 InB_0 [[user(locn1_2), flat]]; + float InA_1 [[user(locn2), flat]]; + float InC_2 [[user(locn2_1), flat]]; + float2 InB_1 [[user(locn2_2), flat]]; + float InD [[user(locn3_1), sample_perspective]]; + float InE [[user(locn4_2), center_no_perspective]]; + float InF [[user(locn5_3), centroid_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + float D = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + InA[0] = in.InA_0; + InA[1] = in.InA_1; + InB[0] = in.InB_0; + InB[1] = in.InB_1; + InC[0] = in.InC_0; + InC[1] = in.InC_1; + InC[2] = in.InC_2; + A = InA; + B = InB; + C = InC; + D = (in.InD + in.InE) + in.InF; + out.m_location_1.x = A[0]; + out.m_location_2.x = A[1]; + out.m_location_1.zw = B[0]; + out.m_location_2.zw = B[1]; + out.m_location_0.y = C[0]; + out.m_location_1.y = C[1]; + out.m_location_2.y 
= C[2]; + out.m_location_0.w = D; + return out; +} + diff --git a/reference/shaders-msl/frag/array-lut-no-loop-variable.frag b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag index 87158849b6f..cdedd73a748 100644 --- a/reference/shaders-msl/frag/array-lut-no-loop-variable.frag +++ b/reference/shaders-msl/frag/array-lut-no-loop-variable.frag @@ -1,9 +1,50 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; -constant float _17[5] = { 1.0, 2.0, 3.0, 4.0, 5.0 }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _17 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 5.0 }); struct main0_out { diff --git a/reference/shaders-msl/frag/array-of-array-lut.frag b/reference/shaders-msl/frag/array-of-array-lut.frag new file mode 100644 index 00000000000..ba553824e79 --- /dev/null +++ b/reference/shaders-msl/frag/array-of-array-lut.frag @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _17 = spvUnsafeArray({ 1.0, 2.0, 3.0 }); +constant spvUnsafeArray _21 = spvUnsafeArray({ 4.0, 5.0, 6.0 }); +constant spvUnsafeArray, 2> _22 = spvUnsafeArray, 2>({ spvUnsafeArray({ 1.0, 2.0, 3.0 }), spvUnsafeArray({ 4.0, 5.0, 6.0 }) }); + +struct main0_out +{ + float vOutput [[color(0)]]; +}; + +struct main0_in +{ + int vIndex1 [[user(locn0)]]; + int vIndex2 [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.vOutput = _22[in.vIndex1][in.vIndex2]; + return out; +} + diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag new file mode 100644 index 00000000000..9c02d302ed3 --- /dev/null +++ b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag @@ -0,0 +1,117 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename 
spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + +template +inline T spvGetSwizzle(vec x, T c, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::none: + return c; + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + } +} + +// Wrapper function that swizzles texture samples and fetches. +template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +struct UBO +{ + uint index; +}; + +struct UBO2 +{ + uint index2; +}; + +struct spvDescriptorSetBuffer0 +{ + array, 4> uSampler [[id(0)]]; + array uSamplerSmplr [[id(4)]]; + constant UBO* uUBO [[id(8)]]; + constant UBO2* m_50 [[id(9)]]; + constant uint* spvSwizzleConstants [[id(10)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float4 sample_in_func(constant array, 4>& uSampler, constant array& uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV) +{ + return spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], vUV), uSamplerSwzl[uUBO.index]); +} + +static inline __attribute__((always_inline)) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& 
vUV) +{ + return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); +} + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]]) +{ + main0_out out = {}; + constant uint* spvDescriptorSet0_uSamplerSwzl = &spvDescriptorSet0.spvSwizzleConstants[0]; + out.FragColor = sample_in_func(spvDescriptorSet0.uSampler, spvDescriptorSet0.uSamplerSmplr, spvDescriptorSet0_uSamplerSwzl, (*spvDescriptorSet0.uUBO), in.vUV); + out.FragColor += sample_single_in_func(spvDescriptorSet0.uSampler[(*spvDescriptorSet0.m_50).index2], spvDescriptorSet0.uSamplerSmplr[(*spvDescriptorSet0.m_50).index2], spvDescriptorSet0_uSamplerSwzl[(*spvDescriptorSet0.m_50).index2], in.vUV); + return out; +} + diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag new file mode 100644 index 00000000000..978ecbe1efa --- /dev/null +++ b/reference/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag @@ -0,0 +1,108 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) +{ + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) +{ + return static_cast(x); +} + +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + +template +inline T spvGetSwizzle(vec x, T c, spvSwizzle s) +{ + switch (s) + { + case spvSwizzle::none: + return c; + case spvSwizzle::zero: + return 0; + case spvSwizzle::one: + return 1; + case 
spvSwizzle::red: + return x.r; + case spvSwizzle::green: + return x.g; + case spvSwizzle::blue: + return x.b; + case spvSwizzle::alpha: + return x.a; + } +} + +// Wrapper function that swizzles texture samples and fetches. +template +inline vec spvTextureSwizzle(vec x, uint s) +{ + if (!s) + return x; + return vec(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) & 0xFF)), spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF))); +} + +template +inline T spvTextureSwizzle(T x, uint s) +{ + return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; +} + +struct UBO +{ + uint index; +}; + +struct UBO2 +{ + uint index2; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float4 sample_in_func(thread const array, 4>& uSampler, thread const array& uSamplerSmplr, constant uint* uSamplerSwzl, constant UBO& uUBO, thread float2& vUV) +{ + return spvTextureSwizzle(uSampler[uUBO.index].sample(uSamplerSmplr[uUBO.index], vUV), uSamplerSwzl[uUBO.index]); +} + +static inline __attribute__((always_inline)) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +{ + return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); +} + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], constant UBO& uUBO [[buffer(0)]], constant UBO2& _50 [[buffer(1)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) +{ + main0_out out = {}; + constant uint* uSamplerSwzl = &spvSwizzleConstants[0]; + out.FragColor = sample_in_func(uSampler, uSamplerSmplr, uSamplerSwzl, uUBO, in.vUV); + out.FragColor += sample_single_in_func(uSampler[_50.index2], uSamplerSmplr[_50.index2], uSamplerSwzl[_50.index2], in.vUV); + return out; +} + diff --git 
a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag index 702409ee7b3..43a61e1f053 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag +++ b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag @@ -5,22 +5,17 @@ using namespace metal; -struct spvDescriptorSetBuffer0 -{ - array, 4> uSampler0 [[id(0)]]; - array uSampler0Smplr [[id(4)]]; - constant uint* spvSwizzleConstants [[id(8)]]; -}; - -struct main0_out +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { - float4 FragColor [[color(0)]]; -}; - -struct main0_in + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { - float2 vUV [[user(locn0)]]; -}; + return static_cast(x); +} enum class spvSwizzle : uint { @@ -33,18 +28,6 @@ enum class spvSwizzle : uint alpha }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) -{ - return static_cast(x); -} -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) -{ - return static_cast(x); -} - template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -82,77 +65,37 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +struct spvDescriptorSetBuffer0 { - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} + array, 4> uSampler0 [[id(0)]]; + array uSampler0Smplr [[id(4)]]; + constant uint* spvSwizzleConstants [[id(8)]]; +}; -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) +struct main0_out { - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; -float4 sample_in_func_1(thread const array, 4> uSampler0, thread const array uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 sample_in_func_1(constant array, 4>& uSampler0, constant array& uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV) { return spvTextureSwizzle(uSampler0[2].sample(uSampler0Smplr[2], vUV), uSampler0Swzl[2]); } -float4 sample_in_func_2(thread float2& vUV, thread texture2d uSampler1, thread const sampler uSampler1Smplr, constant uint& uSampler1Swzl) +static inline __attribute__((always_inline)) +float4 sample_in_func_2(thread float2& vUV, texture2d uSampler1, sampler uSampler1Smplr, constant uint& uSampler1Swzl) { return spvTextureSwizzle(uSampler1.sample(uSampler1Smplr, vUV), uSampler1Swzl); } -float4 sample_single_in_func(thread const texture2d s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } diff --git a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag index ebaa28a57ea..1db803c5604 100644 --- a/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag +++ b/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag @@ -5,15 +5,17 @@ using 
namespace metal; -struct main0_out +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template struct spvRemoveReference { typedef T type; }; +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) { - float4 FragColor [[color(0)]]; -}; - -struct main0_in + return static_cast(x); +} +template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) { - float2 vUV [[user(locn0)]]; -}; + return static_cast(x); +} enum class spvSwizzle : uint { @@ -26,18 +28,6 @@ enum class spvSwizzle : uint alpha }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template struct spvRemoveReference { typedef T type; }; -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type& x) -{ - return static_cast(x); -} -template inline constexpr thread T&& spvForward(thread typename spvRemoveReference::type&& x) -{ - return static_cast(x); -} - template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -75,72 +65,24 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) +struct main0_out { - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} + float4 FragColor [[color(0)]]; +}; -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) +struct main0_in { - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} + float2 vUV [[user(locn0)]]; +}; -float4 sample_in_func(thread const array, 4> uSampler, thread const array uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 sample_in_func(thread const array, 4>& uSampler, thread const array& uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV) { return spvTextureSwizzle(uSampler[2].sample(uSamplerSmplr[2], vUV), uSamplerSwzl[2]); } -float4 sample_single_in_func(thread const texture2d s, thread const sampler sSmplr, constant uint& sSwzl, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 sample_single_in_func(texture2d s, sampler sSmplr, constant uint& sSwzl, thread float2& vUV) { return spvTextureSwizzle(s.sample(sSmplr, vUV), sSwzl); } diff --git a/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag b/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag index ef19fbf8569..1259283caaa 100644 --- a/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag +++ b/reference/shaders-msl/frag/barycentric-nv-nopersp.msl22.frag @@ -15,7 +15,7 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNoPerspNV [[barycentric_coord, center_no_perspective]]; + float3 gl_BaryCoordNoPerspEXT [[barycentric_coord, center_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) @@ -25,7 +25,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[ float2 uv0 = _19.uvs[(3 * prim) + 0]; float2 uv1 = _19.uvs[(3 * prim) + 1]; 
float2 uv2 = _19.uvs[(3 * prim) + 2]; - out.value = ((uv0 * in.gl_BaryCoordNoPerspNV.x) + (uv1 * in.gl_BaryCoordNoPerspNV.y)) + (uv2 * in.gl_BaryCoordNoPerspNV.z); + out.value = ((uv0 * in.gl_BaryCoordNoPerspEXT.x) + (uv1 * in.gl_BaryCoordNoPerspEXT.y)) + (uv2 * in.gl_BaryCoordNoPerspEXT.z); return out; } diff --git a/reference/shaders-msl/frag/barycentric-nv.msl22.frag b/reference/shaders-msl/frag/barycentric-nv.msl22.frag index 1d2e4c2f21e..386d2d26f7f 100644 --- a/reference/shaders-msl/frag/barycentric-nv.msl22.frag +++ b/reference/shaders-msl/frag/barycentric-nv.msl22.frag @@ -15,7 +15,7 @@ struct main0_out struct main0_in { - float3 gl_BaryCoordNV [[barycentric_coord, center_perspective]]; + float3 gl_BaryCoordEXT [[barycentric_coord, center_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[buffer(0)]], uint gl_PrimitiveID [[primitive_id]]) @@ -25,7 +25,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device Vertices& _19 [[ float2 uv0 = _19.uvs[(3 * prim) + 0]; float2 uv1 = _19.uvs[(3 * prim) + 1]; float2 uv2 = _19.uvs[(3 * prim) + 2]; - out.value = ((uv0 * in.gl_BaryCoordNV.x) + (uv1 * in.gl_BaryCoordNV.y)) + (uv2 * in.gl_BaryCoordNV.z); + out.value = ((uv0 * in.gl_BaryCoordEXT.x) + (uv1 * in.gl_BaryCoordEXT.y)) + (uv2 * in.gl_BaryCoordEXT.z); return out; } diff --git a/reference/shaders-msl/frag/basic.force-sample.frag b/reference/shaders-msl/frag/basic.force-sample.frag new file mode 100644 index 00000000000..b9706b73f56 --- /dev/null +++ b/reference/shaders-msl/frag/basic.force-sample.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex [[user(locn1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + out.FragColor = 
in.vColor * uTex.sample(uTexSmplr, in.vTex); + return out; +} + diff --git a/reference/shaders-msl/frag/bitcasting.1d-as-2d.frag b/reference/shaders-msl/frag/bitcasting.1d-as-2d.frag new file mode 100644 index 00000000000..ea49c067c21 --- /dev/null +++ b/reference/shaders-msl/frag/bitcasting.1d-as-2d.frag @@ -0,0 +1,30 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor0 [[color(0)]]; + float4 FragColor1 [[color(1)]]; +}; + +struct main0_in +{ + float4 VertGeom [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d TextureBase [[texture(0)]], texture2d TextureDetail [[texture(1)]], sampler TextureBaseSmplr [[sampler(0)]], sampler TextureDetailSmplr [[sampler(1)]]) +{ + main0_out out = {}; + float4 texSample0 = TextureBase.sample(TextureBaseSmplr, float2(in.VertGeom.x, 0.5)); + float4 texSample1 = TextureDetail.sample(TextureDetailSmplr, float2(in.VertGeom.x, 0.5), int2(3, 0)); + int4 iResult0 = as_type(texSample0); + int4 iResult1 = as_type(texSample1); + out.FragColor0 = as_type(iResult0) * as_type(iResult1); + uint4 uResult0 = as_type(texSample0); + uint4 uResult1 = as_type(texSample1); + out.FragColor1 = as_type(uResult0) * as_type(uResult1); + return out; +} + diff --git a/reference/shaders-msl/frag/buffer-read-write.frag b/reference/shaders-msl/frag/buffer-read-write.frag index 2b2ac7f0608..4f114ed7247 100644 --- a/reference/shaders-msl/frag/buffer-read-write.frag +++ b/reference/shaders-msl/frag/buffer-read-write.frag @@ -5,17 +5,18 @@ using namespace metal; -struct main0_out -{ - float4 FragColor [[color(0)]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + fragment main0_out main0(texture2d buf [[texture(0)]], texture2d bufOut [[texture(1)]], float4 gl_FragCoord [[position]]) { 
main0_out out = {}; diff --git a/reference/shaders-msl/frag/clip-distance-varying.frag b/reference/shaders-msl/frag/clip-distance-varying.frag new file mode 100644 index 00000000000..9a72d5ba39f --- /dev/null +++ b/reference/shaders-msl/frag/clip-distance-varying.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_ClipDistance = {}; + gl_ClipDistance[0] = in.gl_ClipDistance_0; + gl_ClipDistance[1] = in.gl_ClipDistance_1; + out.FragColor = float4((1.0 - gl_ClipDistance[0]) - gl_ClipDistance[1]); + return out; +} + diff --git a/reference/shaders-msl/frag/constant-array.frag b/reference/shaders-msl/frag/constant-array.frag index c4e87d98725..990860762c3 100644 --- a/reference/shaders-msl/frag/constant-array.frag +++ b/reference/shaders-msl/frag/constant-array.frag @@ -1,21 +1,60 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic 
ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Foobar { float a; float b; }; -constant float4 _37[3] = { float4(1.0), float4(2.0), float4(3.0) }; -constant float4 _49[2] = { float4(1.0), float4(2.0) }; -constant float4 _54[2] = { float4(8.0), float4(10.0) }; -constant float4 _55[2][2] = { { float4(1.0), float4(2.0) }, { float4(8.0), float4(10.0) } }; -constant Foobar _75[2] = { Foobar{ 10.0, 40.0 }, Foobar{ 90.0, 70.0 } }; +constant spvUnsafeArray _37 = spvUnsafeArray({ float4(1.0), float4(2.0), float4(3.0) }); +constant spvUnsafeArray _49 = spvUnsafeArray({ float4(1.0), float4(2.0) }); +constant spvUnsafeArray _54 = spvUnsafeArray({ float4(8.0), float4(10.0) }); +constant spvUnsafeArray, 2> _55 = spvUnsafeArray, 2>({ spvUnsafeArray({ float4(1.0), float4(2.0) }), spvUnsafeArray({ float4(8.0), float4(10.0) }) }); +constant spvUnsafeArray _75 = spvUnsafeArray({ Foobar{ 10.0, 40.0 }, Foobar{ 90.0, 70.0 } }); struct main0_out { @@ -27,6 +66,7 @@ struct main0_in int index [[user(locn0)]]; }; +static inline __attribute__((always_inline)) float4 resolve(thread const Foobar& f) { return float4(f.a + f.b); diff --git a/reference/shaders-msl/frag/constant-composites.frag b/reference/shaders-msl/frag/constant-composites.frag index 
504beaa067d..e0fa980fb85 100644 --- a/reference/shaders-msl/frag/constant-composites.frag +++ b/reference/shaders-msl/frag/constant-composites.frag @@ -1,18 +1,56 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Foo { float a; float b; }; -constant float _16[4] = { 1.0, 4.0, 3.0, 2.0 }; -constant Foo _28[2] = { Foo{ 10.0, 20.0 }, Foo{ 30.0, 40.0 } }; +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 4.0, 3.0, 2.0 }); struct main0_out { @@ -24,21 +62,10 @@ struct main0_in int line [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { + spvUnsafeArray _28 = spvUnsafeArray({ Foo{ 10.0, 20.0 }, Foo{ 30.0, 40.0 } }); + main0_out out = {}; out.FragColor = float4(_16[in.line]); out.FragColor += float4(_28[in.line].a * _28[1 - in.line].a); diff --git a/reference/shaders-msl/frag/cull-distance-varying.frag b/reference/shaders-msl/frag/cull-distance-varying.frag new file mode 100644 index 00000000000..708a295710d --- /dev/null +++ b/reference/shaders-msl/frag/cull-distance-varying.frag @@ -0,0 +1,67 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_CullDistance_0 [[user(cull0)]]; + float gl_CullDistance_1 [[user(cull1)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_CullDistance = {}; + gl_CullDistance[0] = in.gl_CullDistance_0; + gl_CullDistance[1] = in.gl_CullDistance_1; + out.FragColor = float4((1.0 - gl_CullDistance[0]) - gl_CullDistance[1]); + return out; +} + diff --git a/reference/shaders-msl/frag/depth-out-early-frag-tests.frag b/reference/shaders-msl/frag/depth-out-early-frag-tests.frag new file mode 100644 index 00000000000..21884d81c5b --- /dev/null +++ b/reference/shaders-msl/frag/depth-out-early-frag-tests.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color_out [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0() +{ + float gl_FragDepth; + main0_out out = {}; + out.color_out = float4(1.0, 0.0, 0.0, 1.0); + gl_FragDepth = 0.699999988079071044921875; + return out; +} + diff --git a/reference/shaders-msl/frag/depth-out-no-early-frag-tests.frag b/reference/shaders-msl/frag/depth-out-no-early-frag-tests.frag new file mode 100644 index 00000000000..57d810fafcb --- /dev/null +++ 
b/reference/shaders-msl/frag/depth-out-no-early-frag-tests.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color_out [[color(0)]]; + float gl_FragDepth [[depth(less)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.color_out = float4(1.0, 0.0, 0.0, 1.0); + out.gl_FragDepth = 0.699999988079071044921875; + return out; +} + diff --git a/reference/shaders-msl/frag/disable-frag-output.frag-output.frag b/reference/shaders-msl/frag/disable-frag-output.frag-output.frag new file mode 100644 index 00000000000..63bc45b8af7 --- /dev/null +++ b/reference/shaders-msl/frag/disable-frag-output.frag-output.frag @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 buf1 [[color(1)]]; + float4 buf3 [[color(3)]]; + float4 buf6 [[color(6)]]; + float4 buf7 [[color(7)]]; +}; + +fragment main0_out main0() +{ + float4 buf0; + float4 buf2; + float4 buf4; + float4 buf5; + float gl_FragDepth; + int gl_FragStencilRefARB; + main0_out out = {}; + buf0 = float4(0.0, 0.0, 0.0, 1.0); + out.buf1 = float4(1.0, 0.0, 0.0, 1.0); + buf2 = float4(0.0, 1.0, 0.0, 1.0); + out.buf3 = float4(0.0, 0.0, 1.0, 1.0); + buf4 = float4(1.0, 0.0, 1.0, 0.5); + buf5 = float4(0.25); + out.buf6 = float4(0.75); + out.buf7 = float4(1.0); + gl_FragDepth = 0.89999997615814208984375; + gl_FragStencilRefARB = uint(127); + return out; +} + diff --git a/reference/shaders-msl/frag/flush_params.frag b/reference/shaders-msl/frag/flush_params.frag index e2f2a48cb25..905a179bc6a 100644 --- a/reference/shaders-msl/frag/flush_params.frag +++ b/reference/shaders-msl/frag/flush_params.frag @@ -15,11 +15,13 @@ struct main0_out float4 FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) void foo2(thread Structy& f) { f.c = float4(10.0); } +static inline __attribute__((always_inline)) Structy foo() { Structy param; diff --git a/reference/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag 
b/reference/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..648dc9cf89a --- /dev/null +++ b/reference/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag @@ -0,0 +1,71 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct foo_t +{ + float x; + uint y; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 frag_body(device foo_t& foo, thread float4& gl_FragCoord, texture2d bar, device atomic_uint* bar_atomic, thread bool& gl_HelperInvocation) +{ + if (!gl_HelperInvocation) + { + foo.x = 1.0; + } + uint _25 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + if (int(gl_FragCoord.x) == 3) + { + gl_HelperInvocation = true, discard_fragment(); + } + (gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(int2(gl_FragCoord.xy)))); + uint _50 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _57 = (!gl_HelperInvocation ? 
atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed)); + uint _60 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _63 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _65 = (!gl_HelperInvocation ? atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _71 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed)); + uint _76; + if (!gl_HelperInvocation) + { + do + { + _76 = 100u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], &_76, 42u, memory_order_relaxed, memory_order_relaxed) && _76 == 100u); + } + else + { + _76 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed); + } + bool _77 = gl_HelperInvocation; + return float4(1.0, float(_77), 0.0, 1.0); +} + +fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d bar [[texture(0)]], device atomic_uint* bar_atomic [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + float4 _85 = frag_body(foo, 
gl_FragCoord, bar, bar_atomic, gl_HelperInvocation); + out.fragColor = _85; + return out; +} + diff --git a/reference/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag b/reference/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag new file mode 100644 index 00000000000..0c11898b089 --- /dev/null +++ b/reference/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag @@ -0,0 +1,49 @@ +#include +#include + +using namespace metal; + +struct foo +{ + int x; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +fragment main0_out main0(device foo& _24 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + if (gl_FragCoord.y == 7.0) + { + gl_HelperInvocation = true, discard_fragment(); + } + if (!gl_HelperInvocation) + { + _24.x = 0; + } + for (;;) + { + if (float(_24.x) < gl_FragCoord.x) + { + int _41 = _24.x; + int _43 = _41 + 1; + if (!gl_HelperInvocation) + { + _24.x = _43; + } + continue; + } + else + { + break; + } + } + out.fragColor = float4(float(_24.x), 0.0, 0.0, 1.0); + return out; +} + diff --git a/reference/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag b/reference/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..c2ae69695fc --- /dev/null +++ b/reference/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct foo_t +{ + float x; + uint y; +}; + +struct main0_out +{ + float4 fragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 frag_body(device foo_t& foo, thread float4& gl_FragCoord, texture2d bar, device atomic_uint* bar_atomic, thread bool& gl_HelperInvocation) +{ + if (!gl_HelperInvocation) + { + foo.x = 1.0; + } + uint _25 = (!gl_HelperInvocation ? atomic_exchange_explicit((device atomic_uint*)&foo.y, 0u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + if (int(gl_FragCoord.x) == 3) + { + gl_HelperInvocation = true, discard_fragment(); + } + (gl_HelperInvocation ? ((void)0) : bar.write(uint4(1u), uint2(int2(gl_FragCoord.xy)))); + uint _51 = (!gl_HelperInvocation ? atomic_fetch_add_explicit((device atomic_uint*)&foo.y, 42u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _58 = (!gl_HelperInvocation ? atomic_fetch_or_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], 62u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed)); + uint _61 = (!gl_HelperInvocation ? atomic_fetch_and_explicit((device atomic_uint*)&foo.y, 65535u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _64 = (!gl_HelperInvocation ? atomic_fetch_xor_explicit((device atomic_uint*)&foo.y, 4294967040u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _66 = (!gl_HelperInvocation ? 
atomic_fetch_min_explicit((device atomic_uint*)&foo.y, 1u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&foo.y, memory_order_relaxed)); + uint _72 = (!gl_HelperInvocation ? atomic_fetch_max_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], 100u, memory_order_relaxed) : atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed)); + uint _77; + if (!gl_HelperInvocation) + { + do + { + _77 = 100u; + } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], &_77, 42u, memory_order_relaxed, memory_order_relaxed) && _77 == 100u); + } + else + { + _77 = atomic_load_explicit((device atomic_uint*)&bar_atomic[spvImage2DAtomicCoord(int2(gl_FragCoord.xy), bar)], memory_order_relaxed); + } + return float4(1.0, 0.0, 0.0, 1.0); +} + +fragment main0_out main0(device foo_t& foo [[buffer(0)]], texture2d bar [[texture(0)]], device atomic_uint* bar_atomic [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + float4 _84 = frag_body(foo, gl_FragCoord, bar, bar_atomic, gl_HelperInvocation); + out.fragColor = _84; + return out; +} + diff --git a/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag b/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag index 2d339c47353..6420bb9f4e9 100644 --- a/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag +++ b/reference/shaders-msl/frag/fragment-component-padding.pad-fragment.frag @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 FragColors_0 [[color(0)]]; @@ -18,7 +57,8 @@ struct main0_in float3 vColor [[user(locn0)]]; }; -void set_globals(thread float (&FragColors)[2], thread float3& vColor, thread float2& FragColor2, thread float3& FragColor3) +static inline __attribute__((always_inline)) +void set_globals(thread spvUnsafeArray& FragColors, thread float3& vColor, thread float2& FragColor2, thread float3& FragColor3) { FragColors[0] = vColor.x; FragColors[1] = vColor.y; @@ -29,14 +69,14 @@ void set_globals(thread float (&FragColors)[2], thread float3& vColor, thread fl fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float FragColors[2] = {}; + spvUnsafeArray FragColors = {}; float2 FragColor2 = {}; float3 FragColor3 = {}; set_globals(FragColors, in.vColor, FragColor2, FragColor3); out.FragColors_0 = float4(FragColors[0]); out.FragColors_1 = float4(FragColors[1]); - out.FragColor2 = FragColor2.xyyy; - out.FragColor3 = FragColor3.xyzz; + out.FragColor2.xy = FragColor2; + out.FragColor3.xyz = FragColor3; return out; } diff --git a/reference/shaders-msl/frag/helper-invocation.msl21.frag b/reference/shaders-msl/frag/helper-invocation.msl21.frag index 8d32f4860dc..36d14239093 100644 --- a/reference/shaders-msl/frag/helper-invocation.msl21.frag +++ 
b/reference/shaders-msl/frag/helper-invocation.msl21.frag @@ -15,10 +15,11 @@ struct main0_in float2 vUV [[user(locn0)]]; }; -float4 foo(thread bool& gl_HelperInvocation, thread texture2d uSampler, thread const sampler uSamplerSmplr, thread float2& vUV) +static inline __attribute__((always_inline)) +float4 foo(texture2d uSampler, sampler uSamplerSmplr, thread float2& vUV) { float4 color; - if (!gl_HelperInvocation) + if (!simd_is_helper_thread()) { color = uSampler.sample(uSamplerSmplr, vUV, level(0.0)); } @@ -32,8 +33,7 @@ float4 foo(thread bool& gl_HelperInvocation, thread texture2d uSampler, t fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]]) { main0_out out = {}; - bool gl_HelperInvocation = simd_is_helper_thread(); - out.FragColor = foo(gl_HelperInvocation, uSampler, uSamplerSmplr, in.vUV); + out.FragColor = foo(uSampler, uSamplerSmplr, in.vUV); return out; } diff --git a/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag b/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag new file mode 100644 index 00000000000..a35835846d5 --- /dev/null +++ b/reference/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag @@ -0,0 +1,58 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct UBO +{ + float4 v; +}; + +struct spvDescriptorSetBuffer0 +{ + array, 10000> uSamplers [[id(0)]]; + array uSamplersSmplr [[id(10000)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + constant UBO* vs [[id(0)]][10000]; +}; + +struct spvDescriptorSetBuffer2 +{ + texture2d uSampler [[id(0)]]; + sampler uSamplerSmplr [[id(1)]]; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vUV [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float4 samp_array(const device array, 10000>& uSamplers, const device 
array& uSamplersSmplr, thread float2& vUV, constant UBO* const device (&vs)[10000]) +{ + return uSamplers[9999].sample(uSamplersSmplr[9999], vUV) + vs[5000]->v; +} + +static inline __attribute__((always_inline)) +float4 samp_single(thread float2& vUV, texture2d uSampler, sampler uSamplerSmplr) +{ + return uSampler.sample(uSamplerSmplr, vUV); +} + +fragment main0_out main0(main0_in in [[stage_in]], const device spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], const device spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant spvDescriptorSetBuffer2& spvDescriptorSet2 [[buffer(2)]]) +{ + main0_out out = {}; + out.FragColor = samp_array(spvDescriptorSet0.uSamplers, spvDescriptorSet0.uSamplersSmplr, in.vUV, spvDescriptorSet1.vs) + samp_single(in.vUV, spvDescriptorSet2.uSampler, spvDescriptorSet2.uSamplerSmplr); + return out; +} + diff --git a/reference/shaders-msl/frag/image-query-lod.msl22.frag b/reference/shaders-msl/frag/image-query-lod.msl22.frag index 4ec61c24d70..2362597fa51 100644 --- a/reference/shaders-msl/frag/image-query-lod.msl22.frag +++ b/reference/shaders-msl/frag/image-query-lod.msl22.frag @@ -15,7 +15,8 @@ struct main0_in float3 vUV [[user(locn0)]]; }; -void from_function(thread float2& FragColor, thread texture2d uSampler2D, thread const sampler uSampler2DSmplr, thread float3& vUV, thread texture3d uSampler3D, thread const sampler uSampler3DSmplr, thread texturecube uSamplerCube, thread const sampler uSamplerCubeSmplr, thread texture2d uTexture2D, thread sampler uSampler, thread texture3d uTexture3D, thread texturecube uTextureCube) +static inline __attribute__((always_inline)) +void from_function(thread float2& FragColor, texture2d uSampler2D, sampler uSampler2DSmplr, thread float3& vUV, texture3d uSampler3D, sampler uSampler3DSmplr, texturecube uSamplerCube, sampler uSamplerCubeSmplr, texture2d uTexture2D, sampler uSampler, texture3d uTexture3D, texturecube uTextureCube) { float2 _22; _22.x = 
uSampler2D.calculate_clamped_lod(uSampler2DSmplr, vUV.xy); diff --git a/reference/shaders-msl/frag/in_block.frag b/reference/shaders-msl/frag/in_block.frag index 8178c9a4ed6..efb0cbd4296 100644 --- a/reference/shaders-msl/frag/in_block.frag +++ b/reference/shaders-msl/frag/in_block.frag @@ -16,16 +16,16 @@ struct main0_out struct main0_in { - float4 VertexOut_color [[user(locn2)]]; - float4 VertexOut_color2 [[user(locn3)]]; + float4 inputs_color [[user(locn2)]]; + float4 inputs_color2 [[user(locn3)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; VertexOut inputs = {}; - inputs.color = in.VertexOut_color; - inputs.color2 = in.VertexOut_color2; + inputs.color = in.inputs_color; + inputs.color2 = in.inputs_color2; out.FragColor = inputs.color + inputs.color2; return out; } diff --git a/reference/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag b/reference/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag new file mode 100644 index 00000000000..7b011ffb580 --- /dev/null +++ b/reference/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag @@ -0,0 +1,105 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Foo +{ + float a; + float b; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float foos_0_a [[user(locn1)]]; + float foos_0_b [[user(locn2)]]; + float foos_1_a [[user(locn3)]]; + float foos_1_b [[user(locn4)]]; + float foos_2_a [[user(locn5)]]; + float foos_2_b [[user(locn6)]]; + float foos_3_a [[user(locn7)]]; + float foos_3_b [[user(locn8)]]; + float bars_0_a [[user(locn10)]]; + float bars_0_b [[user(locn11)]]; + float bars_1_a [[user(locn12)]]; + float bars_1_b [[user(locn13)]]; + float bars_2_a [[user(locn14)]]; + float bars_2_b [[user(locn15)]]; + float bars_3_a [[user(locn16)]]; + float bars_3_b [[user(locn17)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray foos = {}; + spvUnsafeArray bars = {}; + foos[0].a = in.foos_0_a; + foos[0].b = in.foos_0_b; + foos[1].a = in.foos_1_a; + foos[1].b = in.foos_1_b; + foos[2].a = in.foos_2_a; + foos[2].b = in.foos_2_b; + foos[3].a = in.foos_3_a; + foos[3].b = in.foos_3_b; + bars[0].a = in.bars_0_a; + bars[0].b = in.bars_0_b; + bars[1].a = in.bars_1_a; + bars[1].b = in.bars_1_b; + bars[2].a = in.bars_2_a; + bars[2].b = in.bars_2_b; + bars[3].a = in.bars_3_a; + bars[3].b = in.bars_3_b; + out.FragColor.x = foos[0].a; + out.FragColor.y = foos[1].b; + out.FragColor.z = 
foos[2].a; + out.FragColor.w = bars[3].b; + return out; +} + diff --git a/reference/shaders-msl/frag/in_mat.frag b/reference/shaders-msl/frag/in_mat.frag index 70ff4860b80..cf7da2ef292 100644 --- a/reference/shaders-msl/frag/in_mat.frag +++ b/reference/shaders-msl/frag/in_mat.frag @@ -27,8 +27,8 @@ fragment main0_out main0(main0_in in [[stage_in]], texturecube samplerCol inInvModelView[1] = in.inInvModelView_1; inInvModelView[2] = in.inInvModelView_2; inInvModelView[3] = in.inInvModelView_3; - float3 cI = normalize(in.inPos); - float3 cR = reflect(cI, normalize(in.inNormal)); + float3 cI = fast::normalize(in.inPos); + float3 cR = reflect(cI, fast::normalize(in.inNormal)); cR = float3((inInvModelView * float4(cR, 0.0)).xyz); cR.x *= (-1.0); out.outFragColor = samplerColor.sample(samplerColorSmplr, cR, bias(in.inLodBias)); diff --git a/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag b/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag new file mode 100644 index 00000000000..3f91c2c409d --- /dev/null +++ b/reference/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag @@ -0,0 +1,27 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_ms_array uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_Layer) +{ + float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_Layer, gl_SampleID); + return _24; +} + +fragment main0_out main0(texture2d_ms_array uSubpass0 [[texture(0)]], texture2d_ms_array uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer, 1) + 
uSubpass1.read(uint2(gl_FragCoord.xy), gl_Layer, 2)) + load_subpasses(uSubpass0, gl_SampleID, gl_FragCoord, gl_Layer); + return out; +} + diff --git a/reference/shaders-msl/frag/input-attachment-ms.frag b/reference/shaders-msl/frag/input-attachment-ms.frag index d38712e91cd..97629d8e443 100644 --- a/reference/shaders-msl/frag/input-attachment-ms.frag +++ b/reference/shaders-msl/frag/input-attachment-ms.frag @@ -10,14 +10,17 @@ struct main0_out float4 FragColor [[color(0)]]; }; -float4 load_subpasses(thread const texture2d_ms uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord) +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_ms uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord) { - return uInput.read(uint2(gl_FragCoord.xy), gl_SampleID); + float4 _24 = uInput.read(uint2(gl_FragCoord.xy), gl_SampleID); + return _24; } fragment main0_out main0(texture2d_ms uSubpass0 [[texture(0)]], texture2d_ms uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]]) { main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), 1) + uSubpass1.read(uint2(gl_FragCoord.xy), 2)) + load_subpasses(uSubpass0, gl_SampleID, gl_FragCoord); return out; } diff --git a/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag b/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag new file mode 100644 index 00000000000..1b6b9f6aaea --- /dev/null +++ b/reference/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_ms_array uInput, thread uint& gl_SampleID, thread float4& gl_FragCoord, thread uint& gl_ViewIndex) +{ + float4 _24 = 
uInput.read(uint2(gl_FragCoord.xy), gl_ViewIndex, gl_SampleID); + return _24; +} + +fragment main0_out main0(constant uint* spvViewMask [[buffer(24)]], texture2d_ms_array uSubpass0 [[texture(0)]], texture2d_ms_array uSubpass1 [[texture(1)]], uint gl_SampleID [[sample_id]], float4 gl_FragCoord [[position]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = (uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex, 1) + uSubpass1.read(uint2(gl_FragCoord.xy), gl_ViewIndex, 2)) + load_subpasses(uSubpass0, gl_SampleID, gl_FragCoord, gl_ViewIndex); + return out; +} + diff --git a/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag b/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag new file mode 100644 index 00000000000..934abd585f6 --- /dev/null +++ b/reference/shaders-msl/frag/input-attachment.arrayed-subpass.frag @@ -0,0 +1,25 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_array uInput, thread float4& gl_FragCoord, thread uint& gl_Layer) +{ + return uInput.read(uint2(gl_FragCoord.xy), gl_Layer); +} + +fragment main0_out main0(texture2d_array uSubpass0 [[texture(0)]], texture2d_array uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), gl_Layer) + load_subpasses(uSubpass1, gl_FragCoord, gl_Layer); + return out; +} + diff --git a/reference/shaders-msl/frag/input-attachment.frag b/reference/shaders-msl/frag/input-attachment.frag index 3cc929182b0..0643acfa72d 100644 --- a/reference/shaders-msl/frag/input-attachment.frag +++ b/reference/shaders-msl/frag/input-attachment.frag @@ -10,15 
+10,16 @@ struct main0_out float4 FragColor [[color(0)]]; }; -float4 load_subpasses(thread const texture2d uInput, thread float4& gl_FragCoord) +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d uInput, thread float4& gl_FragCoord) { - return uInput.read(uint2(gl_FragCoord.xy), 0); + return uInput.read(uint2(gl_FragCoord.xy)); } fragment main0_out main0(texture2d uSubpass0 [[texture(0)]], texture2d uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), 0) + load_subpasses(uSubpass1, gl_FragCoord); + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy)) + load_subpasses(uSubpass1, gl_FragCoord); return out; } diff --git a/reference/shaders-msl/frag/input-attachment.multiview.frag b/reference/shaders-msl/frag/input-attachment.multiview.frag new file mode 100644 index 00000000000..6ba2421659f --- /dev/null +++ b/reference/shaders-msl/frag/input-attachment.multiview.frag @@ -0,0 +1,26 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 load_subpasses(texture2d_array uInput, thread float4& gl_FragCoord, thread uint& gl_ViewIndex) +{ + return uInput.read(uint2(gl_FragCoord.xy), gl_ViewIndex); +} + +fragment main0_out main0(constant uint* spvViewMask [[buffer(24)]], texture2d_array uSubpass0 [[texture(0)]], texture2d_array uSubpass1 [[texture(1)]], float4 gl_FragCoord [[position]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = uSubpass0.read(uint2(gl_FragCoord.xy), gl_ViewIndex) + load_subpasses(uSubpass1, gl_FragCoord, gl_ViewIndex); + return out; +} + diff --git a/reference/shaders-msl/frag/interpolation-qualifiers-block.frag b/reference/shaders-msl/frag/interpolation-qualifiers-block.frag index 
2b420195ffc..6148b8915c1 100644 --- a/reference/shaders-msl/frag/interpolation-qualifiers-block.frag +++ b/reference/shaders-msl/frag/interpolation-qualifiers-block.frag @@ -21,26 +21,26 @@ struct main0_out struct main0_in { - float2 Input_v0 [[user(locn0), centroid_no_perspective]]; - float2 Input_v1 [[user(locn1), centroid_no_perspective]]; - float3 Input_v2 [[user(locn2), centroid_no_perspective]]; - float4 Input_v3 [[user(locn3), centroid_no_perspective]]; - float Input_v4 [[user(locn4), centroid_no_perspective]]; - float Input_v5 [[user(locn5), centroid_no_perspective]]; - float Input_v6 [[user(locn6), centroid_no_perspective]]; + float2 inp_v0 [[user(locn0), centroid_no_perspective]]; + float2 inp_v1 [[user(locn1), centroid_no_perspective]]; + float3 inp_v2 [[user(locn2), centroid_no_perspective]]; + float4 inp_v3 [[user(locn3), centroid_no_perspective]]; + float inp_v4 [[user(locn4), centroid_no_perspective]]; + float inp_v5 [[user(locn5), centroid_no_perspective]]; + float inp_v6 [[user(locn6), centroid_no_perspective]]; }; fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; Input inp = {}; - inp.v0 = in.Input_v0; - inp.v1 = in.Input_v1; - inp.v2 = in.Input_v2; - inp.v3 = in.Input_v3; - inp.v4 = in.Input_v4; - inp.v5 = in.Input_v5; - inp.v6 = in.Input_v6; + inp.v0 = in.inp_v0; + inp.v1 = in.inp_v1; + inp.v2 = in.inp_v2; + inp.v3 = in.inp_v3; + inp.v4 = in.inp_v4; + inp.v5 = in.inp_v5; + inp.v6 = in.inp_v6; out.FragColor = float4(inp.v0.x + inp.v1.y, inp.v2.xy, ((inp.v3.w * inp.v4) + inp.v5) - inp.v6); return out; } diff --git a/reference/shaders-msl/frag/lut-promotion.frag b/reference/shaders-msl/frag/lut-promotion.frag index f7e51edb84c..b1e0e7311dc 100644 --- a/reference/shaders-msl/frag/lut-promotion.frag +++ b/reference/shaders-msl/frag/lut-promotion.frag @@ -1,13 +1,52 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant 
float _16[16] = { 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }; -constant float4 _60[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; -constant float4 _104[4] = { float4(20.0), float4(30.0), float4(50.0), float4(60.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _16 = spvUnsafeArray({ 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0 }); +constant spvUnsafeArray _60 = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); +constant spvUnsafeArray _104 = spvUnsafeArray({ float4(20.0), float4(30.0), float4(50.0), float4(60.0) }); struct main0_out { @@ -19,19 +58,6 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; @@ -52,14 +78,14 @@ fragment main0_out main0(main0_in in [[stage_in]]) { out.FragColor += _60[in.index & 1].x; } - float4 foobar[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; + spvUnsafeArray foobar = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); if (in.index > 30) { foobar[1].z = 20.0; } out.FragColor += foobar[in.index & 3].z; - float4 baz[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; - spvArrayCopyFromConstant1(baz, _104); + spvUnsafeArray baz = spvUnsafeArray({ float4(0.0), float4(1.0), float4(8.0), float4(5.0) }); + baz = _104; out.FragColor += baz[in.index & 3].z; return out; } diff --git a/reference/shaders-msl/frag/mix.frag b/reference/shaders-msl/frag/mix.frag index ad7c5adeeb9..ee28bf92616 100644 --- a/reference/shaders-msl/frag/mix.frag +++ b/reference/shaders-msl/frag/mix.frag @@ -20,11 +20,10 @@ fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; bool4 l = bool4(false, true, false, false); - out.FragColor = float4(l.x ? in.vIn1.x : in.vIn0.x, l.y ? in.vIn1.y : in.vIn0.y, l.z ? in.vIn1.z : in.vIn0.z, l.w ? in.vIn1.w : in.vIn0.w); + out.FragColor = select(in.vIn0, in.vIn1, l); bool f = true; out.FragColor = float4(f ? in.vIn3 : in.vIn2); - bool4 _37 = bool4(f); - out.FragColor = float4(_37.x ? in.vIn0.x : in.vIn1.x, _37.y ? in.vIn0.y : in.vIn1.y, _37.z ? in.vIn0.z : in.vIn1.z, _37.w ? in.vIn0.w : in.vIn1.w); + out.FragColor = select(in.vIn1, in.vIn0, bool4(f)); out.FragColor = float4(f ? 
in.vIn2 : in.vIn3); return out; } diff --git a/reference/shaders-msl/frag/modf-access-tracking-function.frag b/reference/shaders-msl/frag/modf-access-tracking-function.frag new file mode 100644 index 00000000000..934561e809f --- /dev/null +++ b/reference/shaders-msl/frag/modf-access-tracking-function.frag @@ -0,0 +1,33 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 vo0 [[color(0)]]; + float4 vo1 [[color(1)]]; +}; + +struct main0_in +{ + float4 v [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float4 modf_inner(thread float4& v, thread float4& vo1) +{ + float4 _16 = modf(v, vo1); + return _16; +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _20 = modf_inner(in.v, out.vo1); + out.vo0 = _20; + return out; +} + diff --git a/reference/shaders-msl/frag/mrt-array.frag b/reference/shaders-msl/frag/mrt-array.frag index daf7edb4ccf..bc268284417 100644 --- a/reference/shaders-msl/frag/mrt-array.frag +++ b/reference/shaders-msl/frag/mrt-array.frag @@ -1,10 +1,56 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + struct main0_out { float4 FragColor_0 [[color(0)]]; @@ -19,19 +65,14 @@ struct main0_in float4 vB [[user(locn1)]]; }; -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} - -void write_deeper_in_function(thread float4 (&FragColor)[4], thread float4& vA, thread float4& vB) +static inline __attribute__((always_inline)) +void write_deeper_in_function(thread spvUnsafeArray& FragColor, thread float4& vA, thread float4& vB) { FragColor[3] = vA * vB; } -void write_in_function(thread float4 (&FragColor)[4], thread float4& vA, thread float4& vB) +static inline __attribute__((always_inline)) +void write_in_function(thread spvUnsafeArray& FragColor, thread float4& vA, thread float4& vB) { FragColor[2] = vA - vB; write_deeper_in_function(FragColor, vA, vB); @@ -40,7 +81,7 @@ void write_in_function(thread float4 (&FragColor)[4], thread float4& vA, thread fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 FragColor[4] = {}; + spvUnsafeArray FragColor = {}; FragColor[0] = mod(in.vA, in.vB); FragColor[1] = in.vA + in.vB; 
write_in_function(FragColor, in.vA, in.vB); diff --git a/reference/shaders-msl/frag/nonuniform-qualifier.msl2.frag b/reference/shaders-msl/frag/nonuniform-qualifier.msl2.frag index 34f2bde1096..401703cbafa 100644 --- a/reference/shaders-msl/frag/nonuniform-qualifier.msl2.frag +++ b/reference/shaders-msl/frag/nonuniform-qualifier.msl2.frag @@ -40,12 +40,17 @@ fragment main0_out main0(main0_in in [[stage_in]], constant UBO* ubos_0 [[buffer main0_out out = {}; int i = in.vIndex; - int _24 = i + 10; - out.FragColor = uSamplers[_24].sample(uSamps[i + 40], in.vUV); - int _50 = i + 10; - out.FragColor = uCombinedSamplers[_50].sample(uCombinedSamplersSmplr[_50], in.vUV); - out.FragColor += ubos[(i + 20)]->v[i + 40]; - out.FragColor += ssbos[(i + 50)]->v[i + 60]; + int _25 = i + 10; + int _37 = i + 40; + out.FragColor = uSamplers[_25].sample(uSamps[_37], in.vUV); + int _53 = i + 10; + out.FragColor = uCombinedSamplers[_53].sample(uCombinedSamplersSmplr[_53], in.vUV); + int _69 = i + 20; + int _73 = i + 40; + out.FragColor += ubos[_69]->v[_73]; + int _87 = i + 50; + int _91 = i + 60; + out.FragColor += ssbos[_87]->v[_91]; return out; } diff --git a/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag b/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag index dc8947425a7..2ed369353fb 100644 --- a/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag +++ b/reference/shaders-msl/frag/packed-expression-vector-shuffle.frag @@ -18,7 +18,9 @@ fragment main0_out main0(constant UBO& _15 [[buffer(0)]]) { main0_out out = {}; float4 f = float4(1.0); - f = float4(_15.color[0], _15.color[1], _15.color[2], f.w); + f.x = _15.color[0]; + f.y = _15.color[1]; + f.z = _15.color[2]; out.FragColor = f; return out; } diff --git a/reference/shaders-msl/frag/packing-test-3.frag b/reference/shaders-msl/frag/packing-test-3.frag index d8310d6ac76..f82d8a57970 100644 --- a/reference/shaders-msl/frag/packing-test-3.frag +++ 
b/reference/shaders-msl/frag/packing-test-3.frag @@ -32,23 +32,24 @@ struct main0_out float4 _entryPointOutput [[color(0)]]; }; -float4 _main(thread const VertexOutput& IN, constant CB0& v_26) +static inline __attribute__((always_inline)) +float4 _main(thread const VertexOutput& IN, constant CB0& _RESERVED_IDENTIFIER_FIXUP_24) { TestStruct st; - st.position = float3(v_26.CB0[1].position); - st.radius = v_26.CB0[1].radius; + st.position = float3(_RESERVED_IDENTIFIER_FIXUP_24.CB0[1].position); + st.radius = _RESERVED_IDENTIFIER_FIXUP_24.CB0[1].radius; float4 col = float4(st.position, st.radius); return col; } -fragment main0_out main0(constant CB0& v_26 [[buffer(0)]], float4 gl_FragCoord [[position]]) +fragment main0_out main0(constant CB0& _RESERVED_IDENTIFIER_FIXUP_24 [[buffer(0)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; VertexOutput IN; IN.HPosition = gl_FragCoord; VertexOutput param = IN; VertexOutput param_1 = param; - out._entryPointOutput = _main(param_1, v_26); + out._entryPointOutput = _main(param_1, _RESERVED_IDENTIFIER_FIXUP_24); return out; } diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 00000000000..1bfaff53bf8 --- /dev/null +++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,53 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? 
spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +struct spvDescriptorSetBuffer0 +{ + device Buffer3* m_9 [[id(0)]]; + texture2d img4 [[id(1)]]; + texture2d img [[id(2), raster_order_group(0)]]; + texture2d img3 [[id(3), raster_order_group(0)]]; + texture2d img2 [[id(4), raster_order_group(0)]]; + device atomic_uint* img2_atomic [[id(5), raster_order_group(0)]]; + volatile device Buffer* m_42 [[id(6), raster_order_group(0)]]; + device Buffer2* m_52 [[id(7), raster_order_group(0)]]; +}; + +fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + (*spvDescriptorSet0.m_9).baz = 0; + spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.img2_atomic[spvImage2DAtomicCoord(int2(0), spvDescriptorSet0.img2)], 1u, memory_order_relaxed); + (*spvDescriptorSet0.m_42).foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_42).bar, (*spvDescriptorSet0.m_52).quux, memory_order_relaxed); +} + diff --git a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 00000000000..6a300e8c589 --- /dev/null +++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,41 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +// The 
required alignment of a linear texture of R32Uint format. +constant uint spvLinearTextureAlignmentOverride [[function_constant(65535)]]; +constant uint spvLinearTextureAlignment = is_function_constant_defined(spvLinearTextureAlignmentOverride) ? spvLinearTextureAlignmentOverride : 4; +// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics +#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + spvLinearTextureAlignment / 4 - 1) & ~( spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x) + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _42 [[buffer(2), raster_order_group(0)]], device Buffer2& _52 [[buffer(3), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]], texture2d img2 [[texture(3), raster_order_group(0)]], device atomic_uint* img2_atomic [[buffer(1), raster_order_group(0)]]) +{ + _9.baz = 0; + img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + img.write(img3.read(uint2(int2(0))), uint2(int2(0))); + uint _39 = atomic_fetch_add_explicit((device atomic_uint*)&img2_atomic[spvImage2DAtomicCoord(int2(0), img2)], 1u, memory_order_relaxed); + _42.foo += 42; + uint _55 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_42.bar, _52.quux, memory_order_relaxed); +} + diff --git a/reference/shaders-msl/frag/post-depth-coverage.ios.msl2.frag b/reference/shaders-msl/frag/post-depth-coverage.ios.msl2.frag new file mode 100644 index 00000000000..3b2885e2e2a --- /dev/null +++ b/reference/shaders-msl/frag/post-depth-coverage.ios.msl2.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask, 
post_depth_coverage]]) +{ + main0_out out = {}; + out.FragColor = float4(float(gl_SampleMaskIn)); + return out; +} + diff --git a/reference/shaders-msl/frag/post-depth-coverage.msl23.frag b/reference/shaders-msl/frag/post-depth-coverage.msl23.frag new file mode 100644 index 00000000000..3b2885e2e2a --- /dev/null +++ b/reference/shaders-msl/frag/post-depth-coverage.msl23.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask, post_depth_coverage]]) +{ + main0_out out = {}; + out.FragColor = float4(float(gl_SampleMaskIn)); + return out; +} + diff --git a/reference/shaders-msl/frag/private-variable-prototype-declaration.frag b/reference/shaders-msl/frag/private-variable-prototype-declaration.frag index d014623bce1..7c11a937a21 100644 --- a/reference/shaders-msl/frag/private-variable-prototype-declaration.frag +++ b/reference/shaders-msl/frag/private-variable-prototype-declaration.frag @@ -15,11 +15,13 @@ struct main0_out float3 FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) void someFunction(thread AStruct& s) { s.foobar = float4(1.0); } +static inline __attribute__((always_inline)) void otherFunction(thread float3& global_variable) { global_variable = float3(1.0); diff --git a/reference/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag b/reference/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag new file mode 100644 index 00000000000..3ab6a47161b --- /dev/null +++ b/reference/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag @@ -0,0 +1,60 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include +#if __METAL_VERSION__ >= 230 +#include +using namespace metal::raytracing; +#endif + +using namespace metal; + +struct main0_out +{ + float4 outColor [[color(0)]]; +}; + +struct main0_in +{ + float4 inPos 
[[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +uint doRay(thread const float3& rayOrigin, thread const float3& rayDirection, thread const float& rayDistance, thread raytracing::intersection_query& rayQuery, thread const raytracing::acceleration_structure& topLevelAS) +{ + rayQuery.reset(ray(rayOrigin, rayDirection, 0.001000000047497451305389404296875, rayDistance), topLevelAS, intersection_params()); + for (;;) + { + bool _36 = rayQuery.next(); + if (_36) + { + continue; + } + else + { + break; + } + } + uint _40 = uint(rayQuery.get_committed_intersection_type()); + return _40; +} + +fragment main0_out main0(main0_in in [[stage_in]], raytracing::acceleration_structure topLevelAS [[buffer(0)]]) +{ + main0_out out = {}; + float3 rayOrigin = float3((in.inPos.xy * 4.0) - float2(2.0), 1.0); + float3 rayDirection = float3(0.0, 0.0, -1.0); + float rayDistance = 2.0; + float3 param = rayOrigin; + float3 param_1 = rayDirection; + float param_2 = rayDistance; + raytracing::intersection_query rayQuery; + uint _70 = doRay(param, param_1, param_2, rayQuery, topLevelAS); + if (_70 == 0u) + { + discard_fragment(); + } + out.outColor = in.inPos; + return out; +} + diff --git a/reference/shaders-msl/frag/read-cull-clip-distance-in-function.frag b/reference/shaders-msl/frag/read-cull-clip-distance-in-function.frag new file mode 100644 index 00000000000..02d57d9beea --- /dev/null +++ b/reference/shaders-msl/frag/read-cull-clip-distance-in-function.frag @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; + float gl_CullDistance_0 [[user(cull0)]]; + float gl_CullDistance_1 [[user(cull1)]]; +}; + +static inline __attribute__((always_inline)) +float4 read_in_func(thread spvUnsafeArray& gl_CullDistance, thread spvUnsafeArray& gl_ClipDistance) +{ + return float4(gl_CullDistance[0], gl_CullDistance[1], gl_ClipDistance[0], gl_ClipDistance[1]); +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_CullDistance = {}; + spvUnsafeArray gl_ClipDistance = {}; + gl_CullDistance[0] = in.gl_CullDistance_0; + gl_CullDistance[1] = in.gl_CullDistance_1; + gl_ClipDistance[0] = in.gl_ClipDistance_0; + gl_ClipDistance[1] = in.gl_ClipDistance_1; + out.FragColor = read_in_func(gl_CullDistance, gl_ClipDistance); + return out; +} + diff --git a/reference/shaders-msl/frag/readonly-ssbo.frag b/reference/shaders-msl/frag/readonly-ssbo.frag index 771c225d6ee..7d73da79b2f 100644 --- a/reference/shaders-msl/frag/readonly-ssbo.frag +++ b/reference/shaders-msl/frag/readonly-ssbo.frag @@ -15,6 +15,7 @@ struct main0_out float4 FragColor [[color(0)]]; }; +static inline __attribute__((always_inline)) float4 read_from_function(const 
device SSBO& v_13) { return v_13.v; diff --git a/reference/shaders-msl/frag/return-value-after-discard-terminator.frag b/reference/shaders-msl/frag/return-value-after-discard-terminator.frag new file mode 100644 index 00000000000..d8895e0e7d2 --- /dev/null +++ b/reference/shaders-msl/frag/return-value-after-discard-terminator.frag @@ -0,0 +1,26 @@ +#include +#include + +using namespace metal; + +struct buff_t +{ + int m0[1024]; +}; + +struct main0_out +{ + float4 frag_clr [[color(0)]]; +}; + +fragment main0_out main0(device buff_t& buff [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + int2 frag_coord = int2(int4(gl_FragCoord).xy); + int buff_idx = (frag_coord.y * 32) + frag_coord.x; + out.frag_clr = float4(0.0, 0.0, 1.0, 1.0); + buff.m0[buff_idx] = 1; + discard_fragment(); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag b/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag new file mode 100644 index 00000000000..f0bf396c50b --- /dev/null +++ b/reference/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag @@ -0,0 +1,43 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +static inline __attribute__((always_inline)) +float sample_normal2(depth2d tex, sampler uSampler, thread float3& vUV) +{ + return float4(tex.sample(uSampler, vUV.xy)).x; +} + +static inline __attribute__((always_inline)) +float sample_normal(depth2d tex, sampler uSampler, thread float3& vUV) +{ + return sample_normal2(tex, uSampler, vUV); +} + +static inline __attribute__((always_inline)) +float sample_comp(depth2d tex, thread float3& vUV, sampler uSamplerShadow) +{ + return tex.sample_compare(uSamplerShadow, vUV.xy, vUV.z); +} + +fragment main0_out main0(main0_in in [[stage_in]], depth2d uTexture 
[[texture(0)]], sampler uSampler [[sampler(0)]], sampler uSamplerShadow [[sampler(1)]]) +{ + main0_out out = {}; + out.FragColor = sample_normal(uTexture, uSampler, in.vUV); + out.FragColor += sample_comp(uTexture, in.vUV, uSamplerShadow); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag index a9c0f8b41bb..27653a06a43 100644 --- a/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag +++ b/reference/shaders-msl/frag/sample-depth-separate-image-sampler.frag @@ -10,12 +10,14 @@ struct main0_out float FragColor [[color(0)]]; }; -float sample_depth_from_function(thread const depth2d uT, thread const sampler uS) +static inline __attribute__((always_inline)) +float sample_depth_from_function(depth2d uT, sampler uS) { - return uT.sample_compare(uS, float3(0.5).xy, float3(0.5).z); + return uT.sample_compare(uS, float3(0.5).xy, 0.5); } -float sample_color_from_function(thread const texture2d uT, thread const sampler uS) +static inline __attribute__((always_inline)) +float sample_color_from_function(texture2d uT, sampler uS) { return uT.sample(uS, float2(0.5)).x; } diff --git a/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag b/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag new file mode 100644 index 00000000000..626fe4c79c2 --- /dev/null +++ b/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = (gl_SampleMaskIn & 0x22 & (1 << gl_SampleID)); + out.gl_SampleMask &= 0x22; + return out; +} + diff --git 
a/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag b/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag new file mode 100644 index 00000000000..f478901b6be --- /dev/null +++ b/reference/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0(uint gl_SampleMaskIn [[sample_mask]]) +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = (gl_SampleMaskIn & 0x22); + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag b/reference/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag new file mode 100644 index 00000000000..d04f2033bb5 --- /dev/null +++ b/reference/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = 0x22; + return out; +} + diff --git a/reference/shaders-msl/frag/sample-mask.fixed-sample-mask.frag b/reference/shaders-msl/frag/sample-mask.fixed-sample-mask.frag new file mode 100644 index 00000000000..76306b5ade8 --- /dev/null +++ b/reference/shaders-msl/frag/sample-mask.fixed-sample-mask.frag @@ -0,0 +1,20 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(1.0); + out.gl_SampleMask = 0; + out.gl_SampleMask &= 0x22; + return out; +} + diff --git a/reference/shaders-msl/frag/sample-position-func.frag b/reference/shaders-msl/frag/sample-position-func.frag index 
06fa53063ac..025f874d266 100644 --- a/reference/shaders-msl/frag/sample-position-func.frag +++ b/reference/shaders-msl/frag/sample-position-func.frag @@ -15,6 +15,7 @@ struct main0_in int index [[user(locn0)]]; }; +static inline __attribute__((always_inline)) float4 getColor(thread const int& i, thread float2& gl_SamplePosition) { return float4(gl_SamplePosition, float(i), 1.0); diff --git a/reference/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag new file mode 100644 index 00000000000..5df60f909e5 --- /dev/null +++ b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d_array tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float3 _28 = float3(gl_FragCoord.xy, float(gl_SampleID)); + out.FragColor = tex.sample(texSmplr, _28.xy, uint(round(_28.z))); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag new file mode 100644 index 00000000000..386230ef0c3 --- /dev/null +++ b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float foo [[user(locn0), sample_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d_array tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float3 _26 = float3(gl_FragCoord.xy, in.foo); + 
out.FragColor = tex.sample(texSmplr, _26.xy, uint(round(_26.z))); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag new file mode 100644 index 00000000000..f8f357fe7c9 --- /dev/null +++ b/reference/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + float2 gl_SamplePosition = get_sample_position(gl_SampleID); + out.FragColor = tex.sample(texSmplr, (gl_FragCoord.xy - gl_SamplePosition)); + return out; +} + diff --git a/reference/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag b/reference/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag new file mode 100644 index 00000000000..1ed8148d4c0 --- /dev/null +++ b/reference/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d tex [[texture(0)]], sampler texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleID [[sample_id]]) +{ + main0_out out = {}; + gl_FragCoord.xy += get_sample_position(gl_SampleID) - 0.5; + out.FragColor = tex.sample(texSmplr, gl_FragCoord.xy); + return out; +} + diff --git a/reference/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag b/reference/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag new file mode 100644 index 00000000000..70278b12907 --- /dev/null +++ b/reference/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor 
[[color(0)]]; +}; + +struct main0_in +{ + float vTex [[user(locn0), flat]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor += ((uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), bias(2.0)) + uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), level(3.0))) + uSampler.sample(uSamplerSmplr, float2(in.vTex, 0.5), gradient2d(5.0, 8.0))); + return out; +} + diff --git a/reference/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag b/reference/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag new file mode 100644 index 00000000000..6aaffe532ba --- /dev/null +++ b/reference/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d_array uTex [[texture(0)]], sampler uShadow [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex.sample_compare(uShadow, float2(in.vUV.x, 0.5), uint(round(in.vUV.y)), in.vUV.z, bias(1.0)); + return out; +} + diff --git a/reference/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag b/reference/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag new file mode 100644 index 00000000000..07845691942 --- /dev/null +++ b/reference/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], depth2d_array uTex [[texture(0)]], sampler uShadow [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = uTex.sample_compare(uShadow, in.vUV.xy, uint(round(in.vUV.z)), in.vUV.w, level(0)) + uTex.sample_compare(uShadow, in.vUV.xy, 
uint(round(in.vUV.z)), in.vUV.w, gradient2d(float2(1.0), float2(1.0))); + return out; +} + diff --git a/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag b/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag index 10a7716261c..8d1934d4efc 100644 --- a/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag +++ b/reference/shaders-msl/frag/sampler-image-arrays.msl2.frag @@ -16,17 +16,20 @@ struct main0_in int vIndex [[user(locn1)]]; }; -float4 sample_from_global(thread int& vIndex, thread float2& vTex, thread const array, 4> uSampler, thread const array uSamplerSmplr) +static inline __attribute__((always_inline)) +float4 sample_from_global(thread int& vIndex, thread float2& vTex, thread const array, 4>& uSampler, thread const array& uSamplerSmplr) { return uSampler[vIndex].sample(uSamplerSmplr[vIndex], (vTex + float2(0.100000001490116119384765625))); } -float4 sample_from_argument(thread const array, 4> samplers, thread const array samplersSmplr, thread int& vIndex, thread float2& vTex) +static inline __attribute__((always_inline)) +float4 sample_from_argument(thread const array, 4>& samplers, thread const array& samplersSmplr, thread int& vIndex, thread float2& vTex) { return samplers[vIndex].sample(samplersSmplr[vIndex], (vTex + float2(0.20000000298023223876953125))); } -float4 sample_single_from_argument(thread const texture2d samp, thread const sampler sampSmplr, thread float2& vTex) +static inline __attribute__((always_inline)) +float4 sample_single_from_argument(texture2d samp, sampler sampSmplr, thread float2& vTex) { return samp.sample(sampSmplr, (vTex + float2(0.300000011920928955078125))); } diff --git a/reference/shaders-msl/frag/sampler.frag b/reference/shaders-msl/frag/sampler.frag index 395854699e3..84743fbd6e0 100644 --- a/reference/shaders-msl/frag/sampler.frag +++ b/reference/shaders-msl/frag/sampler.frag @@ -16,7 +16,8 @@ struct main0_in float2 vTex [[user(locn1)]]; }; -float4 sample_texture(thread const texture2d tex, thread const 
sampler texSmplr, thread const float2& uv) +static inline __attribute__((always_inline)) +float4 sample_texture(texture2d tex, sampler texSmplr, thread const float2& uv) { return tex.sample(texSmplr, uv); } diff --git a/reference/shaders-msl/frag/scalar-refract-reflect.frag b/reference/shaders-msl/frag/scalar-refract-reflect.frag index 592d445810f..fc908cb3e18 100644 --- a/reference/shaders-msl/frag/scalar-refract-reflect.frag +++ b/reference/shaders-msl/frag/scalar-refract-reflect.frag @@ -5,18 +5,8 @@ using namespace metal; -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -struct main0_in -{ - float3 vRefract [[user(locn0)]]; -}; - template -inline T spvReflect(T i, T n) +[[clang::optnone]] T spvReflect(T i, T n) { return i - T(2) * i * n * n; } @@ -37,6 +27,16 @@ inline T spvRefract(T i, T n, T eta) } } +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vRefract [[user(locn0)]]; +}; + fragment main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; diff --git a/reference/shaders-msl/frag/separate-image-sampler-argument.frag b/reference/shaders-msl/frag/separate-image-sampler-argument.frag index 46c0524ab72..208f5d90951 100644 --- a/reference/shaders-msl/frag/separate-image-sampler-argument.frag +++ b/reference/shaders-msl/frag/separate-image-sampler-argument.frag @@ -10,7 +10,8 @@ struct main0_out float4 FragColor [[color(0)]]; }; -float4 samp(thread const texture2d t, thread const sampler s) +static inline __attribute__((always_inline)) +float4 samp(texture2d t, sampler s) { return t.sample(s, float2(0.5)); } diff --git a/reference/shaders-msl/frag/shader-arithmetic-8bit.frag b/reference/shaders-msl/frag/shader-arithmetic-8bit.frag index 28ef0437268..e9694aa196c 100644 --- a/reference/shaders-msl/frag/shader-arithmetic-8bit.frag +++ b/reference/shaders-msl/frag/shader-arithmetic-8bit.frag @@ -34,9 +34,10 @@ struct main0_in int4 vColor [[user(locn0)]]; }; +static inline __attribute__((always_inline)) void 
packing_int8(device SSBO& ssbo) { - short i16 = 10; + short i16 = short(10); int i32 = 20; char2 i8_2 = as_type(i16); char4 i8_4 = as_type(i32); @@ -48,9 +49,10 @@ void packing_int8(device SSBO& ssbo) ssbo.i8[3] = i8_4.w; } +static inline __attribute__((always_inline)) void packing_uint8(device SSBO& ssbo) { - ushort u16 = 10u; + ushort u16 = ushort(10); uint u32 = 20u; uchar2 u8_2 = as_type(u16); uchar4 u8_4 = as_type(u32); @@ -62,6 +64,7 @@ void packing_uint8(device SSBO& ssbo) ssbo.u8[3] = u8_4.w; } +static inline __attribute__((always_inline)) void compute_int8(device SSBO& ssbo, thread int4& vColor, constant Push& registers, constant UBO& ubo, thread int4& FragColorInt) { char4 tmp = char4(vColor); @@ -74,6 +77,7 @@ void compute_int8(device SSBO& ssbo, thread int4& vColor, constant Push& registe FragColorInt = int4(tmp); } +static inline __attribute__((always_inline)) void compute_uint8(device SSBO& ssbo, thread int4& vColor, constant Push& registers, constant UBO& ubo, thread uint4& FragColorUint) { uchar4 tmp = uchar4(char4(vColor)); diff --git a/reference/shaders-msl/frag/stencil-export.msl21.frag b/reference/shaders-msl/frag/stencil-export.msl21.frag index eb85a2158d9..f3629e18beb 100644 --- a/reference/shaders-msl/frag/stencil-export.msl21.frag +++ b/reference/shaders-msl/frag/stencil-export.msl21.frag @@ -12,6 +12,7 @@ struct main0_out uint gl_FragStencilRefARB [[stencil]]; }; +static inline __attribute__((always_inline)) void update_stencil(thread uint& gl_FragStencilRefARB) { gl_FragStencilRefARB = uint(int(gl_FragStencilRefARB) + 10); diff --git a/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag new file mode 100644 index 00000000000..ec0911aed6e --- /dev/null +++ b/reference/shaders-msl/frag/subgroup-globals-extract.msl22.frag @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +inline uint 
spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + ballot &= mask; + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvPopCount4(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvPopCount4(ballot & mask); +} + +struct main0_out +{ + uint2 FragColor 
[[color(0)]]; +}; + +static inline __attribute__((always_inline)) +uint sub1(thread uint& gl_SubgroupSize) +{ + return spvSubgroupBallotFindLSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize); +} + +static inline __attribute__((always_inline)) +uint sub2(thread uint& gl_SubgroupSize) +{ + return spvSubgroupBallotFindMSB(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize); +} + +static inline __attribute__((always_inline)) +uint sub3(thread uint& gl_SubgroupSize) +{ + return spvSubgroupBallotBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupSize); +} + +static inline __attribute__((always_inline)) +uint sub4(thread uint& gl_SubgroupInvocationID) +{ + return spvSubgroupBallotInclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID); +} + +static inline __attribute__((always_inline)) +uint sub5(thread uint& gl_SubgroupInvocationID) +{ + return spvSubgroupBallotExclusiveBitCount(uint4(1u, 2u, 3u, 4u), gl_SubgroupInvocationID); +} + +fragment main0_out main0(uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]], uint gl_SubgroupSize [[threads_per_simdgroup]]) +{ + main0_out out = {}; + out.FragColor.x = (((sub1(gl_SubgroupSize) + sub2(gl_SubgroupSize)) + sub3(gl_SubgroupSize)) + sub4(gl_SubgroupInvocationID)) + sub5(gl_SubgroupInvocationID); + return out; +} + diff --git a/reference/shaders-msl/frag/switch-unreachable-break.frag b/reference/shaders-msl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..8d7903b79b4 --- /dev/null +++ b/reference/shaders-msl/frag/switch-unreachable-break.frag @@ -0,0 +1,43 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + int cond; + int cond2; +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(constant UBO& _15 [[buffer(0)]]) +{ + main0_out out = {}; + out.FragColor = float4(10.0); + switch (_15.cond) + { + case 1: + { + if (_15.cond2 < 50) + { + break; + } + else + { + discard_fragment(); + } + break; // unreachable workaround + } + default: + { + out.FragColor = 
float4(20.0); + break; + } + } + return out; +} + diff --git a/reference/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag b/reference/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag new file mode 100644 index 00000000000..98b9bb7ef80 --- /dev/null +++ b/reference/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0(texture2d uTexture [[texture(0)]], texture2d uTexture2 [[texture(1)]], sampler uTextureSmplr [[sampler(0)]], sampler uTexture2Smplr [[sampler(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.FragColor = uTexture.read(uint2(int2(gl_FragCoord.xy)) + uint2(int2(1)), 0); + out.FragColor += uTexture2.read(uint2(uint(int(gl_FragCoord.x)), 0) + uint2(uint(-1), 0), 0); + return out; +} + diff --git a/reference/shaders-msl/frag/texture-cube-array.frag b/reference/shaders-msl/frag/texture-cube-array.frag new file mode 100644 index 00000000000..9f1832ec04b --- /dev/null +++ b/reference/shaders-msl/frag/texture-cube-array.frag @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texturecube cubeSampler [[texture(0)]], texturecube_array cubeArraySampler [[texture(1)]], texture2d_array texArraySampler [[texture(2)]], sampler cubeSamplerSmplr [[sampler(0)]], sampler cubeArraySamplerSmplr [[sampler(1)]], sampler texArraySamplerSmplr [[sampler(2)]]) +{ + main0_out out = {}; + float4 a = cubeSampler.sample(cubeSamplerSmplr, in.vUV.xyz); + float4 b = cubeArraySampler.sample(cubeArraySamplerSmplr, in.vUV.xyz, uint(round(in.vUV.w))); + float4 c = texArraySampler.sample(texArraySamplerSmplr, in.vUV.xyz.xy, uint(round(in.vUV.xyz.z))); + out.FragColor = (a + b) + c; + return out; +} + diff --git 
a/reference/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag b/reference/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag new file mode 100644 index 00000000000..02541f3ce8a --- /dev/null +++ b/reference/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag @@ -0,0 +1,61 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +static inline __attribute__((always_inline)) +float3 spvCubemapTo2DArrayFace(float3 P) +{ + float3 Coords = abs(P.xyz); + float CubeFace = 0; + float ProjectionAxis = 0; + float u = 0; + float v = 0; + if (Coords.x >= Coords.y && Coords.x >= Coords.z) + { + CubeFace = P.x >= 0 ? 0 : 1; + ProjectionAxis = Coords.x; + u = P.x >= 0 ? -P.z : P.z; + v = -P.y; + } + else if (Coords.y >= Coords.x && Coords.y >= Coords.z) + { + CubeFace = P.y >= 0 ? 2 : 3; + ProjectionAxis = Coords.y; + u = P.x; + v = P.y >= 0 ? P.z : -P.z; + } + else + { + CubeFace = P.z >= 0 ? 4 : 5; + ProjectionAxis = Coords.z; + u = P.z >= 0 ? 
P.x : -P.x; + v = -P.y; + } + u = 0.5 * (u/ProjectionAxis + 1); + v = 0.5 * (v/ProjectionAxis + 1); + return float3(u, v, CubeFace); +} + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vUV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texturecube cubeSampler [[texture(0)]], texture2d_array cubeArraySampler [[texture(1)]], texture2d_array texArraySampler [[texture(2)]], sampler cubeSamplerSmplr [[sampler(0)]], sampler cubeArraySamplerSmplr [[sampler(1)]], sampler texArraySamplerSmplr [[sampler(2)]]) +{ + main0_out out = {}; + float4 a = cubeSampler.sample(cubeSamplerSmplr, in.vUV.xyz); + float4 b = cubeArraySampler.sample(cubeArraySamplerSmplr, spvCubemapTo2DArrayFace(in.vUV.xyz).xy, uint(spvCubemapTo2DArrayFace(in.vUV.xyz).z) + (uint(round(in.vUV.w)) * 6u)); + float4 c = texArraySampler.sample(texArraySamplerSmplr, in.vUV.xyz.xy, uint(round(in.vUV.xyz.z))); + out.FragColor = (a + b) + c; + return out; +} + diff --git a/reference/shaders-msl/frag/texture-proj-shadow.frag b/reference/shaders-msl/frag/texture-proj-shadow.frag index 52d4a026d2d..1ef450a2b32 100644 --- a/reference/shaders-msl/frag/texture-proj-shadow.frag +++ b/reference/shaders-msl/frag/texture-proj-shadow.frag @@ -18,9 +18,10 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], depth2d uShadow2D [[texture(0)]], texture1d uSampler1D [[texture(1)]], texture2d uSampler2D [[texture(2)]], texture3d uSampler3D [[texture(3)]], sampler uShadow2DSmplr [[sampler(0)]], sampler uSampler1DSmplr [[sampler(1)]], sampler uSampler2DSmplr [[sampler(2)]], sampler uSampler3DSmplr [[sampler(3)]]) { main0_out out = {}; - float4 _20 = in.vClip4; - _20.z = in.vClip4.w; - out.FragColor = uShadow2D.sample_compare(uShadow2DSmplr, _20.xy / _20.z, in.vClip4.z / _20.z); + float4 _17 = in.vClip4; + float4 _20 = _17; + _20.z = _17.w; + out.FragColor = uShadow2D.sample_compare(uShadow2DSmplr, _20.xy / _20.z, _17.z / _20.z); out.FragColor = 
uSampler1D.sample(uSampler1DSmplr, in.vClip2.x / in.vClip2.y).x; out.FragColor = uSampler2D.sample(uSampler2DSmplr, in.vClip3.xy / in.vClip3.z).x; out.FragColor = uSampler3D.sample(uSampler3DSmplr, in.vClip4.xyz / in.vClip4.w).x; diff --git a/reference/shaders-msl/frag/ubo_layout.frag b/reference/shaders-msl/frag/ubo_layout.frag index 0bc27462b2e..4ca603d6431 100644 --- a/reference/shaders-msl/frag/ubo_layout.frag +++ b/reference/shaders-msl/frag/ubo_layout.frag @@ -31,7 +31,7 @@ struct main0_out fragment main0_out main0(constant UBO1& ubo1 [[buffer(0)]], constant UBO2& ubo0 [[buffer(1)]]) { main0_out out = {}; - out.FragColor = transpose(ubo1.foo.foo)[0] + ubo0.foo.foo[0]; + out.FragColor = float4(ubo1.foo.foo[0][0], ubo1.foo.foo[1][0], ubo1.foo.foo[2][0], ubo1.foo.foo[3][0]) + ubo0.foo.foo[0]; return out; } diff --git a/reference/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag b/reference/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag new file mode 100644 index 00000000000..1cb7aa70328 --- /dev/null +++ b/reference/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag @@ -0,0 +1,75 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + ushort2 a [[user(locn0)]]; + uint3 b [[user(locn1)]]; + ushort c_0 [[user(locn2)]]; + ushort c_1 [[user(locn3)]]; + uint4 e_0 [[user(locn4)]]; + uint4 e_1 [[user(locn5)]]; + float4 d [[user(locn6)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray c = {}; + spvUnsafeArray e = {}; + c[0] = in.c_0; + c[1] = in.c_1; + e[0] = in.e_0; + e[1] = in.e_1; + out.FragColor = float4(float(int(short(in.a.x))), float(int(in.b.x)), float2(float(uint(c[1])), float(e[0].w)) + in.d.xy); + return out; +} + diff --git a/reference/shaders-msl/frag/write-depth-in-function.frag b/reference/shaders-msl/frag/write-depth-in-function.frag index 6837a9b3ea2..c3ab221fc32 100644 --- a/reference/shaders-msl/frag/write-depth-in-function.frag +++ b/reference/shaders-msl/frag/write-depth-in-function.frag @@ -11,6 +11,7 @@ struct main0_out float gl_FragDepth [[depth(any)]]; }; +static inline __attribute__((always_inline)) void set_output_depth(thread float& gl_FragDepth) { gl_FragDepth = 0.20000000298023223876953125; diff --git a/reference/shaders-msl/intel/shader-integer-functions2.asm.comp b/reference/shaders-msl/intel/shader-integer-functions2.asm.comp new file mode 100644 index 00000000000..1e5d889d462 --- 
/dev/null +++ b/reference/shaders-msl/intel/shader-integer-functions2.asm.comp @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct foo +{ + uint a; + uint b; + int c; + int d; +}; + +kernel void main0(device foo& _4 [[buffer(0)]]) +{ + _4.a = clz(_4.a); + _4.a = ctz(_4.a); + _4.a = absdiff(_4.c, _4.d); + _4.a = absdiff(_4.a, _4.b); + _4.c = addsat(_4.c, _4.d); + _4.a = addsat(_4.a, _4.b); + _4.c = hadd(_4.c, _4.d); + _4.a = hadd(_4.a, _4.b); + _4.c = rhadd(_4.c, _4.d); + _4.a = rhadd(_4.a, _4.b); + _4.c = subsat(_4.c, _4.d); + _4.a = subsat(_4.a, _4.b); + _4.c = int(short(_4.c)) * int(short(_4.d)); + _4.a = uint(ushort(_4.a)) * uint(ushort(_4.b)); +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc new file mode 100644 index 00000000000..24928da01df --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc @@ -0,0 +1,188 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; 
i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStorageFoo[8][4][2]; + threadgroup float4 (&Foo)[4][2] = spvStorageFoo[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvArrayCopyFromDeviceToThreadGroup1(Foo[gl_InvocationID], gl_in[gl_InvocationID].iFoo.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = 
spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..a08364e2b34 --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc @@ -0,0 +1,191 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void 
spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) 
+{ + threadgroup float4 Foo[4][2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + spvArrayCopyFromStackToThreadGroup1(Foo[gl_InvocationID], _38.elements); + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc new file mode 100644 index 00000000000..abc95ca899e --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + spvUnsafeArray iFoo; + float4 ipFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].Foo = gl_in[gl_InvocationID].iFoo; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc 
b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..3da1d18c61d --- /dev/null +++ b/reference/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + spvUnsafeArray Foo; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray pFoo; +}; + +struct main0_in +{ + float4 iFoo_0 [[attribute(0)]]; + float4 iFoo_1 [[attribute(1)]]; + float4 ipFoo [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + 
threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + gl_out[gl_InvocationID].gl_Position = float4(1.0); + spvUnsafeArray _38 = spvUnsafeArray({ gl_in[gl_InvocationID].iFoo_0, gl_in[gl_InvocationID].iFoo_1 }); + gl_out[gl_InvocationID].Foo = _38; + if (gl_InvocationID == 0) + { + spvUnsafeArray _56 = spvUnsafeArray({ gl_in[0].ipFoo, gl_in[1].ipFoo }); + patchOut.pFoo = _56; + } +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..2bf5c257d6b --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_b; + float4 m_22_c; + float4 m_22_d; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + V _22 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_b = _22.b; + out.m_22_c = _22.c; + out.m_22_d = _22.d; +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..cc6364fcd7d --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + 
+struct main0_out +{ + float c_a; + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_b; +}; + +static inline __attribute__((always_inline)) +void write_in_function(threadgroup P& _11, device main0_patchOut& patchOut, device main0_out* thread & gl_out, thread uint& gl_InvocationID) +{ + _11.a = 1.0; + patchOut.m_11_b = 2.0; + gl_out[gl_InvocationID].c_a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup P _11; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_function(_11, patchOut, gl_out, gl_InvocationID); +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc new file mode 100644 index 00000000000..7336d094c84 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc @@ -0,0 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_a; + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_b; +}; + +static inline __attribute__((always_inline)) +void write_in_function(threadgroup P& _11, device main0_patchOut& patchOut, device main0_out* thread & gl_out, thread uint& gl_InvocationID) +{ + _11.a = 1.0; + patchOut.m_11_b = 2.0; + gl_out[gl_InvocationID].c_a = 3.0; + 
gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup P spvStorage_11[8]; + threadgroup P (&_11) = spvStorage_11[(gl_GlobalInvocationID.x / 4) % 8]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_function(_11, patchOut, gl_out, gl_InvocationID); +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-0.vert b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.vert new file mode 100644 index 00000000000..ad6079061ec --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-0.vert @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_b [[user(locn1)]]; + float4 m_22_c [[user(locn2)]]; + float4 m_22_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + V _22 = {}; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_b = _22.b; + out.m_22_c = _22.c; + out.m_22_d = _22.d; + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..2b535c312ef --- /dev/null +++ 
b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_a; + float4 m_22_c; + float4 m_22_d; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + V _22 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_a = _22.a; + out.m_22_c = _22.c; + out.m_22_d = _22.d; +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..c3b54c7dff7 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_a; + float m_11_b; +}; + +static inline __attribute__((always_inline)) +void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID) +{ + patchOut.m_11_a = 1.0; + patchOut.m_11_b = 2.0; + c[gl_InvocationID].a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], 
constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup C c[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_function(patchOut, c, gl_out, gl_InvocationID); +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc new file mode 100644 index 00000000000..a881a682a74 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc @@ -0,0 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct P +{ + float a; + float b; +}; + +struct C +{ + float a; + float b; +}; + +struct main0_out +{ + float c_b; + float4 gl_Position; +}; + +struct main0_patchOut +{ + float m_11_a; + float m_11_b; +}; + +static inline __attribute__((always_inline)) +void write_in_function(device main0_patchOut& patchOut, threadgroup C (&c)[4], device main0_out* thread & gl_out, thread uint& gl_InvocationID) +{ + patchOut.m_11_a = 1.0; + patchOut.m_11_b = 2.0; + c[gl_InvocationID].a = 3.0; + gl_out[gl_InvocationID].c_b = 4.0; + gl_out[gl_InvocationID].gl_Position = float4(1.0); +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup C spvStoragec[8][4]; + threadgroup C (&c)[4] = spvStoragec[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; 
+ uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_function(patchOut, c, gl_out, gl_InvocationID); +} + diff --git a/reference/shaders-msl/masking/write-outputs-block.mask-location-1.vert b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.vert new file mode 100644 index 00000000000..3b830290f7c --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs-block.mask-location-1.vert @@ -0,0 +1,34 @@ +#include +#include + +using namespace metal; + +struct V +{ + float4 a; + float4 b; + float4 c; + float4 d; +}; + +struct main0_out +{ + float4 m_22_a [[user(locn0)]]; + float4 m_22_c [[user(locn2)]]; + float4 m_22_d [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + V _22 = {}; + out.gl_Position = float4(1.0); + _22.a = float4(2.0); + _22.b = float4(3.0); + out.m_22_a = _22.a; + out.m_22_c = _22.c; + out.m_22_d = _22.d; + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-clip-distance.vert b/reference/shaders-msl/masking/write-outputs.mask-clip-distance.vert new file mode 100644 index 00000000000..2a0508361eb --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-clip-distance.vert @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, thread float4& v1, thread float4& gl_Position, thread float& gl_PointSize, thread spvUnsafeArray& gl_ClipDistance) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +vertex main0_out main0() +{ + main0_out out = {}; + spvUnsafeArray gl_ClipDistance = {}; + write_in_func(out.v0, out.v1, out.gl_Position, out.gl_PointSize, gl_ClipDistance); + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert b/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..53f76b575e7 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v1; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, device float4& v1, device float4& gl_Position, device float& gl_PointSize, device spvUnsafeArray& gl_ClipDistance) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float4 v0 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + write_in_func(v0, out.v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..e4f047d3e3f --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc @@ -0,0 +1,42 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; 
+ +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v1; +}; + +static inline __attribute__((always_inline)) +void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device float4& v1, device main0_out* thread & gl_out) +{ + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].x = 2.0; + if (gl_InvocationID == 0) + { + v1 = float4(2.0); + ((device float*)&v1)[3u] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position.z = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v0[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(v0, gl_InvocationID, patchOut.v1, gl_out); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc new file mode 100644 index 00000000000..7465cc64db9 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, device main0_out* thread & gl_out) +{ + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup float4 spvStoragev0[8][4]; + threadgroup float4 (&v0)[4] = spvStoragev0[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint 
gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(v0, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-0.tesc new file mode 100644 index 00000000000..0ae265e9e7b --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.tesc @@ -0,0 +1,84 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(threadgroup float4 (&v0)[4], thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, device main0_out* thread & gl_out) +{ + v0[gl_InvocationID] = float4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + 
gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v0[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(v0, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-0.vert b/reference/shaders-msl/masking/write-outputs.mask-location-0.vert new file mode 100644 index 00000000000..12c111aef43 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-0.vert @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, thread float4& v1, thread float4& gl_Position, thread float& gl_PointSize, thread float (&gl_ClipDistance)[2]) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +vertex main0_out main0() +{ + main0_out out = {}; + float4 v0 = {}; + write_in_func(v0, out.v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return 
out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert b/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..8f9cfce5362 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device float4& v0, thread float4& v1, device float4& gl_Position, device float& gl_PointSize, device spvUnsafeArray& gl_ClipDistance) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float4 v1 = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if 
(any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + write_in_func(out.v0, v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..85160a9dee1 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc @@ -0,0 +1,41 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ +}; +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4& v1) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.x = 2.0; + if (gl_InvocationID == 0) + { + v1 = float4(2.0); + ((threadgroup float*)&v1)[3u] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = float4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position.z = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v1; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, v1); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc new file mode 100644 index 00000000000..df057861eee --- /dev/null +++ 
b/reference/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc @@ -0,0 +1,48 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4 (&v1)[2], device float4& v3) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((threadgroup float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((threadgroup float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + threadgroup float4 spvStoragev1[8][2]; + threadgroup float4 (&v1)[2] = spvStoragev1[(gl_GlobalInvocationID.x / 4) % 8]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(gl_out, gl_InvocationID, v1, patchOut.v3); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.tesc b/reference/shaders-msl/masking/write-outputs.mask-location-1.tesc new file mode 100644 index 00000000000..0ad2727673d --- /dev/null +++ 
b/reference/shaders-msl/masking/write-outputs.mask-location-1.tesc @@ -0,0 +1,45 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0; + float4 gl_Position; + float gl_PointSize; +}; + +struct main0_patchOut +{ + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup float4 (&v1)[2], device float4& v3) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((threadgroup float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((threadgroup float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup float4 v1[2]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, v1, patchOut.v3); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-location-1.vert b/reference/shaders-msl/masking/write-outputs.mask-location-1.vert new file mode 100644 index 00000000000..2290e5cca49 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-location-1.vert @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 gl_Position [[position]]; + float gl_PointSize 
[[point_size]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, thread float4& v1, thread float4& gl_Position, thread float& gl_PointSize, thread float (&gl_ClipDistance)[2]) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +vertex main0_out main0() +{ + main0_out out = {}; + float4 v1 = {}; + write_in_func(out.v0, v1, out.gl_Position, out.gl_PointSize, out.gl_ClipDistance); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert b/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert new file mode 100644 index 00000000000..07494ea399a --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 v0; + float4 v1; + float4 gl_Position; + spvUnsafeArray gl_ClipDistance; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device float4& v0, device float4& v1, device float4& gl_Position, thread float& gl_PointSize, device spvUnsafeArray& gl_ClipDistance) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + float gl_PointSize = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + write_in_func(out.v0, out.v1, out.gl_Position, gl_PointSize, out.gl_ClipDistance); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc new file mode 100644 index 00000000000..05d58634ef2 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc @@ -0,0 +1,95 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic 
ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4]) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out_masked[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) 
+{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.tesc b/reference/shaders-msl/masking/write-outputs.mask-point-size.tesc new file mode 100644 index 00000000000..8ec2a663b7c --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-point-size.tesc @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4]) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out[gl_InvocationID].gl_Position = float4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out_masked[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = 
&spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-point-size.vert b/reference/shaders-msl/masking/write-outputs.mask-point-size.vert new file mode 100644 index 00000000000..b37b92dd4a6 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-point-size.vert @@ -0,0 +1,38 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 v0 [[user(locn0)]]; + float4 v1 [[user(locn1)]]; + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(thread float4& v0, thread float4& v1, thread float4& gl_Position, thread float& gl_PointSize, thread float (&gl_ClipDistance)[2]) +{ + v0 = float4(1.0); + v1 = float4(2.0); + gl_Position = float4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +vertex main0_out main0() +{ + main0_out out = {}; + float gl_PointSize = {}; + write_in_func(out.v0, out.v1, out.gl_Position, gl_PointSize, out.gl_ClipDistance); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/reference/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc new file mode 100644 index 00000000000..86bc7d37be2 --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc @@ -0,0 +1,95 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ 
+ T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4]) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(10.0); + gl_out_masked[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + 
threadgroup gl_PerVertex spvStoragegl_out_masked[8][4]; + threadgroup gl_PerVertex (&gl_out_masked)[4] = spvStoragegl_out_masked[(gl_GlobalInvocationID.x / 4) % 8]; + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 4]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked); +} + diff --git a/reference/shaders-msl/masking/write-outputs.mask-position.tesc b/reference/shaders-msl/masking/write-outputs.mask-position.tesc new file mode 100644 index 00000000000..da0d2a2d10f --- /dev/null +++ b/reference/shaders-msl/masking/write-outputs.mask-position.tesc @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct gl_PerVertex +{ + float4 gl_Position; + float gl_PointSize; + spvUnsafeArray gl_ClipDistance; + spvUnsafeArray gl_CullDistance; +}; + +struct main0_out +{ + float4 v0; + float gl_PointSize; +}; + +struct main0_patchOut +{ + spvUnsafeArray v1; + float4 v3; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* 
thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& v1, device float4& v3, threadgroup gl_PerVertex (&gl_out_masked)[4]) +{ + gl_out[gl_InvocationID].v0 = float4(1.0); + gl_out[gl_InvocationID].v0.z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = float4(2.0); + ((device float*)&v1[0])[0u] = 3.0; + v1[1] = float4(2.0); + ((device float*)&v1[1])[0u] = 5.0; + } + v3 = float4(5.0); + gl_out_masked[gl_InvocationID].gl_Position = float4(10.0); + gl_out_masked[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + threadgroup gl_PerVertex gl_out_masked[4]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, patchOut.v1, patchOut.v3, gl_out_masked); +} + diff --git a/reference/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc b/reference/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc new file mode 100644 index 00000000000..c11c7410c65 --- /dev/null +++ b/reference/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc @@ -0,0 +1,111 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct S +{ + int x; + float4 y; + spvUnsafeArray z; +}; + +struct TheBlock +{ + spvUnsafeArray blockFa; + spvUnsafeArray blockSa; + float blockF; +}; + +struct main0_patchOut +{ + float2 in_te_positionScale; + float2 in_te_positionOffset; + spvUnsafeArray tcBlock; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_179; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 5]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 5, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 5, spvIndirectParams[1] - 1); + float v = 1.2999999523162841796875; + for (int i0 = 0; i0 < 2; i0++) + { + for (int i1 = 0; i1 < 3; i1++) + { + patchOut.tcBlock[i0].blockFa[i1] = v; + v += 0.4000000059604644775390625; + } + for (int i1_1 = 0; i1_1 < 2; i1_1++) + { + patchOut.tcBlock[i0].blockSa[i1_1].x = int(v); + v += 0.4000000059604644775390625; + patchOut.tcBlock[i0].blockSa[i1_1].y = float4(v, v + 0.800000011920928955078125, v + 
1.60000002384185791015625, v + 2.400000095367431640625); + v += 0.4000000059604644775390625; + for (int i2 = 0; i2 < 2; i2++) + { + patchOut.tcBlock[i0].blockSa[i1_1].z[i2] = v; + v += 0.4000000059604644775390625; + } + } + patchOut.tcBlock[i0].blockF = v; + v += 0.4000000059604644775390625; + } + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(gl_in[0].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(gl_in[1].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(gl_in[2].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(gl_in[3].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(gl_in[4].in_tc_attr.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(gl_in[5].in_tc_attr.x); + patchOut.in_te_positionScale = float2(gl_in[6].in_tc_attr.x, gl_in[7].in_tc_attr.x); + patchOut.in_te_positionOffset = float2(gl_in[8].in_tc_attr.x, gl_in[9].in_tc_attr.x); +} + diff --git a/reference/shaders-msl/tesc/basic.multi-patch.tesc b/reference/shaders-msl/tesc/basic.multi-patch.tesc new file mode 100644 index 00000000000..fe268316737 --- /dev/null +++ b/reference/shaders-msl/tesc/basic.multi-patch.tesc @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_patchOut +{ + float3 vFoo; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(6.900000095367431640625); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(8.8999996185302734375); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(6.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(3.900000095367431640625); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(4.900000095367431640625); + patchOut.vFoo = float3(1.0); +} + diff --git a/reference/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc b/reference/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc new file mode 100644 index 00000000000..d266d2512a9 --- /dev/null +++ b/reference/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc @@ -0,0 +1,134 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct Block_1 +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + 
float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; + float4 gl_Position; +}; + +struct main0_in +{ + spvUnsafeArray in_a; + float in_b; + float2x2 in_m; + Meep in_meep; + spvUnsafeArray in_meeps; + spvUnsafeArray in_B_a; + float in_B_b; + float2x2 in_B_m; + Meep in_B_meep; + spvUnsafeArray in_B_meeps; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device main0_in* thread & gl_in) +{ + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].a[0] = gl_in[gl_InvocationID].in_a[0]; + gl_out[gl_InvocationID].a[1] = gl_in[gl_InvocationID].in_a[1]; + gl_out[gl_InvocationID].b = gl_in[gl_InvocationID].in_b; + gl_out[gl_InvocationID].m = gl_in[gl_InvocationID].in_m; + gl_out[gl_InvocationID].meep.a = gl_in[gl_InvocationID].in_meep.a; + gl_out[gl_InvocationID].meep.b = gl_in[gl_InvocationID].in_meep.b; + gl_out[gl_InvocationID].meeps[0].a = gl_in[gl_InvocationID].in_meeps[0].a; + gl_out[gl_InvocationID].meeps[0].b = gl_in[gl_InvocationID].in_meeps[0].b; + gl_out[gl_InvocationID].meeps[1].a = gl_in[gl_InvocationID].in_meeps[1].a; + gl_out[gl_InvocationID].meeps[1].b = gl_in[gl_InvocationID].in_meeps[1].b; + gl_out[gl_InvocationID].B_a[0] = gl_in[gl_InvocationID].in_B_a[0]; + gl_out[gl_InvocationID].B_a[1] = gl_in[gl_InvocationID].in_B_a[1]; + gl_out[gl_InvocationID].B_b = gl_in[gl_InvocationID].in_B_b; + gl_out[gl_InvocationID].B_m = gl_in[gl_InvocationID].in_B_m; + gl_out[gl_InvocationID].B_meep.a = gl_in[gl_InvocationID].in_B_meep.a; + gl_out[gl_InvocationID].B_meep.b = gl_in[gl_InvocationID].in_B_meep.b; + gl_out[gl_InvocationID].B_meeps[0].a = gl_in[gl_InvocationID].in_B_meeps[0].a; + gl_out[gl_InvocationID].B_meeps[0].b = gl_in[gl_InvocationID].in_B_meeps[0].b; + gl_out[gl_InvocationID].B_meeps[1].a = gl_in[gl_InvocationID].in_B_meeps[1].a; + gl_out[gl_InvocationID].B_meeps[1].b = gl_in[gl_InvocationID].in_B_meeps[1].b; +} + +kernel void 
main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + write_in_func(gl_out, gl_InvocationID, gl_in); +} + diff --git a/reference/shaders-msl/tesc/complex-control-point-inout-types.tesc b/reference/shaders-msl/tesc/complex-control-point-inout-types.tesc new file mode 100644 index 00000000000..48f10baec85 --- /dev/null +++ b/reference/shaders-msl/tesc/complex-control-point-inout-types.tesc @@ -0,0 +1,138 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct Block_1 +{ + spvUnsafeArray a; + float b; + float2x2 m; +}; + +struct main0_out +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; + float4 gl_Position; +}; + +struct main0_in +{ + float in_a_0 [[attribute(0)]]; + float in_a_1 [[attribute(1)]]; + float in_b [[attribute(2)]]; + float2 in_m_0 [[attribute(3)]]; + float2 in_m_1 [[attribute(4)]]; + float in_meep_a [[attribute(5)]]; + float in_meep_b [[attribute(6)]]; + float in_B_a_0 [[attribute(11)]]; + float in_B_a_1 [[attribute(12)]]; + float in_B_b [[attribute(13)]]; + float2 in_B_m_0 [[attribute(14)]]; + float2 in_B_m_1 [[attribute(15)]]; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, threadgroup main0_in* thread & gl_in) +{ + gl_out[gl_InvocationID].gl_Position = float4(1.0); + gl_out[gl_InvocationID].a[0] = gl_in[gl_InvocationID].in_a_0; + gl_out[gl_InvocationID].a[1] = gl_in[gl_InvocationID].in_a_1; + gl_out[gl_InvocationID].b = gl_in[gl_InvocationID].in_b; + float2x2 _72 = 
float2x2(gl_in[gl_InvocationID].in_m_0, gl_in[gl_InvocationID].in_m_1); + gl_out[gl_InvocationID].m = _72; + gl_out[gl_InvocationID].meep.a = gl_in[gl_InvocationID].in_meep_a; + gl_out[gl_InvocationID].meep.b = gl_in[gl_InvocationID].in_meep_b; + gl_out[gl_InvocationID].meeps[0].a = 1.0; + gl_out[gl_InvocationID].meeps[0].b = 2.0; + gl_out[gl_InvocationID].meeps[1].a = 3.0; + gl_out[gl_InvocationID].meeps[1].b = 4.0; + gl_out[gl_InvocationID].B_a[0] = gl_in[gl_InvocationID].in_B_a_0; + gl_out[gl_InvocationID].B_a[1] = gl_in[gl_InvocationID].in_B_a_1; + gl_out[gl_InvocationID].B_b = gl_in[gl_InvocationID].in_B_b; + float2x2 _134 = float2x2(gl_in[gl_InvocationID].in_B_m_0, gl_in[gl_InvocationID].in_B_m_1); + gl_out[gl_InvocationID].B_m = _134; + gl_out[gl_InvocationID].B_meep.a = 10.0; + gl_out[gl_InvocationID].B_meep.b = 20.0; + gl_out[gl_InvocationID].B_meeps[0].a = 5.0; + gl_out[gl_InvocationID].B_meeps[0].b = 6.0; + gl_out[gl_InvocationID].B_meeps[1].a = 7.0; + gl_out[gl_InvocationID].B_meeps[1].b = 8.0; +} + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + write_in_func(gl_out, gl_InvocationID, gl_in); +} + diff --git a/reference/shaders-msl/tesc/complex-patch-out-types.tesc b/reference/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000000..bd24f58029b --- /dev/null +++ b/reference/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,113 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" 
+#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Meep +{ + float a; + float b; +}; + +struct Block +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; +}; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_patchOut +{ + spvUnsafeArray a; + float b; + float2x2 m; + Meep meep; + spvUnsafeArray meeps; + spvUnsafeArray B_a; + float B_b; + float2x2 B_m; + Meep B_meep; + spvUnsafeArray B_meeps; +}; + +static inline __attribute__((always_inline)) +void write_in_func(device main0_out* thread & gl_out, thread uint& gl_InvocationID, device spvUnsafeArray& a, device float& b, device float2x2& m, device Meep& meep, device spvUnsafeArray& meeps, device main0_patchOut& patchOut) +{ + gl_out[gl_InvocationID].gl_Position = float4(1.0); + a[0] = 1.0; + a[1] = 2.0; + b = 3.0; + m = float2x2(float2(2.0, 0.0), float2(0.0, 2.0)); + meep.a = 4.0; + meep.b = 5.0; + meeps[0].a = 6.0; + meeps[0].b = 7.0; + meeps[1].a = 8.0; + meeps[1].b = 9.0; + patchOut.B_a[0] = 1.0; + patchOut.B_a[1] = 2.0; + patchOut.B_b = 3.0; + patchOut.B_m = float2x2(float2(4.0, 0.0), float2(0.0, 4.0)); + patchOut.B_meep.a = 4.0; + patchOut.B_meep.b = 5.0; + patchOut.B_meeps[0].a = 6.0; + 
patchOut.B_meeps[0].b = 7.0; + patchOut.B_meeps[1].a = 8.0; + patchOut.B_meeps[1].b = 9.0; +} + +kernel void main0(uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + write_in_func(gl_out, gl_InvocationID, patchOut.a, patchOut.b, patchOut.m, patchOut.meep, patchOut.meeps, patchOut); +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc b/reference/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc new file mode 100644 index 00000000000..28effad215f --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc @@ -0,0 +1,68 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4x4 vOutputs; +}; + +struct main0_in +{ + float4x4 vInputs; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + spvUnsafeArray _16 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, 
gl_in[30].vInputs, gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _16; + gl_out[gl_InvocationID].vOutputs = tmp[gl_InvocationID]; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array-of-matrix.tesc b/reference/shaders-msl/tesc/load-control-point-array-of-matrix.tesc new file mode 100644 index 00000000000..46d4b4ad588 --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array-of-matrix.tesc @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4x4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs_0 [[attribute(0)]]; + float4 vInputs_1 [[attribute(1)]]; + float4 vInputs_2 [[attribute(2)]]; + float4 vInputs_3 [[attribute(3)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < 
spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + spvUnsafeArray _16 = spvUnsafeArray({ float4x4(gl_in[0].vInputs_0, gl_in[0].vInputs_1, gl_in[0].vInputs_2, gl_in[0].vInputs_3), float4x4(gl_in[1].vInputs_0, gl_in[1].vInputs_1, gl_in[1].vInputs_2, gl_in[1].vInputs_3), float4x4(gl_in[2].vInputs_0, gl_in[2].vInputs_1, gl_in[2].vInputs_2, gl_in[2].vInputs_3), float4x4(gl_in[3].vInputs_0, gl_in[3].vInputs_1, gl_in[3].vInputs_2, gl_in[3].vInputs_3), float4x4(gl_in[4].vInputs_0, gl_in[4].vInputs_1, gl_in[4].vInputs_2, gl_in[4].vInputs_3), float4x4(gl_in[5].vInputs_0, gl_in[5].vInputs_1, gl_in[5].vInputs_2, gl_in[5].vInputs_3), float4x4(gl_in[6].vInputs_0, gl_in[6].vInputs_1, gl_in[6].vInputs_2, gl_in[6].vInputs_3), float4x4(gl_in[7].vInputs_0, gl_in[7].vInputs_1, gl_in[7].vInputs_2, gl_in[7].vInputs_3), float4x4(gl_in[8].vInputs_0, gl_in[8].vInputs_1, gl_in[8].vInputs_2, gl_in[8].vInputs_3), float4x4(gl_in[9].vInputs_0, gl_in[9].vInputs_1, gl_in[9].vInputs_2, gl_in[9].vInputs_3), float4x4(gl_in[10].vInputs_0, gl_in[10].vInputs_1, gl_in[10].vInputs_2, gl_in[10].vInputs_3), float4x4(gl_in[11].vInputs_0, gl_in[11].vInputs_1, gl_in[11].vInputs_2, gl_in[11].vInputs_3), float4x4(gl_in[12].vInputs_0, gl_in[12].vInputs_1, gl_in[12].vInputs_2, gl_in[12].vInputs_3), float4x4(gl_in[13].vInputs_0, gl_in[13].vInputs_1, gl_in[13].vInputs_2, gl_in[13].vInputs_3), float4x4(gl_in[14].vInputs_0, gl_in[14].vInputs_1, gl_in[14].vInputs_2, gl_in[14].vInputs_3), float4x4(gl_in[15].vInputs_0, gl_in[15].vInputs_1, gl_in[15].vInputs_2, gl_in[15].vInputs_3), float4x4(gl_in[16].vInputs_0, gl_in[16].vInputs_1, gl_in[16].vInputs_2, gl_in[16].vInputs_3), float4x4(gl_in[17].vInputs_0, gl_in[17].vInputs_1, gl_in[17].vInputs_2, gl_in[17].vInputs_3), float4x4(gl_in[18].vInputs_0, gl_in[18].vInputs_1, gl_in[18].vInputs_2, gl_in[18].vInputs_3), float4x4(gl_in[19].vInputs_0, gl_in[19].vInputs_1, gl_in[19].vInputs_2, 
gl_in[19].vInputs_3), float4x4(gl_in[20].vInputs_0, gl_in[20].vInputs_1, gl_in[20].vInputs_2, gl_in[20].vInputs_3), float4x4(gl_in[21].vInputs_0, gl_in[21].vInputs_1, gl_in[21].vInputs_2, gl_in[21].vInputs_3), float4x4(gl_in[22].vInputs_0, gl_in[22].vInputs_1, gl_in[22].vInputs_2, gl_in[22].vInputs_3), float4x4(gl_in[23].vInputs_0, gl_in[23].vInputs_1, gl_in[23].vInputs_2, gl_in[23].vInputs_3), float4x4(gl_in[24].vInputs_0, gl_in[24].vInputs_1, gl_in[24].vInputs_2, gl_in[24].vInputs_3), float4x4(gl_in[25].vInputs_0, gl_in[25].vInputs_1, gl_in[25].vInputs_2, gl_in[25].vInputs_3), float4x4(gl_in[26].vInputs_0, gl_in[26].vInputs_1, gl_in[26].vInputs_2, gl_in[26].vInputs_3), float4x4(gl_in[27].vInputs_0, gl_in[27].vInputs_1, gl_in[27].vInputs_2, gl_in[27].vInputs_3), float4x4(gl_in[28].vInputs_0, gl_in[28].vInputs_1, gl_in[28].vInputs_2, gl_in[28].vInputs_3), float4x4(gl_in[29].vInputs_0, gl_in[29].vInputs_1, gl_in[29].vInputs_2, gl_in[29].vInputs_3), float4x4(gl_in[30].vInputs_0, gl_in[30].vInputs_1, gl_in[30].vInputs_2, gl_in[30].vInputs_3), float4x4(gl_in[31].vInputs_0, gl_in[31].vInputs_1, gl_in[31].vInputs_2, gl_in[31].vInputs_3) }); + spvUnsafeArray tmp; + tmp = _16; + gl_out[gl_InvocationID].vOutputs = tmp[gl_InvocationID]; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc b/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc new file mode 100644 index 00000000000..d24c271dd4c --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc @@ -0,0 +1,76 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexData +{ + float4x4 a; + spvUnsafeArray b; + float4 c; +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + VertexData vInputs; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + spvUnsafeArray _19 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, 
gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _19; + VertexData tmp_single = gl_in[gl_InvocationID ^ 1].vInputs; + gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + tmp_single.c; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array-of-struct.tesc b/reference/shaders-msl/tesc/load-control-point-array-of-struct.tesc new file mode 100644 index 00000000000..08392cabbca --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array-of-struct.tesc @@ -0,0 +1,86 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct VertexData +{ + float4x4 a; + spvUnsafeArray b; + float4 c; +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs_a_0 [[attribute(0)]]; + float4 vInputs_a_1 [[attribute(1)]]; + float4 vInputs_a_2 [[attribute(2)]]; + float4 vInputs_a_3 [[attribute(3)]]; + float4 vInputs_b_0 [[attribute(4)]]; + float4 vInputs_b_1 [[attribute(5)]]; + float4 vInputs_c [[attribute(6)]]; +}; + +kernel void main0(main0_in in 
[[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + spvUnsafeArray _19 = spvUnsafeArray({ VertexData{ float4x4(gl_in[0].vInputs_a_0, gl_in[0].vInputs_a_1, gl_in[0].vInputs_a_2, gl_in[0].vInputs_a_3), spvUnsafeArray({ gl_in[0].vInputs_b_0, gl_in[0].vInputs_b_1 }), gl_in[0].vInputs_c }, VertexData{ float4x4(gl_in[1].vInputs_a_0, gl_in[1].vInputs_a_1, gl_in[1].vInputs_a_2, gl_in[1].vInputs_a_3), spvUnsafeArray({ gl_in[1].vInputs_b_0, gl_in[1].vInputs_b_1 }), gl_in[1].vInputs_c }, VertexData{ float4x4(gl_in[2].vInputs_a_0, gl_in[2].vInputs_a_1, gl_in[2].vInputs_a_2, gl_in[2].vInputs_a_3), spvUnsafeArray({ gl_in[2].vInputs_b_0, gl_in[2].vInputs_b_1 }), gl_in[2].vInputs_c }, VertexData{ float4x4(gl_in[3].vInputs_a_0, gl_in[3].vInputs_a_1, gl_in[3].vInputs_a_2, gl_in[3].vInputs_a_3), spvUnsafeArray({ gl_in[3].vInputs_b_0, gl_in[3].vInputs_b_1 }), gl_in[3].vInputs_c }, VertexData{ float4x4(gl_in[4].vInputs_a_0, gl_in[4].vInputs_a_1, gl_in[4].vInputs_a_2, gl_in[4].vInputs_a_3), spvUnsafeArray({ gl_in[4].vInputs_b_0, gl_in[4].vInputs_b_1 }), gl_in[4].vInputs_c }, VertexData{ float4x4(gl_in[5].vInputs_a_0, gl_in[5].vInputs_a_1, gl_in[5].vInputs_a_2, gl_in[5].vInputs_a_3), spvUnsafeArray({ gl_in[5].vInputs_b_0, gl_in[5].vInputs_b_1 }), gl_in[5].vInputs_c }, VertexData{ float4x4(gl_in[6].vInputs_a_0, gl_in[6].vInputs_a_1, gl_in[6].vInputs_a_2, gl_in[6].vInputs_a_3), spvUnsafeArray({ gl_in[6].vInputs_b_0, gl_in[6].vInputs_b_1 }), gl_in[6].vInputs_c }, VertexData{ 
float4x4(gl_in[7].vInputs_a_0, gl_in[7].vInputs_a_1, gl_in[7].vInputs_a_2, gl_in[7].vInputs_a_3), spvUnsafeArray({ gl_in[7].vInputs_b_0, gl_in[7].vInputs_b_1 }), gl_in[7].vInputs_c }, VertexData{ float4x4(gl_in[8].vInputs_a_0, gl_in[8].vInputs_a_1, gl_in[8].vInputs_a_2, gl_in[8].vInputs_a_3), spvUnsafeArray({ gl_in[8].vInputs_b_0, gl_in[8].vInputs_b_1 }), gl_in[8].vInputs_c }, VertexData{ float4x4(gl_in[9].vInputs_a_0, gl_in[9].vInputs_a_1, gl_in[9].vInputs_a_2, gl_in[9].vInputs_a_3), spvUnsafeArray({ gl_in[9].vInputs_b_0, gl_in[9].vInputs_b_1 }), gl_in[9].vInputs_c }, VertexData{ float4x4(gl_in[10].vInputs_a_0, gl_in[10].vInputs_a_1, gl_in[10].vInputs_a_2, gl_in[10].vInputs_a_3), spvUnsafeArray({ gl_in[10].vInputs_b_0, gl_in[10].vInputs_b_1 }), gl_in[10].vInputs_c }, VertexData{ float4x4(gl_in[11].vInputs_a_0, gl_in[11].vInputs_a_1, gl_in[11].vInputs_a_2, gl_in[11].vInputs_a_3), spvUnsafeArray({ gl_in[11].vInputs_b_0, gl_in[11].vInputs_b_1 }), gl_in[11].vInputs_c }, VertexData{ float4x4(gl_in[12].vInputs_a_0, gl_in[12].vInputs_a_1, gl_in[12].vInputs_a_2, gl_in[12].vInputs_a_3), spvUnsafeArray({ gl_in[12].vInputs_b_0, gl_in[12].vInputs_b_1 }), gl_in[12].vInputs_c }, VertexData{ float4x4(gl_in[13].vInputs_a_0, gl_in[13].vInputs_a_1, gl_in[13].vInputs_a_2, gl_in[13].vInputs_a_3), spvUnsafeArray({ gl_in[13].vInputs_b_0, gl_in[13].vInputs_b_1 }), gl_in[13].vInputs_c }, VertexData{ float4x4(gl_in[14].vInputs_a_0, gl_in[14].vInputs_a_1, gl_in[14].vInputs_a_2, gl_in[14].vInputs_a_3), spvUnsafeArray({ gl_in[14].vInputs_b_0, gl_in[14].vInputs_b_1 }), gl_in[14].vInputs_c }, VertexData{ float4x4(gl_in[15].vInputs_a_0, gl_in[15].vInputs_a_1, gl_in[15].vInputs_a_2, gl_in[15].vInputs_a_3), spvUnsafeArray({ gl_in[15].vInputs_b_0, gl_in[15].vInputs_b_1 }), gl_in[15].vInputs_c }, VertexData{ float4x4(gl_in[16].vInputs_a_0, gl_in[16].vInputs_a_1, gl_in[16].vInputs_a_2, gl_in[16].vInputs_a_3), spvUnsafeArray({ gl_in[16].vInputs_b_0, gl_in[16].vInputs_b_1 }), gl_in[16].vInputs_c }, 
VertexData{ float4x4(gl_in[17].vInputs_a_0, gl_in[17].vInputs_a_1, gl_in[17].vInputs_a_2, gl_in[17].vInputs_a_3), spvUnsafeArray({ gl_in[17].vInputs_b_0, gl_in[17].vInputs_b_1 }), gl_in[17].vInputs_c }, VertexData{ float4x4(gl_in[18].vInputs_a_0, gl_in[18].vInputs_a_1, gl_in[18].vInputs_a_2, gl_in[18].vInputs_a_3), spvUnsafeArray({ gl_in[18].vInputs_b_0, gl_in[18].vInputs_b_1 }), gl_in[18].vInputs_c }, VertexData{ float4x4(gl_in[19].vInputs_a_0, gl_in[19].vInputs_a_1, gl_in[19].vInputs_a_2, gl_in[19].vInputs_a_3), spvUnsafeArray({ gl_in[19].vInputs_b_0, gl_in[19].vInputs_b_1 }), gl_in[19].vInputs_c }, VertexData{ float4x4(gl_in[20].vInputs_a_0, gl_in[20].vInputs_a_1, gl_in[20].vInputs_a_2, gl_in[20].vInputs_a_3), spvUnsafeArray({ gl_in[20].vInputs_b_0, gl_in[20].vInputs_b_1 }), gl_in[20].vInputs_c }, VertexData{ float4x4(gl_in[21].vInputs_a_0, gl_in[21].vInputs_a_1, gl_in[21].vInputs_a_2, gl_in[21].vInputs_a_3), spvUnsafeArray({ gl_in[21].vInputs_b_0, gl_in[21].vInputs_b_1 }), gl_in[21].vInputs_c }, VertexData{ float4x4(gl_in[22].vInputs_a_0, gl_in[22].vInputs_a_1, gl_in[22].vInputs_a_2, gl_in[22].vInputs_a_3), spvUnsafeArray({ gl_in[22].vInputs_b_0, gl_in[22].vInputs_b_1 }), gl_in[22].vInputs_c }, VertexData{ float4x4(gl_in[23].vInputs_a_0, gl_in[23].vInputs_a_1, gl_in[23].vInputs_a_2, gl_in[23].vInputs_a_3), spvUnsafeArray({ gl_in[23].vInputs_b_0, gl_in[23].vInputs_b_1 }), gl_in[23].vInputs_c }, VertexData{ float4x4(gl_in[24].vInputs_a_0, gl_in[24].vInputs_a_1, gl_in[24].vInputs_a_2, gl_in[24].vInputs_a_3), spvUnsafeArray({ gl_in[24].vInputs_b_0, gl_in[24].vInputs_b_1 }), gl_in[24].vInputs_c }, VertexData{ float4x4(gl_in[25].vInputs_a_0, gl_in[25].vInputs_a_1, gl_in[25].vInputs_a_2, gl_in[25].vInputs_a_3), spvUnsafeArray({ gl_in[25].vInputs_b_0, gl_in[25].vInputs_b_1 }), gl_in[25].vInputs_c }, VertexData{ float4x4(gl_in[26].vInputs_a_0, gl_in[26].vInputs_a_1, gl_in[26].vInputs_a_2, gl_in[26].vInputs_a_3), spvUnsafeArray({ gl_in[26].vInputs_b_0, 
gl_in[26].vInputs_b_1 }), gl_in[26].vInputs_c }, VertexData{ float4x4(gl_in[27].vInputs_a_0, gl_in[27].vInputs_a_1, gl_in[27].vInputs_a_2, gl_in[27].vInputs_a_3), spvUnsafeArray({ gl_in[27].vInputs_b_0, gl_in[27].vInputs_b_1 }), gl_in[27].vInputs_c }, VertexData{ float4x4(gl_in[28].vInputs_a_0, gl_in[28].vInputs_a_1, gl_in[28].vInputs_a_2, gl_in[28].vInputs_a_3), spvUnsafeArray({ gl_in[28].vInputs_b_0, gl_in[28].vInputs_b_1 }), gl_in[28].vInputs_c }, VertexData{ float4x4(gl_in[29].vInputs_a_0, gl_in[29].vInputs_a_1, gl_in[29].vInputs_a_2, gl_in[29].vInputs_a_3), spvUnsafeArray({ gl_in[29].vInputs_b_0, gl_in[29].vInputs_b_1 }), gl_in[29].vInputs_c }, VertexData{ float4x4(gl_in[30].vInputs_a_0, gl_in[30].vInputs_a_1, gl_in[30].vInputs_a_2, gl_in[30].vInputs_a_3), spvUnsafeArray({ gl_in[30].vInputs_b_0, gl_in[30].vInputs_b_1 }), gl_in[30].vInputs_c }, VertexData{ float4x4(gl_in[31].vInputs_a_0, gl_in[31].vInputs_a_1, gl_in[31].vInputs_a_2, gl_in[31].vInputs_a_3), spvUnsafeArray({ gl_in[31].vInputs_b_0, gl_in[31].vInputs_b_1 }), gl_in[31].vInputs_c } }); + spvUnsafeArray tmp; + tmp = _19; + int _27 = gl_InvocationID ^ 1; + VertexData _30 = VertexData{ float4x4(gl_in[_27].vInputs_a_0, gl_in[_27].vInputs_a_1, gl_in[_27].vInputs_a_2, gl_in[_27].vInputs_a_3), spvUnsafeArray({ gl_in[_27].vInputs_b_0, gl_in[_27].vInputs_b_1 }), gl_in[_27].vInputs_c }; + VertexData tmp_single = _30; + gl_out[gl_InvocationID].vOutputs = ((tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1]) + tmp[gl_InvocationID].c) + tmp_single.c; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array.multi-patch.tesc b/reference/shaders-msl/tesc/load-control-point-array.multi-patch.tesc new file mode 100644 index 00000000000..45baadb6f26 --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array.multi-patch.tesc @@ -0,0 +1,69 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace 
metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs; + ushort2 m_43; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + spvUnsafeArray _15 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, 
gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _15; + gl_out[gl_InvocationID].vOutputs = tmp[gl_InvocationID]; +} + diff --git a/reference/shaders-msl/tesc/load-control-point-array.tesc b/reference/shaders-msl/tesc/load-control-point-array.tesc new file mode 100644 index 00000000000..d04571ae364 --- /dev/null +++ b/reference/shaders-msl/tesc/load-control-point-array.tesc @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 vOutputs; +}; + +struct main0_in +{ + float4 vInputs [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + 
threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + spvUnsafeArray _15 = spvUnsafeArray({ gl_in[0].vInputs, gl_in[1].vInputs, gl_in[2].vInputs, gl_in[3].vInputs, gl_in[4].vInputs, gl_in[5].vInputs, gl_in[6].vInputs, gl_in[7].vInputs, gl_in[8].vInputs, gl_in[9].vInputs, gl_in[10].vInputs, gl_in[11].vInputs, gl_in[12].vInputs, gl_in[13].vInputs, gl_in[14].vInputs, gl_in[15].vInputs, gl_in[16].vInputs, gl_in[17].vInputs, gl_in[18].vInputs, gl_in[19].vInputs, gl_in[20].vInputs, gl_in[21].vInputs, gl_in[22].vInputs, gl_in[23].vInputs, gl_in[24].vInputs, gl_in[25].vInputs, gl_in[26].vInputs, gl_in[27].vInputs, gl_in[28].vInputs, gl_in[29].vInputs, gl_in[30].vInputs, gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _15; + gl_out[gl_InvocationID].vOutputs = tmp[gl_InvocationID]; +} + diff --git a/reference/shaders-msl/tesc/matrix-output.multi-patch.tesc b/reference/shaders-msl/tesc/matrix-output.multi-patch.tesc new file mode 100644 index 00000000000..98b9dd05245 --- /dev/null +++ b/reference/shaders-msl/tesc/matrix-output.multi-patch.tesc @@ -0,0 +1,39 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float in_te_attr; + float4x3 in_te_data0; + float4x3 in_te_data1; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_103; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + float _15 = float(gl_InvocationID); + float4x3 d = 
float4x3(float3(_15, 0.0, 0.0), float3(0.0, _15, 0.0), float3(0.0, 0.0, _15), float3(0.0)); + gl_out[gl_InvocationID].in_te_data0 = d; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + int _42 = (gl_InvocationID + 1) % 3; + gl_out[gl_InvocationID].in_te_data1 = float4x3(d[0] + gl_out[_42].in_te_data0[0], d[1] + gl_out[_42].in_te_data0[1], d[2] + gl_out[_42].in_te_data0[2], d[3] + gl_out[_42].in_te_data0[3]); + gl_out[gl_InvocationID].in_te_attr = gl_in[gl_InvocationID].in_tc_attr.x; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(1.0); +} + diff --git a/reference/shaders-msl/tesc/reload-tess-level.multi-patch.tesc b/reference/shaders-msl/tesc/reload-tess-level.multi-patch.tesc new file mode 100644 index 00000000000..ae33de517a3 --- /dev/null +++ b/reference/shaders-msl/tesc/reload-tess-level.multi-patch.tesc @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_in +{ + uint3 m_82; + ushort2 m_86; + float4 gl_Position; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 4]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 4; + uint gl_PrimitiveID = 
min(gl_GlobalInvocationID.x / 4, spvIndirectParams[1] - 1); + if (gl_InvocationID == 0) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5)); + } + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} + diff --git a/reference/shaders-msl/tesc/reload-tess-level.tesc b/reference/shaders-msl/tesc/reload-tess-level.tesc new file mode 100644 index 00000000000..eafc50607d7 --- /dev/null +++ b/reference/shaders-msl/tesc/reload-tess-level.tesc @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position; +}; + +struct main0_in +{ + float4 gl_Position [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 4]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 4) + return; + if (gl_InvocationID == 0) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(2.0); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(3.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(4.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(5.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]), 0.5)); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(mix(float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]), float(spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]), 0.5)); + } + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} + diff --git a/reference/shaders-msl/tesc/struct-output.multi-patch.tesc b/reference/shaders-msl/tesc/struct-output.multi-patch.tesc new file mode 100644 index 00000000000..eaab245c1c8 --- /dev/null +++ b/reference/shaders-msl/tesc/struct-output.multi-patch.tesc @@ -0,0 +1,45 @@ +#include +#include + +using namespace metal; + +struct te_data +{ + float a; + float b; + uint c; +}; + +struct main0_out +{ + float in_te_attr; + te_data in_te_data0; + te_data in_te_data1; +}; + +struct main0_in +{ + float3 in_tc_attr; + ushort2 m_107; +}; + +kernel void main0(uint3 gl_GlobalInvocationID [[thread_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_out* gl_out = &spvOut[gl_GlobalInvocationID.x - gl_GlobalInvocationID.x % 3]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_InvocationID = gl_GlobalInvocationID.x % 3; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 3, spvIndirectParams[1] - 1); + te_data d = te_data{ float(gl_InvocationID), float(gl_InvocationID + 1), uint(gl_InvocationID) }; + 
gl_out[gl_InvocationID].in_te_data0 = d; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + te_data e = gl_out[(gl_InvocationID + 1) % 3].in_te_data0; + gl_out[gl_InvocationID].in_te_data1 = te_data{ d.a + e.a, d.b + e.b, d.c + e.c }; + gl_out[gl_InvocationID].in_te_attr = gl_in[gl_InvocationID].in_tc_attr.x; + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(1.0); +} + diff --git a/reference/shaders-msl/tesc/water_tess.multi-patch.tesc b/reference/shaders-msl/tesc/water_tess.multi-patch.tesc new file mode 100644 index 00000000000..f302302ae27 --- /dev/null +++ b/reference/shaders-msl/tesc/water_tess.multi-patch.tesc @@ -0,0 +1,135 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct UBO +{ + float4 uScale; + float3 uCamPos; + float2 uPatchSize; + float2 uMaxTessLevel; + float uDistanceMod; + float4 uFrustum[6]; +}; + +struct main0_patchOut +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +struct main0_in +{ + float3 vPatchPosBase; + ushort2 m_430; +}; + +static inline __attribute__((always_inline)) +bool frustum_cull(thread const float2& p0, constant UBO& v_41) +{ + float2 min_xz = (p0 - float2(10.0)) * v_41.uScale.xy; + float2 max_xz = ((p0 + v_41.uPatchSize) + float2(10.0)) * v_41.uScale.xy; + float3 bb_min = float3(min_xz.x, -10.0, min_xz.y); + float3 bb_max = float3(max_xz.x, 10.0, max_xz.y); + float3 center = (bb_min + bb_max) * 0.5; + float radius = 0.5 * length(bb_max - bb_min); + float3 f0 = float3(dot(v_41.uFrustum[0], float4(center, 1.0)), dot(v_41.uFrustum[1], float4(center, 1.0)), 
dot(v_41.uFrustum[2], float4(center, 1.0))); + float3 f1 = float3(dot(v_41.uFrustum[3], float4(center, 1.0)), dot(v_41.uFrustum[4], float4(center, 1.0)), dot(v_41.uFrustum[5], float4(center, 1.0))); + bool _205 = any(f0 <= float3(-radius)); + bool _215; + if (!_205) + { + _215 = any(f1 <= float3(-radius)); + } + else + { + _215 = _205; + } + return !_215; +} + +static inline __attribute__((always_inline)) +float lod_factor(thread const float2& pos_, constant UBO& v_41) +{ + float2 pos = pos_ * v_41.uScale.xy; + float3 dist_to_cam = v_41.uCamPos - float3(pos.x, 0.0, pos.y); + float level0 = log2((length(dist_to_cam) + 9.9999997473787516355514526367188e-05) * v_41.uDistanceMod); + return fast::clamp(level0, 0.0, v_41.uMaxTessLevel.x); +} + +static inline __attribute__((always_inline)) +float4 tess_level(thread const float4& lod, constant UBO& v_41) +{ + return exp2(-lod) * v_41.uMaxTessLevel.y; +} + +static inline __attribute__((always_inline)) +float tess_level(thread const float& lod, constant UBO& v_41) +{ + return v_41.uMaxTessLevel.y * exp2(-lod); +} + +static inline __attribute__((always_inline)) +void compute_tess_levels(thread const float2& p0, constant UBO& v_41, device float2& vOutPatchPosBase, device float4& vPatchLods, device half (&gl_TessLevelOuter)[4], device half (&gl_TessLevelInner)[2]) +{ + vOutPatchPosBase = p0; + float2 param = p0 + (float2(-0.5) * v_41.uPatchSize); + float l00 = lod_factor(param, v_41); + float2 param_1 = p0 + (float2(0.5, -0.5) * v_41.uPatchSize); + float l10 = lod_factor(param_1, v_41); + float2 param_2 = p0 + (float2(1.5, -0.5) * v_41.uPatchSize); + float l20 = lod_factor(param_2, v_41); + float2 param_3 = p0 + (float2(-0.5, 0.5) * v_41.uPatchSize); + float l01 = lod_factor(param_3, v_41); + float2 param_4 = p0 + (float2(0.5) * v_41.uPatchSize); + float l11 = lod_factor(param_4, v_41); + float2 param_5 = p0 + (float2(1.5, 0.5) * v_41.uPatchSize); + float l21 = lod_factor(param_5, v_41); + float2 param_6 = p0 + (float2(-0.5, 
1.5) * v_41.uPatchSize); + float l02 = lod_factor(param_6, v_41); + float2 param_7 = p0 + (float2(0.5, 1.5) * v_41.uPatchSize); + float l12 = lod_factor(param_7, v_41); + float2 param_8 = p0 + (float2(1.5) * v_41.uPatchSize); + float l22 = lod_factor(param_8, v_41); + float4 lods = float4(dot(float4(l01, l11, l02, l12), float4(0.25)), dot(float4(l00, l10, l01, l11), float4(0.25)), dot(float4(l10, l20, l11, l21), float4(0.25)), dot(float4(l11, l21, l12, l22), float4(0.25))); + vPatchLods = lods; + float4 outer_lods = fast::min(lods, lods.yzwx); + float4 param_9 = outer_lods; + float4 levels = tess_level(param_9, v_41); + gl_TessLevelOuter[0] = half(levels.x); + gl_TessLevelOuter[1] = half(levels.y); + gl_TessLevelOuter[2] = half(levels.z); + gl_TessLevelOuter[3] = half(levels.w); + float min_lod = fast::min(fast::min(lods.x, lods.y), fast::min(lods.z, lods.w)); + float param_10 = fast::min(min_lod, l11); + float inner = tess_level(param_10, v_41); + gl_TessLevelInner[0] = half(inner); + gl_TessLevelInner[1] = half(inner); +} + +kernel void main0(constant UBO& v_41 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]], device main0_in* spvIn [[buffer(22)]]) +{ + device main0_patchOut& patchOut = spvPatchOut[gl_GlobalInvocationID.x / 1]; + device main0_in* gl_in = &spvIn[min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1) * spvIndirectParams[0]]; + uint gl_PrimitiveID = min(gl_GlobalInvocationID.x / 1, spvIndirectParams[1] - 1); + float2 p0 = gl_in[0].vPatchPosBase.xy; + float2 param = p0; + if (!frustum_cull(param, v_41)) + { + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(-1.0); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2] = half(-1.0); + 
spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0] = half(-1.0); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1] = half(-1.0); + } + else + { + float2 param_1 = p0; + compute_tess_levels(param_1, v_41, patchOut.vOutPatchPosBase, patchOut.vPatchLods, spvTessLevel[gl_PrimitiveID].edgeTessellationFactor, spvTessLevel[gl_PrimitiveID].insideTessellationFactor); + } +} + diff --git a/reference/shaders-msl/tesc/water_tess.tesc b/reference/shaders-msl/tesc/water_tess.tesc index ccd59bbdf0a..0287df0cd79 100644 --- a/reference/shaders-msl/tesc/water_tess.tesc +++ b/reference/shaders-msl/tesc/water_tess.tesc @@ -26,6 +26,7 @@ struct main0_in float2 vPatchPosBase [[attribute(0)]]; }; +static inline __attribute__((always_inline)) bool frustum_cull(thread const float2& p0, constant UBO& v_41) { float2 min_xz = (p0 - float2(10.0)) * v_41.uScale.xy; @@ -36,9 +37,7 @@ bool frustum_cull(thread const float2& p0, constant UBO& v_41) float radius = 0.5 * length(bb_max - bb_min); float3 f0 = float3(dot(v_41.uFrustum[0], float4(center, 1.0)), dot(v_41.uFrustum[1], float4(center, 1.0)), dot(v_41.uFrustum[2], float4(center, 1.0))); float3 f1 = float3(dot(v_41.uFrustum[3], float4(center, 1.0)), dot(v_41.uFrustum[4], float4(center, 1.0)), dot(v_41.uFrustum[5], float4(center, 1.0))); - float3 _199 = f0; - float _200 = radius; - bool _205 = any(_199 <= float3(-_200)); + bool _205 = any(f0 <= float3(-radius)); bool _215; if (!_205) { @@ -51,24 +50,28 @@ bool frustum_cull(thread const float2& p0, constant UBO& v_41) return !_215; } +static inline __attribute__((always_inline)) float lod_factor(thread const float2& pos_, constant UBO& v_41) { float2 pos = pos_ * v_41.uScale.xy; float3 dist_to_cam = v_41.uCamPos - float3(pos.x, 0.0, pos.y); - float level = log2((length(dist_to_cam) + 9.9999997473787516355514526367188e-05) * v_41.uDistanceMod); - return fast::clamp(level, 0.0, v_41.uMaxTessLevel.x); + float 
level0 = log2((length(dist_to_cam) + 9.9999997473787516355514526367188e-05) * v_41.uDistanceMod); + return fast::clamp(level0, 0.0, v_41.uMaxTessLevel.x); } +static inline __attribute__((always_inline)) float4 tess_level(thread const float4& lod, constant UBO& v_41) { return exp2(-lod) * v_41.uMaxTessLevel.y; } +static inline __attribute__((always_inline)) float tess_level(thread const float& lod, constant UBO& v_41) { return v_41.uMaxTessLevel.y * exp2(-lod); } +static inline __attribute__((always_inline)) void compute_tess_levels(thread const float2& p0, constant UBO& v_41, device float2& vOutPatchPosBase, device float4& vPatchLods, device half (&gl_TessLevelOuter)[4], device half (&gl_TessLevelInner)[2]) { vOutPatchPosBase = p0; diff --git a/reference/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese b/reference/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese new file mode 100644 index 00000000000..44e495fb32c --- /dev/null +++ b/reference/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct _35 +{ + float dummy; + float4 variableInStruct; +}; + +struct main0_out +{ + float outResult [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + spvUnsafeArray<_35, 3> testStructArray; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(float3 gl_TessCoord [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - float2(1.0), 0.0, 1.0); + float result = ((float(abs(gl_in[0].testStructArray[2].variableInStruct.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(gl_in[0].testStructArray[2].variableInStruct.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].testStructArray[2].variableInStruct.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].testStructArray[2].variableInStruct.w - 7.0) < 0.001000000047497451305389404296875); + out.outResult = result; + return out; +} + diff --git a/reference/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese b/reference/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese new file mode 100644 index 00000000000..23c2cc3ecc4 --- /dev/null +++ 
b/reference/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese @@ -0,0 +1,39 @@ +#include +#include + +using namespace metal; + +struct t35 +{ + float2 m0; + float4 m1; +}; + +struct t36 +{ + float2 m0; + t35 m1; +}; + +struct main0_out +{ + float v80 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 v40_m0; + t35 v40_m1; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(float3 gl_TessCoord [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) - float2(1.0), 0.0, 1.0); + float v34 = ((float(abs(gl_in[0].v40_m1.m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(gl_in[0].v40_m1.m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].v40_m1.m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(gl_in[0].v40_m1.m1.w - 7.0) < 0.001000000047497451305389404296875); + out.v80 = v34; + return out; +} + diff --git a/reference/shaders-msl/tese/in-block-with-nested-struct.tese b/reference/shaders-msl/tese/in-block-with-nested-struct.tese new file mode 100644 index 00000000000..711580d16a9 --- /dev/null +++ b/reference/shaders-msl/tese/in-block-with-nested-struct.tese @@ -0,0 +1,44 @@ +#include +#include + +using namespace metal; + +struct t35 +{ + float2 m0; + float4 m1; +}; + +struct t36 +{ + float2 m0; + t35 m1; +}; + +struct main0_out +{ + float v80 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 v40_m0 [[attribute(0)]]; + float2 v40_m1_m0 [[attribute(1)]]; + float4 v40_m1_m1 [[attribute(2)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + out.gl_Position = float4((gl_TessCoord.xy * 2.0) 
- float2(1.0), 0.0, 1.0); + float v34 = ((float(abs(patchIn.gl_in[0].v40_m1_m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(patchIn.gl_in[0].v40_m1_m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(patchIn.gl_in[0].v40_m1_m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(patchIn.gl_in[0].v40_m1_m1.w - 7.0) < 0.001000000047497451305389404296875); + out.v80 = v34; + return out; +} + diff --git a/reference/shaders-msl/tese/input-array.tese b/reference/shaders-msl/tese/input-array.tese index 2ac5731fe44..c94619602aa 100644 --- a/reference/shaders-msl/tese/input-array.tese +++ b/reference/shaders-msl/tese/input-array.tese @@ -21,14 +21,16 @@ struct main0_patchIn patch_control_point gl_in; }; -void set_position(thread float4& gl_Position, thread patch_control_point& gl_in, thread float2& gl_TessCoord) +static inline __attribute__((always_inline)) +void set_position(thread float4& gl_Position, thread patch_control_point& gl_in, thread float3& gl_TessCoord) { gl_Position = (gl_in[0].Floats * gl_TessCoord.x) + (gl_in[1].Floats2 * gl_TessCoord.y); } -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); set_position(out.gl_Position, patchIn.gl_in, gl_TessCoord); return out; } diff --git a/reference/shaders-msl/tese/input-types.raw-tess-in.tese b/reference/shaders-msl/tese/input-types.raw-tess-in.tese new file mode 100644 index 00000000000..52952220968 --- /dev/null +++ b/reference/shaders-msl/tese/input-types.raw-tess-in.tese @@ -0,0 +1,81 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct Block +{ + float4 a; + float4 b; +}; + +struct PatchBlock +{ + float4 a; + float4 b; +}; 
+ +struct Foo +{ + float4 a; + float4 b; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vColor; + float4 blocks_a; + float4 blocks_b; + Foo vFoos; +}; + +struct main0_patchIn +{ + float4 vColors; + float4 patch_block_a; + float4 patch_block_b; + Foo vFoo; +}; + +static inline __attribute__((always_inline)) +void set_from_function(thread float4& gl_Position, const device main0_in* thread & gl_in, thread PatchBlock& patch_block, const device float4& vColors, const device Foo& vFoo) +{ + gl_Position = gl_in[0].blocks_a; + gl_Position += gl_in[0].blocks_b; + gl_Position += gl_in[1].blocks_a; + gl_Position += gl_in[1].blocks_b; + gl_Position += patch_block.a; + gl_Position += patch_block.b; + gl_Position += gl_in[0].vColor; + gl_Position += gl_in[1].vColor; + gl_Position += vColors; + Foo foo = vFoo; + gl_Position += foo.a; + gl_Position += foo.b; + foo = gl_in[0].vFoos; + gl_Position += foo.a; + gl_Position += foo.b; + foo = gl_in[1].vFoos; + gl_Position += foo.a; + gl_Position += foo.b; +} + +[[ patch(quad, 0) ]] vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device main0_patchIn* spvPatchIn [[buffer(20)]], const device main0_in* spvIn [[buffer(22)]]) +{ + main0_out out = {}; + PatchBlock patch_block = {}; + const device main0_in* gl_in = &spvIn[gl_PrimitiveID * 0]; + const device main0_patchIn& patchIn = spvPatchIn[gl_PrimitiveID]; + patch_block.a = patchIn.patch_block_a; + patch_block.b = patchIn.patch_block_b; + set_from_function(out.gl_Position, gl_in, patch_block, patchIn.vColors, patchIn.vFoo); + return out; +} + diff --git a/reference/shaders-msl/tese/input-types.tese b/reference/shaders-msl/tese/input-types.tese index 9012a7e1a70..d25235a2a37 100644 --- a/reference/shaders-msl/tese/input-types.tese +++ b/reference/shaders-msl/tese/input-types.tese @@ -31,28 +31,29 @@ struct main0_out struct main0_in { float4 vColor [[attribute(0)]]; - float4 Block_a [[attribute(4)]]; - float4 Block_b 
[[attribute(5)]]; - float4 Foo_a [[attribute(14)]]; - float4 Foo_b [[attribute(15)]]; + float4 blocks_a [[attribute(4)]]; + float4 blocks_b [[attribute(5)]]; + float4 vFoos_a [[attribute(14)]]; + float4 vFoos_b [[attribute(15)]]; }; struct main0_patchIn { float4 vColors [[attribute(1)]]; - float4 PatchBlock_a [[attribute(6)]]; - float4 PatchBlock_b [[attribute(7)]]; - float4 Foo_a [[attribute(8)]]; - float4 Foo_b [[attribute(9)]]; + float4 patch_block_a [[attribute(6)]]; + float4 patch_block_b [[attribute(7)]]; + float4 vFoo_a [[attribute(8)]]; + float4 vFoo_b [[attribute(9)]]; patch_control_point gl_in; }; +static inline __attribute__((always_inline)) void set_from_function(thread float4& gl_Position, thread patch_control_point& gl_in, thread PatchBlock& patch_block, thread float4& vColors, thread Foo& vFoo) { - gl_Position = gl_in[0].Block_a; - gl_Position += gl_in[0].Block_b; - gl_Position += gl_in[1].Block_a; - gl_Position += gl_in[1].Block_b; + gl_Position = gl_in[0].blocks_a; + gl_Position += gl_in[0].blocks_b; + gl_Position += gl_in[1].blocks_a; + gl_Position += gl_in[1].blocks_b; gl_Position += patch_block.a; gl_Position += patch_block.b; gl_Position += gl_in[0].vColor; @@ -61,16 +62,12 @@ void set_from_function(thread float4& gl_Position, thread patch_control_point +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInputs_0 [[attribute(0)]]; + float4 vInputs_1 [[attribute(1)]]; + float4 vInputs_2 [[attribute(2)]]; + float4 vInputs_3 [[attribute(3)]]; +}; + +struct main0_patchIn +{ + float4 vBoo_0 [[attribute(4)]]; + float4 vBoo_1 [[attribute(5)]]; + float4 vBoo_2 [[attribute(6)]]; + float4 vBoo_3 [[attribute(7)]]; + int vIndex [[attribute(8)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray vBoo = {}; + vBoo[0] = patchIn.vBoo_0; + vBoo[1] = patchIn.vBoo_1; + vBoo[2] = patchIn.vBoo_2; + vBoo[3] = patchIn.vBoo_3; + spvUnsafeArray _16 = spvUnsafeArray({ float4x4(patchIn.gl_in[0].vInputs_0, patchIn.gl_in[0].vInputs_1, patchIn.gl_in[0].vInputs_2, patchIn.gl_in[0].vInputs_3), float4x4(patchIn.gl_in[1].vInputs_0, patchIn.gl_in[1].vInputs_1, patchIn.gl_in[1].vInputs_2, patchIn.gl_in[1].vInputs_3), float4x4(patchIn.gl_in[2].vInputs_0, patchIn.gl_in[2].vInputs_1, patchIn.gl_in[2].vInputs_2, patchIn.gl_in[2].vInputs_3), float4x4(patchIn.gl_in[3].vInputs_0, patchIn.gl_in[3].vInputs_1, patchIn.gl_in[3].vInputs_2, patchIn.gl_in[3].vInputs_3), float4x4(patchIn.gl_in[4].vInputs_0, patchIn.gl_in[4].vInputs_1, patchIn.gl_in[4].vInputs_2, 
patchIn.gl_in[4].vInputs_3), float4x4(patchIn.gl_in[5].vInputs_0, patchIn.gl_in[5].vInputs_1, patchIn.gl_in[5].vInputs_2, patchIn.gl_in[5].vInputs_3), float4x4(patchIn.gl_in[6].vInputs_0, patchIn.gl_in[6].vInputs_1, patchIn.gl_in[6].vInputs_2, patchIn.gl_in[6].vInputs_3), float4x4(patchIn.gl_in[7].vInputs_0, patchIn.gl_in[7].vInputs_1, patchIn.gl_in[7].vInputs_2, patchIn.gl_in[7].vInputs_3), float4x4(patchIn.gl_in[8].vInputs_0, patchIn.gl_in[8].vInputs_1, patchIn.gl_in[8].vInputs_2, patchIn.gl_in[8].vInputs_3), float4x4(patchIn.gl_in[9].vInputs_0, patchIn.gl_in[9].vInputs_1, patchIn.gl_in[9].vInputs_2, patchIn.gl_in[9].vInputs_3), float4x4(patchIn.gl_in[10].vInputs_0, patchIn.gl_in[10].vInputs_1, patchIn.gl_in[10].vInputs_2, patchIn.gl_in[10].vInputs_3), float4x4(patchIn.gl_in[11].vInputs_0, patchIn.gl_in[11].vInputs_1, patchIn.gl_in[11].vInputs_2, patchIn.gl_in[11].vInputs_3), float4x4(patchIn.gl_in[12].vInputs_0, patchIn.gl_in[12].vInputs_1, patchIn.gl_in[12].vInputs_2, patchIn.gl_in[12].vInputs_3), float4x4(patchIn.gl_in[13].vInputs_0, patchIn.gl_in[13].vInputs_1, patchIn.gl_in[13].vInputs_2, patchIn.gl_in[13].vInputs_3), float4x4(patchIn.gl_in[14].vInputs_0, patchIn.gl_in[14].vInputs_1, patchIn.gl_in[14].vInputs_2, patchIn.gl_in[14].vInputs_3), float4x4(patchIn.gl_in[15].vInputs_0, patchIn.gl_in[15].vInputs_1, patchIn.gl_in[15].vInputs_2, patchIn.gl_in[15].vInputs_3), float4x4(patchIn.gl_in[16].vInputs_0, patchIn.gl_in[16].vInputs_1, patchIn.gl_in[16].vInputs_2, patchIn.gl_in[16].vInputs_3), float4x4(patchIn.gl_in[17].vInputs_0, patchIn.gl_in[17].vInputs_1, patchIn.gl_in[17].vInputs_2, patchIn.gl_in[17].vInputs_3), float4x4(patchIn.gl_in[18].vInputs_0, patchIn.gl_in[18].vInputs_1, patchIn.gl_in[18].vInputs_2, patchIn.gl_in[18].vInputs_3), float4x4(patchIn.gl_in[19].vInputs_0, patchIn.gl_in[19].vInputs_1, patchIn.gl_in[19].vInputs_2, patchIn.gl_in[19].vInputs_3), float4x4(patchIn.gl_in[20].vInputs_0, patchIn.gl_in[20].vInputs_1, patchIn.gl_in[20].vInputs_2, 
patchIn.gl_in[20].vInputs_3), float4x4(patchIn.gl_in[21].vInputs_0, patchIn.gl_in[21].vInputs_1, patchIn.gl_in[21].vInputs_2, patchIn.gl_in[21].vInputs_3), float4x4(patchIn.gl_in[22].vInputs_0, patchIn.gl_in[22].vInputs_1, patchIn.gl_in[22].vInputs_2, patchIn.gl_in[22].vInputs_3), float4x4(patchIn.gl_in[23].vInputs_0, patchIn.gl_in[23].vInputs_1, patchIn.gl_in[23].vInputs_2, patchIn.gl_in[23].vInputs_3), float4x4(patchIn.gl_in[24].vInputs_0, patchIn.gl_in[24].vInputs_1, patchIn.gl_in[24].vInputs_2, patchIn.gl_in[24].vInputs_3), float4x4(patchIn.gl_in[25].vInputs_0, patchIn.gl_in[25].vInputs_1, patchIn.gl_in[25].vInputs_2, patchIn.gl_in[25].vInputs_3), float4x4(patchIn.gl_in[26].vInputs_0, patchIn.gl_in[26].vInputs_1, patchIn.gl_in[26].vInputs_2, patchIn.gl_in[26].vInputs_3), float4x4(patchIn.gl_in[27].vInputs_0, patchIn.gl_in[27].vInputs_1, patchIn.gl_in[27].vInputs_2, patchIn.gl_in[27].vInputs_3), float4x4(patchIn.gl_in[28].vInputs_0, patchIn.gl_in[28].vInputs_1, patchIn.gl_in[28].vInputs_2, patchIn.gl_in[28].vInputs_3), float4x4(patchIn.gl_in[29].vInputs_0, patchIn.gl_in[29].vInputs_1, patchIn.gl_in[29].vInputs_2, patchIn.gl_in[29].vInputs_3), float4x4(patchIn.gl_in[30].vInputs_0, patchIn.gl_in[30].vInputs_1, patchIn.gl_in[30].vInputs_2, patchIn.gl_in[30].vInputs_3), float4x4(patchIn.gl_in[31].vInputs_0, patchIn.gl_in[31].vInputs_1, patchIn.gl_in[31].vInputs_2, patchIn.gl_in[31].vInputs_3) }); + spvUnsafeArray tmp; + tmp = _16; + out.gl_Position = (tmp[0][patchIn.vIndex] + tmp[1][patchIn.vIndex]) + vBoo[patchIn.vIndex]; + return out; +} + diff --git a/reference/shaders-msl/tese/load-control-point-array.tese b/reference/shaders-msl/tese/load-control-point-array.tese new file mode 100644 index 00000000000..09c19cb47f4 --- /dev/null +++ b/reference/shaders-msl/tese/load-control-point-array.tese @@ -0,0 +1,81 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; 
+ +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInputs [[attribute(0)]]; +}; + +struct main0_patchIn +{ + float4 vBoo_0 [[attribute(1)]]; + float4 vBoo_1 [[attribute(2)]]; + float4 vBoo_2 [[attribute(3)]]; + float4 vBoo_3 [[attribute(4)]]; + int vIndex [[attribute(5)]]; + patch_control_point gl_in; +}; + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray vBoo = {}; + vBoo[0] = patchIn.vBoo_0; + vBoo[1] = patchIn.vBoo_1; + vBoo[2] = patchIn.vBoo_2; + vBoo[3] = patchIn.vBoo_3; + spvUnsafeArray _15 = spvUnsafeArray({ patchIn.gl_in[0].vInputs, patchIn.gl_in[1].vInputs, patchIn.gl_in[2].vInputs, patchIn.gl_in[3].vInputs, patchIn.gl_in[4].vInputs, patchIn.gl_in[5].vInputs, patchIn.gl_in[6].vInputs, patchIn.gl_in[7].vInputs, patchIn.gl_in[8].vInputs, patchIn.gl_in[9].vInputs, patchIn.gl_in[10].vInputs, patchIn.gl_in[11].vInputs, patchIn.gl_in[12].vInputs, patchIn.gl_in[13].vInputs, patchIn.gl_in[14].vInputs, patchIn.gl_in[15].vInputs, patchIn.gl_in[16].vInputs, patchIn.gl_in[17].vInputs, patchIn.gl_in[18].vInputs, patchIn.gl_in[19].vInputs, patchIn.gl_in[20].vInputs, patchIn.gl_in[21].vInputs, patchIn.gl_in[22].vInputs, patchIn.gl_in[23].vInputs, 
patchIn.gl_in[24].vInputs, patchIn.gl_in[25].vInputs, patchIn.gl_in[26].vInputs, patchIn.gl_in[27].vInputs, patchIn.gl_in[28].vInputs, patchIn.gl_in[29].vInputs, patchIn.gl_in[30].vInputs, patchIn.gl_in[31].vInputs }); + spvUnsafeArray tmp; + tmp = _15; + out.gl_Position = (tmp[0] + tmp[1]) + vBoo[patchIn.vIndex]; + return out; +} + diff --git a/reference/shaders-msl/tese/quad.domain.tese b/reference/shaders-msl/tese/quad.domain.tese index 78b58ab9975..10cdf5f10ba 100644 --- a/reference/shaders-msl/tese/quad.domain.tese +++ b/reference/shaders-msl/tese/quad.domain.tese @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -10,15 +51,24 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn 
[[position_in_patch]]) { main0_out out = {}; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); gl_TessCoord.y = 1.0 - gl_TessCoord.y; - out.gl_Position = float4(((gl_TessCoord.x * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * patchIn.gl_TessLevelInner.x) * patchIn.gl_TessLevelOuter.z), ((gl_TessCoord.y * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.w) + (((1.0 - gl_TessCoord.y) * patchIn.gl_TessLevelInner.y) * patchIn.gl_TessLevelOuter.y), 0.0, 1.0); + out.gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0]) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), ((gl_TessCoord.y * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]), 0.0, 1.0); return out; } diff --git a/reference/shaders-msl/tese/quad.tese b/reference/shaders-msl/tese/quad.tese index df3d260fa89..e0c7944394c 100644 --- a/reference/shaders-msl/tese/quad.tese +++ b/reference/shaders-msl/tese/quad.tese @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -12,19 +51,29 @@ struct main0_out struct main0_patchIn { - float2 gl_TessLevelInner [[attribute(0)]]; - float4 gl_TessLevelOuter [[attribute(1)]]; + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; }; -void set_position(thread float4& gl_Position, thread float2& gl_TessCoord, thread float2& gl_TessLevelInner, thread float4& gl_TessLevelOuter) +static inline __attribute__((always_inline)) +void set_position(thread float4& gl_Position, thread float3& gl_TessCoord, thread spvUnsafeArray& gl_TessLevelInner, thread spvUnsafeArray& gl_TessLevelOuter) { - gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner.x) * gl_TessLevelOuter.x) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner.x) * gl_TessLevelOuter.z), ((gl_TessCoord.y * gl_TessLevelInner.y) * gl_TessLevelOuter.y) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner.y) * gl_TessLevelOuter.w), 0.0, 1.0); + gl_Position = float4(((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0]) + (((1.0 - gl_TessCoord.x) * gl_TessLevelInner[0]) * gl_TessLevelOuter[2]), ((gl_TessCoord.y * gl_TessLevelInner[1]) * gl_TessLevelOuter[1]) + (((1.0 - gl_TessCoord.y) * gl_TessLevelInner[1]) * gl_TessLevelOuter[3]), 0.0, 1.0); } -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn 
patchIn [[stage_in]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - set_position(out.gl_Position, gl_TessCoord, patchIn.gl_TessLevelInner, patchIn.gl_TessLevelOuter); + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + set_position(out.gl_Position, gl_TessCoord, gl_TessLevelInner, gl_TessLevelOuter); return out; } diff --git a/reference/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese b/reference/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese new file mode 100644 index 00000000000..7048546cea5 --- /dev/null +++ b/reference/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese @@ -0,0 +1,78 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevelOuter [[attribute(0)]]; + float2 gl_TessLevelInner [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +float4 read_tess_levels(thread spvUnsafeArray& gl_TessLevelOuter, thread spvUnsafeArray& gl_TessLevelInner) +{ + return float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = patchIn.gl_TessLevelOuter[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevelOuter[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevelOuter[2]; + gl_TessLevelOuter[3] = patchIn.gl_TessLevelOuter[3]; + gl_TessLevelInner[0] = patchIn.gl_TessLevelInner[0]; + gl_TessLevelInner[1] = patchIn.gl_TessLevelInner[1]; + out.gl_Position = read_tess_levels(gl_TessLevelOuter, gl_TessLevelInner); + return out; +} + diff --git a/reference/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese b/reference/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese new file mode 100644 index 
00000000000..f8f81b7574a --- /dev/null +++ b/reference/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese @@ -0,0 +1,72 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +static inline __attribute__((always_inline)) +float4 read_tess_levels(thread spvUnsafeArray& gl_TessLevelOuter, thread spvUnsafeArray& gl_TessLevelInner) +{ + return float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +[[ patch(quad, 0) ]] vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device MTLQuadTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]; + gl_TessLevelOuter[1] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]; + gl_TessLevelOuter[2] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]; + gl_TessLevelOuter[3] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[3]; + 
gl_TessLevelInner[0] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor[0]; + gl_TessLevelInner[1] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor[1]; + out.gl_Position = read_tess_levels(gl_TessLevelOuter, gl_TessLevelInner); + return out; +} + diff --git a/reference/shaders-msl/tese/read-tess-level-in-func.msl2.tese b/reference/shaders-msl/tese/read-tess-level-in-func.msl2.tese new file mode 100644 index 00000000000..432ad7cc2d0 --- /dev/null +++ b/reference/shaders-msl/tese/read-tess-level-in-func.msl2.tese @@ -0,0 +1,75 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float4 gl_TessLevel [[attribute(0)]]; +}; + +static inline __attribute__((always_inline)) +float4 read_tess_levels(thread spvUnsafeArray& gl_TessLevelOuter, thread spvUnsafeArray& gl_TessLevelInner) +{ + return float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]]) +{ + main0_out out = {}; + 
spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = patchIn.gl_TessLevel[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevel[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevel[2]; + gl_TessLevelInner[0] = patchIn.gl_TessLevel[3]; + out.gl_Position = read_tess_levels(gl_TessLevelOuter, gl_TessLevelInner); + return out; +} + diff --git a/reference/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese b/reference/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..5be7c40174d --- /dev/null +++ b/reference/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese @@ -0,0 +1,70 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +static inline __attribute__((always_inline)) +float4 read_tess_levels(thread spvUnsafeArray& gl_TessLevelOuter, thread spvUnsafeArray& gl_TessLevelInner) +{ + return float4(gl_TessLevelOuter[0], gl_TessLevelOuter[1], gl_TessLevelOuter[2], gl_TessLevelOuter[3]) + float2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +[[ patch(triangle, 0) ]] 
vertex main0_out main0(uint gl_PrimitiveID [[patch_id]], const device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]]) +{ + main0_out out = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + spvUnsafeArray gl_TessLevelInner = {}; + gl_TessLevelOuter[0] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0]; + gl_TessLevelOuter[1] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1]; + gl_TessLevelOuter[2] = spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2]; + gl_TessLevelInner[0] = spvTessLevel[gl_PrimitiveID].insideTessellationFactor; + out.gl_Position = read_tess_levels(gl_TessLevelOuter, gl_TessLevelInner); + return out; +} + diff --git a/reference/shaders-msl/tese/set-from-function.tese b/reference/shaders-msl/tese/set-from-function.tese index f6e41ee5c8f..dfbbe0b7028 100644 --- a/reference/shaders-msl/tese/set-from-function.tese +++ b/reference/shaders-msl/tese/set-from-function.tese @@ -25,24 +25,25 @@ struct main0_out struct main0_in { float4 vColor [[attribute(0)]]; - float4 Block_a [[attribute(2)]]; - float4 Block_b [[attribute(3)]]; + float4 blocks_a [[attribute(2)]]; + float4 blocks_b [[attribute(3)]]; }; struct main0_patchIn { float4 vColors [[attribute(1)]]; - float4 Foo_a [[attribute(4)]]; - float4 Foo_b [[attribute(5)]]; + float4 vFoo_a [[attribute(4)]]; + float4 vFoo_b [[attribute(5)]]; patch_control_point gl_in; }; +static inline __attribute__((always_inline)) void set_from_function(thread float4& gl_Position, thread patch_control_point& gl_in, thread float4& vColors, thread Foo& vFoo) { - gl_Position = gl_in[0].Block_a; - gl_Position += gl_in[0].Block_b; - gl_Position += gl_in[1].Block_a; - gl_Position += gl_in[1].Block_b; + gl_Position = gl_in[0].blocks_a; + gl_Position += gl_in[0].blocks_b; + gl_Position += gl_in[1].blocks_a; + gl_Position += gl_in[1].blocks_b; gl_Position += gl_in[0].vColor; gl_Position += gl_in[1].vColor; gl_Position += vColors; @@ -54,8 +55,8 @@ void set_from_function(thread float4& gl_Position, thread 
patch_control_point #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 gl_Position [[position]]; @@ -16,12 +57,12 @@ struct main0_patchIn [[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], float3 gl_TessCoord [[position_in_patch]]) { main0_out out = {}; - float gl_TessLevelInner[2] = {}; - float gl_TessLevelOuter[4] = {}; - gl_TessLevelInner[0] = patchIn.gl_TessLevel.w; - gl_TessLevelOuter[0] = patchIn.gl_TessLevel.x; - gl_TessLevelOuter[1] = patchIn.gl_TessLevel.y; - gl_TessLevelOuter[2] = patchIn.gl_TessLevel.z; + spvUnsafeArray gl_TessLevelInner = {}; + spvUnsafeArray gl_TessLevelOuter = {}; + gl_TessLevelInner[0] = patchIn.gl_TessLevel[3]; + gl_TessLevelOuter[0] = patchIn.gl_TessLevel[0]; + gl_TessLevelOuter[1] = patchIn.gl_TessLevel[1]; + gl_TessLevelOuter[2] = patchIn.gl_TessLevel[2]; out.gl_Position = float4((gl_TessCoord.x * gl_TessLevelInner[0]) * gl_TessLevelOuter[0], (gl_TessCoord.y * gl_TessLevelInner[0]) * gl_TessLevelOuter[1], (gl_TessCoord.z * gl_TessLevelInner[0]) * gl_TessLevelOuter[2], 1.0); return out; } diff --git a/reference/shaders-msl/tese/water_tess.raw-tess-in.tese b/reference/shaders-msl/tese/water_tess.raw-tess-in.tese new file mode 100644 index 00000000000..bf93456484c --- /dev/null +++ 
b/reference/shaders-msl/tese/water_tess.raw-tess-in.tese @@ -0,0 +1,77 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; + float4 uScale; + float2 uInvScale; + float3 uCamPos; + float2 uPatchSize; + float2 uInvHeightmapSize; +}; + +struct main0_out +{ + float3 vWorld [[user(locn0)]]; + float4 vGradNormalTex [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_patchIn +{ + float2 vOutPatchPosBase; + float4 vPatchLods; +}; + +static inline __attribute__((always_inline)) +float2 lerp_vertex(thread const float2& tess_coord, const device float2& vOutPatchPosBase, constant UBO& v_31) +{ + return vOutPatchPosBase + (tess_coord * v_31.uPatchSize); +} + +static inline __attribute__((always_inline)) +float2 lod_factor(thread const float2& tess_coord, const device float4& vPatchLods) +{ + float2 x = mix(vPatchLods.yx, vPatchLods.zw, float2(tess_coord.x)); + float level0 = mix(x.x, x.y, tess_coord.y); + float floor_level = floor(level0); + float fract_level = level0 - floor_level; + return float2(floor_level, fract_level); +} + +static inline __attribute__((always_inline)) +float3 sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, texture2d uHeightmapDisplacement, sampler uHeightmapDisplacementSmplr) +{ + return mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 0.5)), level(lod.x)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 1.0)), level(lod.x + 1.0)).xyz, float3(lod.y)); +} + +[[ patch(quad, 0) ]] vertex main0_out main0(constant UBO& v_31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoordIn [[position_in_patch]], uint gl_PrimitiveID [[patch_id]], const device main0_patchIn* spvPatchIn [[buffer(20)]]) +{ + main0_out out = {}; + const device main0_patchIn& patchIn = 
spvPatchIn[gl_PrimitiveID]; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + float2 tess_coord = gl_TessCoord.xy; + float2 param = tess_coord; + float2 pos = lerp_vertex(param, patchIn.vOutPatchPosBase, v_31); + float2 param_1 = tess_coord; + float2 lod = lod_factor(param_1, patchIn.vPatchLods); + float2 tex = pos * v_31.uInvHeightmapSize; + pos *= v_31.uScale.xy; + float delta_mod = exp2(lod.x); + float2 off = v_31.uInvHeightmapSize * delta_mod; + out.vGradNormalTex = float4(tex + (v_31.uInvHeightmapSize * 0.5), tex * v_31.uScale.zw); + float2 param_2 = tex; + float2 param_3 = off; + float2 param_4 = lod; + float3 height_displacement = sample_height_displacement(param_2, param_3, param_4, uHeightmapDisplacement, uHeightmapDisplacementSmplr); + pos += height_displacement.yz; + out.vWorld = float3(pos.x, height_displacement.x, pos.y); + out.gl_Position = v_31.uMVP * float4(out.vWorld, 1.0); + return out; +} + diff --git a/reference/shaders-msl/tese/water_tess.tese b/reference/shaders-msl/tese/water_tess.tese index 9b6c0aca843..5f63d94f16f 100644 --- a/reference/shaders-msl/tese/water_tess.tese +++ b/reference/shaders-msl/tese/water_tess.tese @@ -28,29 +28,33 @@ struct main0_patchIn float4 vPatchLods [[attribute(1)]]; }; +static inline __attribute__((always_inline)) float2 lerp_vertex(thread const float2& tess_coord, thread float2& vOutPatchPosBase, constant UBO& v_31) { return vOutPatchPosBase + (tess_coord * v_31.uPatchSize); } +static inline __attribute__((always_inline)) float2 lod_factor(thread const float2& tess_coord, thread float4& vPatchLods) { float2 x = mix(vPatchLods.yx, vPatchLods.zw, float2(tess_coord.x)); - float level = mix(x.x, x.y, tess_coord.y); - float floor_level = floor(level); - float fract_level = level - floor_level; + float level0 = mix(x.x, x.y, tess_coord.y); + float floor_level = floor(level0); + float fract_level = level0 - floor_level; return float2(floor_level, fract_level); } -float3 
sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, thread texture2d uHeightmapDisplacement, thread const sampler uHeightmapDisplacementSmplr) +static inline __attribute__((always_inline)) +float3 sample_height_displacement(thread const float2& uv, thread const float2& off, thread const float2& lod, texture2d uHeightmapDisplacement, sampler uHeightmapDisplacementSmplr) { return mix(uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 0.5)), level(lod.x)).xyz, uHeightmapDisplacement.sample(uHeightmapDisplacementSmplr, (uv + (off * 1.0)), level(lod.x + 1.0)).xyz, float3(lod.y)); } -[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant UBO& v_31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoord [[position_in_patch]]) +[[ patch(quad, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant UBO& v_31 [[buffer(0)]], texture2d uHeightmapDisplacement [[texture(0)]], sampler uHeightmapDisplacementSmplr [[sampler(0)]], float2 gl_TessCoordIn [[position_in_patch]]) { main0_out out = {}; - float2 tess_coord = float3(gl_TessCoord, 0).xy; + float3 gl_TessCoord = float3(gl_TessCoordIn.x, gl_TessCoordIn.y, 0.0); + float2 tess_coord = gl_TessCoord.xy; float2 param = tess_coord; float2 pos = lerp_vertex(param, patchIn.vOutPatchPosBase, v_31); float2 param_1 = tess_coord; diff --git a/reference/shaders-msl/vert/array-component-io.for-tess.vert b/reference/shaders-msl/vert/array-component-io.for-tess.vert new file mode 100644 index 00000000000..24958eb50db --- /dev/null +++ b/reference/shaders-msl/vert/array-component-io.for-tess.vert @@ -0,0 +1,98 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 m_location_0; + float4 m_location_1; + float4 m_location_2; + float4 gl_Position; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; + float4 m_location_1 [[attribute(1)]]; + float4 m_location_2 [[attribute(2)]]; + float4 Pos [[attribute(4)]]; +}; + +kernel void main0(main0_in in [[stage_in]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + float D = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + float InD = {}; + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + InA[0] = in.m_location_1.x; + InA[1] = in.m_location_2.x; + InB[0] = in.m_location_1.zw; + InB[1] = in.m_location_2.zw; + InC[0] = in.m_location_0.y; + InC[1] = in.m_location_1.y; + InC[2] = in.m_location_2.y; + InD = in.m_location_0.w; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = in.Pos; + A = InA; + B = InB; + C = InC; + D = InD; + out.m_location_1.x = A[0]; + out.m_location_2.x = A[1]; + out.m_location_1.zw = B[0]; + out.m_location_2.zw = B[1]; + out.m_location_0.y = C[0]; + out.m_location_1.y = C[1]; + 
out.m_location_2.y = C[2]; + out.m_location_0.w = D; +} + diff --git a/reference/shaders-msl/vert/array-component-io.vert b/reference/shaders-msl/vert/array-component-io.vert new file mode 100644 index 00000000000..352c9d2ef0f --- /dev/null +++ b/reference/shaders-msl/vert/array-component-io.vert @@ -0,0 +1,100 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float C_0 [[user(locn0_1)]]; + float D [[user(locn0_3)]]; + float A_0 [[user(locn1)]]; + float C_1 [[user(locn1_1)]]; + float2 B_0 [[user(locn1_2)]]; + float A_1 [[user(locn2)]]; + float C_2 [[user(locn2_1)]]; + float2 B_1 [[user(locn2_2)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 m_location_0 [[attribute(0)]]; + float4 m_location_1 [[attribute(1)]]; + float4 m_location_2 [[attribute(2)]]; + float4 Pos [[attribute(4)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray A = {}; + spvUnsafeArray B = {}; + spvUnsafeArray C = {}; + spvUnsafeArray InA = {}; + spvUnsafeArray InB = {}; + spvUnsafeArray InC = {}; + float InD = {}; + InA[0] = in.m_location_1.x; + InA[1] = in.m_location_2.x; + InB[0] 
= in.m_location_1.zw; + InB[1] = in.m_location_2.zw; + InC[0] = in.m_location_0.y; + InC[1] = in.m_location_1.y; + InC[2] = in.m_location_2.y; + InD = in.m_location_0.w; + out.gl_Position = in.Pos; + A = InA; + B = InB; + C = InC; + out.D = InD; + out.A_0 = A[0]; + out.A_1 = A[1]; + out.B_0 = B[0]; + out.B_1 = B[1]; + out.C_0 = C[0]; + out.C_1 = C[1]; + out.C_2 = C[2]; + return out; +} + diff --git a/reference/shaders-msl/vert/basic.for-tess.vert b/reference/shaders-msl/vert/basic.for-tess.vert new file mode 100644 index 00000000000..c99a95ac898 --- /dev/null +++ b/reference/shaders-msl/vert/basic.for-tess.vert @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; +}; + +struct main0_out +{ + float3 vNormal; + float4 gl_Position; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant UBO& _16 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + out.gl_Position = _16.uMVP * in.aVertex; + out.vNormal = in.aNormal; +} + diff --git a/reference/shaders-msl/vert/buffer_device_address.msl2.vert b/reference/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000000..9d856c48114 --- /dev/null +++ b/reference/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,49 @@ +#include +#include + +using namespace metal; + +struct Position; +struct PositionReferences; + +struct Position +{ + float2 positions[1]; +}; + +struct Registers +{ + float4x4 view_projection; + device PositionReferences* references; +}; + +struct PositionReferences +{ + device Position* buffers[1]; +}; + +struct main0_out +{ + float4 out_color [[user(locn0)]]; + 
float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant Registers& registers [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + int slice = int(gl_InstanceIndex); + const device Position* __restrict positions = registers.references->buffers[slice]; + float2 pos = positions->positions[int(gl_VertexIndex)] * 2.5; + pos += ((float2(float(slice % 8), float(slice / 8)) - float2(3.5)) * 3.0); + out.gl_Position = registers.view_projection * float4(pos, 0.0, 1.0); + int index_x = int(gl_VertexIndex) % 16; + int index_y = int(gl_VertexIndex) / 16; + float r = 0.5 + (0.300000011920928955078125 * sin(float(index_x))); + float g = 0.5 + (0.300000011920928955078125 * sin(float(index_y))); + int checkerboard = (index_x ^ index_y) & 1; + r *= ((float(checkerboard) * 0.800000011920928955078125) + 0.20000000298023223876953125); + g *= ((float(checkerboard) * 0.800000011920928955078125) + 0.20000000298023223876953125); + out.out_color = float4(r, g, 0.1500000059604644775390625, 1.0); + return out; +} + diff --git a/reference/shaders-msl/vert/clip-distance-block.no-user-varying.vert b/reference/shaders-msl/vert/clip-distance-block.no-user-varying.vert new file mode 100644 index 00000000000..c78105e0ce6 --- /dev/null +++ b/reference/shaders-msl/vert/clip-distance-block.no-user-varying.vert @@ -0,0 +1,25 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.Position; + out.gl_ClipDistance[0] = in.Position.x; + out.gl_ClipDistance[1] = in.Position.y; + return out; +} + diff --git a/reference/shaders-msl/vert/clip-distance-block.vert b/reference/shaders-msl/vert/clip-distance-block.vert new file mode 100644 index 00000000000..af58f35ff5f --- 
/dev/null +++ b/reference/shaders-msl/vert/clip-distance-block.vert @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [2]; + float gl_ClipDistance_0 [[user(clip0)]]; + float gl_ClipDistance_1 [[user(clip1)]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.gl_Position = in.Position; + out.gl_ClipDistance[0] = in.Position.x; + out.gl_ClipDistance[1] = in.Position.y; + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + out.gl_ClipDistance_1 = out.gl_ClipDistance[1]; + return out; +} + diff --git a/reference/shaders-msl/vert/copy.flatten.vert b/reference/shaders-msl/vert/copy.flatten.vert index a762f7e792b..92757a6001e 100644 --- a/reference/shaders-msl/vert/copy.flatten.vert +++ b/reference/shaders-msl/vert/copy.flatten.vert @@ -47,7 +47,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] light.Radius = _21.lights[i].Radius; light.Color = _21.lights[i].Color; float3 L = in.aVertex.xyz - light.Position; - out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, normalize(L))); + out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / light.Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(L))); } return out; } diff --git a/reference/shaders-msl/vert/dynamic.flatten.vert b/reference/shaders-msl/vert/dynamic.flatten.vert index c285f3c8739..43b3e112ce4 100644 --- a/reference/shaders-msl/vert/dynamic.flatten.vert +++ b/reference/shaders-msl/vert/dynamic.flatten.vert @@ -36,7 +36,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]] for (int i = 0; i < 4; i++) { float3 L = in.aVertex.xyz - float3(_21.lights[i].Position); - out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * 
dot(in.aNormal, normalize(L))); + out.vColor += ((_21.lights[i].Color * fast::clamp(1.0 - (length(L) / _21.lights[i].Radius), 0.0, 1.0)) * dot(in.aNormal, fast::normalize(L))); } return out; } diff --git a/reference/shaders-msl/vert/float-math.invariant-float-math.vert b/reference/shaders-msl/vert/float-math.invariant-float-math.vert new file mode 100644 index 00000000000..4b25e91b455 --- /dev/null +++ b/reference/shaders-msl/vert/float-math.invariant-float-math.vert @@ -0,0 +1,136 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +template +[[clang::optnone]] T spvFMul(T l, T r) +{ + return fma(l, r, T(0)); +} + +template +[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m) +{ + vec res = vec(0); + for (uint i = Rows; i > 0; --i) + { + vec tmp(0); + for (uint j = 0; j < Cols; ++j) + { + tmp[j] = m[j][i - 1]; + } + res = fma(tmp, vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] vec spvFMulMatrixVector(matrix m, vec v) +{ + vec res = vec(0); + for (uint i = Cols; i > 0; --i) + { + res = fma(m[i - 1], vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, 
matrix r) +{ + matrix res; + for (uint i = 0; i < RCols; i++) + { + vec tmp(0); + for (uint j = 0; j < LCols; j++) + { + tmp = fma(vec(r[i][j]), l[j], tmp); + } + res[i] = tmp; + } + return res; +} + +struct Matrices +{ + float4x4 vpMatrix; + float4x4 wMatrix; + float4x3 wMatrix4x3; + float3x4 wMatrix3x4; +}; + +struct main0_out +{ + float3 OutNormal [[user(locn0)]]; + float4 OutWorldPos_0 [[user(locn1)]]; + float4 OutWorldPos_1 [[user(locn2)]]; + float4 OutWorldPos_2 [[user(locn3)]]; + float4 OutWorldPos_3 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 InPos [[attribute(0)]]; + float3 InNormal [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant Matrices& _22 [[buffer(0)]]) +{ + main0_out out = {}; + spvUnsafeArray OutWorldPos = {}; + out.gl_Position = spvFMulMatrixVector(spvFMulMatrixMatrix(_22.vpMatrix, _22.wMatrix), float4(in.InPos, 1.0)); + OutWorldPos[0] = spvFMulMatrixVector(_22.wMatrix, float4(in.InPos, 1.0)); + OutWorldPos[1] = spvFMulVectorMatrix(float4(in.InPos, 1.0), _22.wMatrix); + OutWorldPos[2] = spvFMulMatrixVector(_22.wMatrix3x4, in.InPos); + OutWorldPos[3] = spvFMulVectorMatrix(in.InPos, _22.wMatrix4x3); + out.OutNormal = spvFMulMatrixVector(_22.wMatrix, float4(in.InNormal, 0.0)).xyz; + out.OutWorldPos_0 = OutWorldPos[0]; + out.OutWorldPos_1 = OutWorldPos[1]; + out.OutWorldPos_2 = OutWorldPos[2]; + out.OutWorldPos_3 = OutWorldPos[3]; + return out; +} + diff --git a/reference/shaders-msl/vert/float-math.vert b/reference/shaders-msl/vert/float-math.vert new file mode 100644 index 00000000000..e96fdaedc22 --- /dev/null +++ b/reference/shaders-msl/vert/float-math.vert @@ -0,0 +1,87 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct Matrices +{ + float4x4 vpMatrix; + float4x4 wMatrix; + float4x3 wMatrix4x3; + float3x4 wMatrix3x4; +}; + +struct main0_out +{ + float3 OutNormal [[user(locn0)]]; + float4 OutWorldPos_0 [[user(locn1)]]; + float4 OutWorldPos_1 [[user(locn2)]]; + float4 OutWorldPos_2 [[user(locn3)]]; + float4 OutWorldPos_3 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 InPos [[attribute(0)]]; + float3 InNormal [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant Matrices& _22 [[buffer(0)]]) +{ + main0_out out = {}; + spvUnsafeArray OutWorldPos = {}; + out.gl_Position = (_22.vpMatrix * _22.wMatrix) * float4(in.InPos, 1.0); + OutWorldPos[0] = _22.wMatrix * float4(in.InPos, 1.0); + OutWorldPos[1] = float4(in.InPos, 1.0) * _22.wMatrix; + OutWorldPos[2] = _22.wMatrix3x4 * in.InPos; + OutWorldPos[3] = in.InPos * _22.wMatrix4x3; + out.OutNormal = (_22.wMatrix * float4(in.InNormal, 0.0)).xyz; + out.OutWorldPos_0 = OutWorldPos[0]; + out.OutWorldPos_1 = OutWorldPos[1]; + out.OutWorldPos_2 = OutWorldPos[2]; + out.OutWorldPos_3 = OutWorldPos[3]; + return out; +} + diff --git a/reference/shaders-msl/vert/functions.vert b/reference/shaders-msl/vert/functions.vert index f710225261d..4300aa1350a 100644 --- a/reference/shaders-msl/vert/functions.vert +++ 
b/reference/shaders-msl/vert/functions.vert @@ -5,73 +5,52 @@ using namespace metal; -struct UBO -{ - float4x4 uMVP; - float3 rotDeg; - float3 rotRad; - int2 bits; -}; - -struct main0_out -{ - float3 vNormal [[user(locn0)]]; - float3 vRotDeg [[user(locn1)]]; - float3 vRotRad [[user(locn2)]]; - int2 vLSB [[user(locn3)]]; - int2 vMSB [[user(locn4)]]; - float4 gl_Position [[position]]; -}; - -struct main0_in -{ - float4 aVertex [[attribute(0)]]; - float3 aNormal [[attribute(1)]]; -}; - // Implementation of the GLSL radians() function template -T radians(T d) +inline T radians(T d) { return d * T(0.01745329251); } // Implementation of the GLSL degrees() function template -T degrees(T r) +inline T degrees(T r) { return r * T(57.2957795131); } // Implementation of the GLSL findLSB() function template -T findLSB(T x) +inline T spvFindLSB(T x) { return select(ctz(x), T(-1), x == T(0)); } // Implementation of the signed GLSL findMSB() function template -T findSMSB(T x) +inline T spvFindSMSB(T x) { T v = select(x, T(-1) - x, x < T(0)); return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0)); } // Returns the determinant of a 2x2 matrix. -inline float spvDet2x2(float a1, float a2, float b1, float b2) +static inline __attribute__((always_inline)) +float spvDet2x2(float a1, float a2, float b1, float b2) { return a1 * b2 - b1 * a2; } // Returns the determinant of a 3x3 matrix. -inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) +static inline __attribute__((always_inline)) +float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3) { return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, b2, b3); } // Returns the inverse of a matrix, by using the algorithm of calculating the classical // adjoint and dividing by the determinant. The contents of the matrix are changed. 
+static inline __attribute__((always_inline)) float4x4 spvInverse4x4(float4x4 m) { float4x4 adj; // The adjoint matrix (inverse after dividing by determinant) @@ -105,6 +84,30 @@ float4x4 spvInverse4x4(float4x4 m) return (det != 0.0f) ? (adj * (1.0f / det)) : m; } +struct UBO +{ + float4x4 uMVP; + float3 rotDeg; + float3 rotRad; + int2 bits; +}; + +struct main0_out +{ + float3 vNormal [[user(locn0)]]; + float3 vRotDeg [[user(locn1)]]; + float3 vRotRad [[user(locn2)]]; + int2 vLSB [[user(locn3)]]; + int2 vMSB [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]]) { main0_out out = {}; @@ -112,8 +115,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _18 [[buffer(0)]] out.vNormal = in.aNormal; out.vRotDeg = degrees(_18.rotRad); out.vRotRad = radians(_18.rotDeg); - out.vLSB = findLSB(_18.bits); - out.vMSB = findSMSB(_18.bits); + out.vLSB = spvFindLSB(_18.bits); + out.vMSB = spvFindSMSB(_18.bits); return out; } diff --git a/reference/shaders-msl/vert/implicit-position-1.vert b/reference/shaders-msl/vert/implicit-position-1.vert new file mode 100644 index 00000000000..5cea4ee2c20 --- /dev/null +++ b/reference/shaders-msl/vert/implicit-position-1.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 V [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.V = float4(1.0); + return out; +} + diff --git a/reference/shaders-msl/vert/implicit-position-2.vert b/reference/shaders-msl/vert/implicit-position-2.vert new file mode 100644 index 00000000000..9e024c2095b --- /dev/null +++ b/reference/shaders-msl/vert/implicit-position-2.vert @@ -0,0 +1,9 @@ +#include +#include + +using namespace metal; + +vertex void main0() +{ +} + diff --git 
a/reference/shaders-msl/vert/in_out_array_mat.vert b/reference/shaders-msl/vert/in_out_array_mat.vert index 95be574a51e..19bfa7311ea 100644 --- a/reference/shaders-msl/vert/in_out_array_mat.vert +++ b/reference/shaders-msl/vert/in_out_array_mat.vert @@ -1,10 +1,49 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct UBO { float4x4 projection; @@ -38,13 +77,15 @@ struct main0_in float4 inViewMat_3 [[attribute(8)]]; }; -void write_deeper_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3]) +static inline __attribute__((always_inline)) +void write_deeper_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread spvUnsafeArray& colors) { outTransModel[1].y = ubo.lodBias; color = colors[2]; } -void write_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread float4 (&colors)[3], thread float3& inNormal) +static inline __attribute__((always_inline)) +void write_in_function(thread float4x4& outTransModel, constant UBO& ubo, thread float4& color, thread spvUnsafeArray& colors, thread float3& inNormal) { outTransModel[2] = 
float4(inNormal, 1.0); write_deeper_in_function(outTransModel, ubo, color, colors); @@ -54,7 +95,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& ubo [[buffer(0)]] { main0_out out = {}; float4x4 outTransModel = {}; - float4 colors[3] = {}; + spvUnsafeArray colors = {}; float4x4 inViewMat = {}; colors[0] = in.colors_0; colors[1] = in.colors_1; diff --git a/reference/shaders-msl/vert/interface-block-block-composites.frag b/reference/shaders-msl/vert/interface-block-block-composites.frag index c42381d0046..cc2727682d9 100644 --- a/reference/shaders-msl/vert/interface-block-block-composites.frag +++ b/reference/shaders-msl/vert/interface-block-block-composites.frag @@ -1,13 +1,54 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Vert { float3x3 wMatrix; float4 wTmp; - float arr[4]; + spvUnsafeArray arr; }; struct main0_out @@ -20,14 +61,14 @@ struct main0_in float3 vMatrix_0 [[user(locn0)]]; float3 vMatrix_1 [[user(locn1)]]; float3 vMatrix_2 [[user(locn2)]]; - float3 Vert_wMatrix_0 [[user(locn4)]]; - float3 Vert_wMatrix_1 [[user(locn5)]]; - float3 Vert_wMatrix_2 [[user(locn6)]]; - float4 Vert_wTmp [[user(locn7)]]; - float 
Vert_arr_0 [[user(locn8)]]; - float Vert_arr_1 [[user(locn9)]]; - float Vert_arr_2 [[user(locn10)]]; - float Vert_arr_3 [[user(locn11)]]; + float3 m_17_wMatrix_0 [[user(locn4)]]; + float3 m_17_wMatrix_1 [[user(locn5)]]; + float3 m_17_wMatrix_2 [[user(locn6)]]; + float4 m_17_wTmp [[user(locn7)]]; + float m_17_arr_0 [[user(locn8)]]; + float m_17_arr_1 [[user(locn9)]]; + float m_17_arr_2 [[user(locn10)]]; + float m_17_arr_3 [[user(locn11)]]; }; fragment main0_out main0(main0_in in [[stage_in]]) @@ -35,14 +76,14 @@ fragment main0_out main0(main0_in in [[stage_in]]) main0_out out = {}; Vert _17 = {}; float3x3 vMatrix = {}; - _17.wMatrix[0] = in.Vert_wMatrix_0; - _17.wMatrix[1] = in.Vert_wMatrix_1; - _17.wMatrix[2] = in.Vert_wMatrix_2; - _17.wTmp = in.Vert_wTmp; - _17.arr[0] = in.Vert_arr_0; - _17.arr[1] = in.Vert_arr_1; - _17.arr[2] = in.Vert_arr_2; - _17.arr[3] = in.Vert_arr_3; + _17.wMatrix[0] = in.m_17_wMatrix_0; + _17.wMatrix[1] = in.m_17_wMatrix_1; + _17.wMatrix[2] = in.m_17_wMatrix_2; + _17.wTmp = in.m_17_wTmp; + _17.arr[0] = in.m_17_arr_0; + _17.arr[1] = in.m_17_arr_1; + _17.arr[2] = in.m_17_arr_2; + _17.arr[3] = in.m_17_arr_3; vMatrix[0] = in.vMatrix_0; vMatrix[1] = in.vMatrix_1; vMatrix[2] = in.vMatrix_2; diff --git a/reference/shaders-msl/vert/interface-block-block-composites.vert b/reference/shaders-msl/vert/interface-block-block-composites.vert index 3d97ae6dcff..a05c9331586 100644 --- a/reference/shaders-msl/vert/interface-block-block-composites.vert +++ b/reference/shaders-msl/vert/interface-block-block-composites.vert @@ -1,11 +1,52 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct Vert { - float arr[3]; + spvUnsafeArray arr; float3x3 wMatrix; float4 wTmp; }; @@ -15,13 +56,13 @@ struct main0_out float3 vMatrix_0 [[user(locn0)]]; float3 vMatrix_1 [[user(locn1)]]; float3 vMatrix_2 [[user(locn2)]]; - float Vert_arr_0 [[user(locn4)]]; - float Vert_arr_1 [[user(locn5)]]; - float Vert_arr_2 [[user(locn6)]]; - float3 Vert_wMatrix_0 [[user(locn7)]]; - float3 Vert_wMatrix_1 [[user(locn8)]]; - float3 Vert_wMatrix_2 [[user(locn9)]]; - float4 Vert_wTmp [[user(locn10)]]; + float m_20_arr_0 [[user(locn4)]]; + float m_20_arr_1 [[user(locn5)]]; + float m_20_arr_2 [[user(locn6)]]; + float3 m_20_wMatrix_0 [[user(locn7)]]; + float3 m_20_wMatrix_1 [[user(locn8)]]; + float3 m_20_wMatrix_2 [[user(locn9)]]; + float4 m_20_wTmp [[user(locn10)]]; float4 gl_Position [[position]]; }; @@ -52,13 +93,13 @@ vertex main0_out main0(main0_in in [[stage_in]]) out.vMatrix_0 = vMatrix[0]; out.vMatrix_1 = vMatrix[1]; out.vMatrix_2 = vMatrix[2]; - out.Vert_arr_0 = _20.arr[0]; - out.Vert_arr_1 = _20.arr[1]; - out.Vert_arr_2 = _20.arr[2]; - out.Vert_wMatrix_0 = _20.wMatrix[0]; - out.Vert_wMatrix_1 = _20.wMatrix[1]; - out.Vert_wMatrix_2 = _20.wMatrix[2]; - out.Vert_wTmp = _20.wTmp; + out.m_20_arr_0 = _20.arr[0]; + out.m_20_arr_1 = _20.arr[1]; + out.m_20_arr_2 = _20.arr[2]; + out.m_20_wMatrix_0 = _20.wMatrix[0]; + 
out.m_20_wMatrix_1 = _20.wMatrix[1]; + out.m_20_wMatrix_2 = _20.wMatrix[2]; + out.m_20_wTmp = _20.wTmp; return out; } diff --git a/reference/shaders-msl/vert/interface-block-single-element-array.vert b/reference/shaders-msl/vert/interface-block-single-element-array.vert new file mode 100644 index 00000000000..6858db730e3 --- /dev/null +++ b/reference/shaders-msl/vert/interface-block-single-element-array.vert @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct TDPickVertex +{ + float4 c; + spvUnsafeArray uv; +}; + +struct main0_out +{ + float4 oTDVert_c [[user(locn0)]]; + float3 oTDVert_uv_0 [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float3 P [[attribute(0)]]; + float3 uv_0 [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + TDPickVertex oTDVert = {}; + spvUnsafeArray uv = {}; + uv[0] = in.uv_0; + out.gl_Position = float4(in.P, 1.0); + oTDVert.uv[0] = uv[0]; + oTDVert.c = float4(1.0); + out.oTDVert_c = oTDVert.c; + out.oTDVert_uv_0 = oTDVert.uv[0]; + return out; +} + diff --git a/reference/shaders-msl/vert/interpolation-qualifiers-block.vert 
b/reference/shaders-msl/vert/interpolation-qualifiers-block.vert index 4206623b4f6..1ae24c7e5b5 100644 --- a/reference/shaders-msl/vert/interpolation-qualifiers-block.vert +++ b/reference/shaders-msl/vert/interpolation-qualifiers-block.vert @@ -16,13 +16,13 @@ struct Output struct main0_out { - float2 Output_v0 [[user(locn0)]]; - float2 Output_v1 [[user(locn1)]]; - float3 Output_v2 [[user(locn2)]]; - float4 Output_v3 [[user(locn3)]]; - float Output_v4 [[user(locn4)]]; - float Output_v5 [[user(locn5)]]; - float Output_v6 [[user(locn6)]]; + float2 outp_v0 [[user(locn0)]]; + float2 outp_v1 [[user(locn1)]]; + float3 outp_v2 [[user(locn2)]]; + float4 outp_v3 [[user(locn3)]]; + float outp_v4 [[user(locn4)]]; + float outp_v5 [[user(locn5)]]; + float outp_v6 [[user(locn6)]]; float4 gl_Position [[position]]; }; @@ -43,13 +43,13 @@ vertex main0_out main0(main0_in in [[stage_in]]) outp.v5 = in.Position.y; outp.v6 = in.Position.x * in.Position.w; out.gl_Position = in.Position; - out.Output_v0 = outp.v0; - out.Output_v1 = outp.v1; - out.Output_v2 = outp.v2; - out.Output_v3 = outp.v3; - out.Output_v4 = outp.v4; - out.Output_v5 = outp.v5; - out.Output_v6 = outp.v6; + out.outp_v0 = outp.v0; + out.outp_v1 = outp.v1; + out.outp_v2 = outp.v2; + out.outp_v3 = outp.v3; + out.outp_v4 = outp.v4; + out.outp_v5 = outp.v5; + out.outp_v6 = outp.v6; return out; } diff --git a/reference/shaders-msl/vert/leaf-function.capture.vert b/reference/shaders-msl/vert/leaf-function.capture.vert index 5a8469d1ac3..6519e56b8ff 100644 --- a/reference/shaders-msl/vert/leaf-function.capture.vert +++ b/reference/shaders-msl/vert/leaf-function.capture.vert @@ -22,6 +22,7 @@ struct main0_in float3 aNormal [[attribute(1)]]; }; +static inline __attribute__((always_inline)) void set_output(device float4& gl_Position, constant UBO& v_18, thread float4& aVertex, device float3& vNormal, thread float3& aNormal) { gl_Position = v_18.uMVP * aVertex; diff --git a/reference/shaders-msl/vert/leaf-function.for-tess.vert 
b/reference/shaders-msl/vert/leaf-function.for-tess.vert new file mode 100644 index 00000000000..5a960e5ec84 --- /dev/null +++ b/reference/shaders-msl/vert/leaf-function.for-tess.vert @@ -0,0 +1,39 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct UBO +{ + float4x4 uMVP; +}; + +struct main0_out +{ + float3 vNormal; + float4 gl_Position; +}; + +struct main0_in +{ + float4 aVertex [[attribute(0)]]; + float3 aNormal [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +void set_output(device float4& gl_Position, constant UBO& v_18, thread float4& aVertex, device float3& vNormal, thread float3& aNormal) +{ + gl_Position = v_18.uMVP * aVertex; + vNormal = aNormal; +} + +kernel void main0(main0_in in [[stage_in]], constant UBO& v_18 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], device main0_out* spvOut [[buffer(28)]]) +{ + device main0_out& out = spvOut[gl_GlobalInvocationID.y * spvStageInputSize.x + gl_GlobalInvocationID.x]; + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + set_output(out.gl_Position, v_18, in.aVertex, out.vNormal, in.aNormal); +} + diff --git a/reference/shaders-msl/vert/no-contraction.vert b/reference/shaders-msl/vert/no-contraction.vert new file mode 100644 index 00000000000..26bef234e1f --- /dev/null +++ b/reference/shaders-msl/vert/no-contraction.vert @@ -0,0 +1,92 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +[[clang::optnone]] T spvFMul(T l, T r) +{ + return fma(l, r, T(0)); +} + +template +[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m) +{ + vec res = vec(0); + for (uint i = Rows; i > 0; --i) + { + vec tmp(0); + for (uint j = 0; j < Cols; ++j) + { + tmp[j] = m[j][i - 1]; + } + res = fma(tmp, vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] vec spvFMulMatrixVector(matrix 
m, vec v) +{ + vec res = vec(0); + for (uint i = Cols; i > 0; --i) + { + res = fma(m[i - 1], vec(v[i - 1]), res); + } + return res; +} + +template +[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, matrix r) +{ + matrix res; + for (uint i = 0; i < RCols; i++) + { + vec tmp(0); + for (uint j = 0; j < LCols; j++) + { + tmp = fma(vec(r[i][j]), l[j], tmp); + } + res[i] = tmp; + } + return res; +} + +template +[[clang::optnone]] T spvFAdd(T l, T r) +{ + return fma(T(1), l, r); +} + +template +[[clang::optnone]] T spvFSub(T l, T r) +{ + return fma(T(-1), r, l); +} + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vA [[attribute(0)]]; + float4 vB [[attribute(1)]]; + float4 vC [[attribute(2)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 mul = spvFMul(in.vA, in.vB); + float4 add = spvFAdd(in.vA, in.vB); + float4 sub = spvFSub(in.vA, in.vB); + float4 mad = spvFAdd(spvFMul(in.vA, in.vB), in.vC); + float4 summed = spvFAdd(spvFAdd(spvFAdd(mul, add), sub), mad); + out.gl_Position = summed; + return out; +} + diff --git a/reference/shaders-msl/vert/no-disable-vertex-out.frag-output.vert b/reference/shaders-msl/vert/no-disable-vertex-out.frag-output.vert new file mode 100644 index 00000000000..14cc94937c0 --- /dev/null +++ b/reference/shaders-msl/vert/no-disable-vertex-out.frag-output.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct buf +{ + float4x4 MVP; + float4 position[36]; + float4 attr[36]; +}; + +struct main0_out +{ + float4 texcoord [[user(locn0)]]; + float3 frag_pos [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant buf& ubuf [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.texcoord = ubuf.attr[int(gl_VertexIndex)]; + out.gl_Position = ubuf.MVP * ubuf.position[int(gl_VertexIndex)]; + out.frag_pos = out.gl_Position.xyz; + return out; +} + diff --git 
a/reference/shaders-msl/vert/no_stage_out.for-tess.vert b/reference/shaders-msl/vert/no_stage_out.for-tess.vert new file mode 100644 index 00000000000..984e83260aa --- /dev/null +++ b/reference/shaders-msl/vert/no_stage_out.for-tess.vert @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct _RESERVED_IDENTIFIER_FIXUP_10_12 +{ + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; +}; + +struct main0_in +{ + uint4 _RESERVED_IDENTIFIER_FIXUP_19 [[attribute(0)]]; +}; + +kernel void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_10_12& _RESERVED_IDENTIFIER_FIXUP_12 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 spvStageInputSize [[grid_size]], uint3 spvDispatchBase [[grid_origin]]) +{ + if (any(gl_GlobalInvocationID >= spvStageInputSize)) + return; + uint gl_VertexIndex = gl_GlobalInvocationID.x + spvDispatchBase.x; + _RESERVED_IDENTIFIER_FIXUP_12._RESERVED_IDENTIFIER_FIXUP_m0[int(gl_VertexIndex)] = in._RESERVED_IDENTIFIER_FIXUP_19; +} + diff --git a/reference/shaders-msl/vert/no_stage_out.vert b/reference/shaders-msl/vert/no_stage_out.vert index 28098ee88e6..e804da67535 100644 --- a/reference/shaders-msl/vert/no_stage_out.vert +++ b/reference/shaders-msl/vert/no_stage_out.vert @@ -3,18 +3,18 @@ using namespace metal; -struct _10 +struct _RESERVED_IDENTIFIER_FIXUP_10_12 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; struct main0_in { - uint4 m_19 [[attribute(0)]]; + uint4 _RESERVED_IDENTIFIER_FIXUP_19 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _10& _12 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +vertex void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_10_12& _RESERVED_IDENTIFIER_FIXUP_12 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) { - _12._m0[gl_VertexIndex] = in.m_19; + _RESERVED_IDENTIFIER_FIXUP_12._RESERVED_IDENTIFIER_FIXUP_m0[int(gl_VertexIndex)] = in._RESERVED_IDENTIFIER_FIXUP_19; } diff --git 
a/reference/shaders-msl/vert/no_stage_out.write_buff.vert b/reference/shaders-msl/vert/no_stage_out.write_buff.vert index 23fa0817c33..fb8060f0722 100644 --- a/reference/shaders-msl/vert/no_stage_out.write_buff.vert +++ b/reference/shaders-msl/vert/no_stage_out.write_buff.vert @@ -3,14 +3,14 @@ using namespace metal; -struct _35 +struct _RESERVED_IDENTIFIER_FIXUP_33_35 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; -struct _40 +struct _RESERVED_IDENTIFIER_FIXUP_38_40 { - uint4 _m0[1024]; + uint4 _RESERVED_IDENTIFIER_FIXUP_m0[1024]; }; struct main0_out @@ -20,16 +20,16 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _35& _37 [[buffer(0)]], constant _40& _42 [[buffer(1)]]) +vertex void main0(main0_in in [[stage_in]], device _RESERVED_IDENTIFIER_FIXUP_33_35& _RESERVED_IDENTIFIER_FIXUP_35 [[buffer(0)]], constant _RESERVED_IDENTIFIER_FIXUP_38_40& _RESERVED_IDENTIFIER_FIXUP_40 [[buffer(1)]]) { main0_out out = {}; - out.gl_Position = in.m_17; - for (int _22 = 0; _22 < 1024; _22++) + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; + for (int _RESERVED_IDENTIFIER_FIXUP_19 = 0; _RESERVED_IDENTIFIER_FIXUP_19 < 1024; _RESERVED_IDENTIFIER_FIXUP_19++) { - _37._m0[_22] = _42._m0[_22]; + _RESERVED_IDENTIFIER_FIXUP_35._RESERVED_IDENTIFIER_FIXUP_m0[_RESERVED_IDENTIFIER_FIXUP_19] = _RESERVED_IDENTIFIER_FIXUP_40._RESERVED_IDENTIFIER_FIXUP_m0[_RESERVED_IDENTIFIER_FIXUP_19]; } } diff --git a/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert b/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert index 9fe99e29fe1..68c649ed6c0 100644 --- a/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert +++ b/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert @@ -6,9 +6,9 @@ using namespace metal; -struct _23 +struct _RESERVED_IDENTIFIER_FIXUP_19_21 { - uint _m0; + uint 
_RESERVED_IDENTIFIER_FIXUP_m0; }; struct main0_out @@ -18,14 +18,14 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]]) +vertex void main0(main0_in in [[stage_in]], volatile device _RESERVED_IDENTIFIER_FIXUP_19_21& _RESERVED_IDENTIFIER_FIXUP_21 [[buffer(0)]]) { main0_out out = {}; - out.gl_Position = in.m_17; - uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_25._m0, 1u, memory_order_relaxed); - uint _22 = _29; + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; + uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_RESERVED_IDENTIFIER_FIXUP_21._RESERVED_IDENTIFIER_FIXUP_m0, 1u, memory_order_relaxed); + uint _RESERVED_IDENTIFIER_FIXUP_26 = _29; } diff --git a/reference/shaders-msl/vert/no_stage_out.write_tex.vert b/reference/shaders-msl/vert/no_stage_out.write_tex.vert index 76eb6f24fbf..dfe1c328d9e 100644 --- a/reference/shaders-msl/vert/no_stage_out.write_tex.vert +++ b/reference/shaders-msl/vert/no_stage_out.write_tex.vert @@ -10,16 +10,16 @@ struct main0_out struct main0_in { - float4 m_17 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_14 [[attribute(0)]]; }; -vertex void main0(main0_in in [[stage_in]], texture1d _34 [[texture(0)]], texture1d _37 [[texture(1)]]) +vertex void main0(main0_in in [[stage_in]], texture1d _RESERVED_IDENTIFIER_FIXUP_32 [[texture(0)]], texture1d _RESERVED_IDENTIFIER_FIXUP_35 [[texture(1)]]) { main0_out out = {}; - out.gl_Position = in.m_17; - for (int _22 = 0; _22 < 128; _22++) + out.gl_Position = in._RESERVED_IDENTIFIER_FIXUP_14; + for (int _RESERVED_IDENTIFIER_FIXUP_19 = 0; _RESERVED_IDENTIFIER_FIXUP_19 < 128; _RESERVED_IDENTIFIER_FIXUP_19++) { - _34.write(_37.read(uint(_22)), uint(_22)); + _RESERVED_IDENTIFIER_FIXUP_32.write(_RESERVED_IDENTIFIER_FIXUP_35.read(uint(_RESERVED_IDENTIFIER_FIXUP_19)), uint(_RESERVED_IDENTIFIER_FIXUP_19)); } } 
diff --git a/reference/shaders-msl/vert/out-block-with-nested-struct-array.vert b/reference/shaders-msl/vert/out-block-with-nested-struct-array.vert new file mode 100644 index 00000000000..cabcfcb521d --- /dev/null +++ b/reference/shaders-msl/vert/out-block-with-nested-struct-array.vert @@ -0,0 +1,88 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct t21 +{ + float4 m0; + float4 m1; +}; + +struct t24 +{ + spvUnsafeArray m0; +}; + +struct main0_out +{ + float4 v26_m0_0_m0 [[user(locn0)]]; + float4 v26_m0_0_m1 [[user(locn1)]]; + float4 v26_m0_1_m0 [[user(locn2)]]; + float4 v26_m0_1_m1 [[user(locn3)]]; + float4 v26_m0_2_m0 [[user(locn4)]]; + float4 v26_m0_2_m1 [[user(locn5)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 v17 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + t24 v26 = {}; + out.gl_Position = in.v17; + v26.m0[1].m1 = float4(-4.0, -9.0, 3.0, 7.0); + out.v26_m0_0_m0 = v26.m0[0].m0; + out.v26_m0_0_m1 = v26.m0[0].m1; + out.v26_m0_1_m0 = v26.m0[1].m0; + out.v26_m0_1_m1 = v26.m0[1].m1; + out.v26_m0_2_m0 = v26.m0[2].m0; + out.v26_m0_2_m1 = v26.m0[2].m1; + 
return out; +} + diff --git a/reference/shaders-msl/vert/out-block-with-struct-array.vert b/reference/shaders-msl/vert/out-block-with-struct-array.vert new file mode 100644 index 00000000000..61c7c18b54c --- /dev/null +++ b/reference/shaders-msl/vert/out-block-with-struct-array.vert @@ -0,0 +1,83 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct t21 +{ + float m0; + float4 m1; +}; + +struct main0_out +{ + float v25_0_m0 [[user(locn0)]]; + float4 v25_0_m1 [[user(locn1)]]; + float v25_1_m0 [[user(locn2)]]; + float4 v25_1_m1 [[user(locn3)]]; + float v25_2_m0 [[user(locn4)]]; + float4 v25_2_m1 [[user(locn5)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 v17 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray v25 = {}; + out.gl_Position = in.v17; + v25[2].m1 = float4(-4.0, -9.0, 3.0, 7.0); + out.v25_0_m0 = v25[0].m0; + out.v25_0_m1 = v25[0].m1; + out.v25_1_m0 = v25[1].m0; + out.v25_1_m1 = v25[1].m1; + out.v25_2_m0 = v25[2].m0; + out.v25_2_m1 = v25[2].m1; + return out; +} + diff --git a/reference/shaders-msl/vert/out_block.vert 
b/reference/shaders-msl/vert/out_block.vert index 45b897013b1..909a059bd2c 100644 --- a/reference/shaders-msl/vert/out_block.vert +++ b/reference/shaders-msl/vert/out_block.vert @@ -16,8 +16,8 @@ struct VertexOut struct main0_out { - float4 VertexOut_color [[user(locn2)]]; - float4 VertexOut_color2 [[user(locn3)]]; + float4 outputs_color [[user(locn2)]]; + float4 outputs_color2 [[user(locn3)]]; float4 gl_Position [[position]]; }; @@ -34,8 +34,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant Transform& block [[buf out.gl_Position = block.transform * float4(in.position, 1.0); outputs.color = in.color; outputs.color2 = in.color + float4(1.0); - out.VertexOut_color = outputs.color; - out.VertexOut_color2 = outputs.color2; + out.outputs_color = outputs.color; + out.outputs_color2 = outputs.color2; return out; } diff --git a/reference/shaders-msl/vert/packed-bool-to-uint.vert b/reference/shaders-msl/vert/packed-bool-to-uint.vert new file mode 100644 index 00000000000..6cc55204848 --- /dev/null +++ b/reference/shaders-msl/vert/packed-bool-to-uint.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _24 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _24.umatrix * float4(_24.uquad[int(gl_VertexIndex)].x, _24.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_24.flags.flags[0] != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/shaders-msl/vert/packed-bool2-to-packed_uint2.vert b/reference/shaders-msl/vert/packed-bool2-to-packed_uint2.vert new file mode 100644 index 00000000000..4c46aaeb4ea --- /dev/null +++ 
b/reference/shaders-msl/vert/packed-bool2-to-packed_uint2.vert @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct Struct +{ + uint2 flags[1]; +}; + +struct defaultUniformsVS +{ + Struct flags; + float4 uquad[4]; + float4x4 umatrix; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 a_position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant defaultUniformsVS& _25 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]]) +{ + main0_out out = {}; + out.gl_Position = _25.umatrix * float4(_25.uquad[int(gl_VertexIndex)].x, _25.uquad[int(gl_VertexIndex)].y, in.a_position.z, in.a_position.w); + if (_25.flags.flags[0].x != 0u) + { + out.gl_Position.z = 0.0; + } + return out; +} + diff --git a/reference/shaders-msl/vert/packed_matrix.vert b/reference/shaders-msl/vert/packed_matrix.vert index db688115811..e18d5f225a7 100644 --- a/reference/shaders-msl/vert/packed_matrix.vert +++ b/reference/shaders-msl/vert/packed_matrix.vert @@ -3,55 +3,53 @@ using namespace metal; -typedef packed_float4 packed_rm_float4x3[3]; - -struct _15 +struct _RESERVED_IDENTIFIER_FIXUP_1365_18812 { - packed_rm_float4x3 _m0; - packed_rm_float4x3 _m1; + float3x4 _RESERVED_IDENTIFIER_FIXUP_m0; + float3x4 _RESERVED_IDENTIFIER_FIXUP_m1; }; -struct _42 +struct _RESERVED_IDENTIFIER_FIXUP_1126_22044 { - float4x4 _m0; - float4x4 _m1; - float _m2; + float4x4 _RESERVED_IDENTIFIER_FIXUP_m0; + float4x4 _RESERVED_IDENTIFIER_FIXUP_m1; + float _RESERVED_IDENTIFIER_FIXUP_m9; char _m3_pad[12]; - packed_float3 _m3; - float _m4; - packed_float3 _m5; - float _m6; - float _m7; - float _m8; - float2 _m9; + packed_float3 _RESERVED_IDENTIFIER_FIXUP_m10; + float _RESERVED_IDENTIFIER_FIXUP_m11; + packed_float3 _RESERVED_IDENTIFIER_FIXUP_m12; + float _RESERVED_IDENTIFIER_FIXUP_m17; + float _RESERVED_IDENTIFIER_FIXUP_m18; + float _RESERVED_IDENTIFIER_FIXUP_m19; + float2 _RESERVED_IDENTIFIER_FIXUP_m20; }; struct main0_out { - float3 m_72 
[[user(locn0)]]; + float3 _RESERVED_IDENTIFIER_FIXUP_3976 [[user(locn0)]]; float4 gl_Position [[position]]; }; struct main0_in { - float4 m_25 [[attribute(0)]]; + float4 _RESERVED_IDENTIFIER_FIXUP_5275 [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant _15& _17 [[buffer(0)]], constant _42& _44 [[buffer(1)]]) +vertex main0_out main0(main0_in in [[stage_in]], constant _RESERVED_IDENTIFIER_FIXUP_1365_18812& _RESERVED_IDENTIFIER_FIXUP_18812 [[buffer(0)]], constant _RESERVED_IDENTIFIER_FIXUP_1126_22044& _RESERVED_IDENTIFIER_FIXUP_22044 [[buffer(1)]]) { main0_out out = {}; - float3 _91; - float3 _13; - do + float3 _RESERVED_IDENTIFIER_FIXUP_2; + float3 _RESERVED_IDENTIFIER_FIXUP_23783; + for (;;) { - _13 = normalize(float4(in.m_25.xyz, 0.0) * float3x4(float4(_17._m1[0]), float4(_17._m1[1]), float4(_17._m1[2]))); + _RESERVED_IDENTIFIER_FIXUP_23783 = fast::normalize(float4(in._RESERVED_IDENTIFIER_FIXUP_5275.xyz, 0.0) * _RESERVED_IDENTIFIER_FIXUP_18812._RESERVED_IDENTIFIER_FIXUP_m1); break; - } while (false); - float4 _39 = _44._m0 * float4(float3(_44._m3) + (in.m_25.xyz * (_44._m6 + _44._m7)), 1.0); - out.m_72 = _13; - float4 _74 = _39; - _74.y = -_39.y; - out.gl_Position = _74; + } + float4 _RESERVED_IDENTIFIER_FIXUP_14995 = _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m0 * float4(float3(_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m10) + (in._RESERVED_IDENTIFIER_FIXUP_5275.xyz * (_RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m17 + _RESERVED_IDENTIFIER_FIXUP_22044._RESERVED_IDENTIFIER_FIXUP_m18)), 1.0); + out._RESERVED_IDENTIFIER_FIXUP_3976 = _RESERVED_IDENTIFIER_FIXUP_23783; + float4 _RESERVED_IDENTIFIER_FIXUP_6282 = _RESERVED_IDENTIFIER_FIXUP_14995; + _RESERVED_IDENTIFIER_FIXUP_6282.y = -_RESERVED_IDENTIFIER_FIXUP_14995.y; + out.gl_Position = _RESERVED_IDENTIFIER_FIXUP_6282; return out; } diff --git a/reference/shaders-msl/vert/read-from-row-major-array.vert 
b/reference/shaders-msl/vert/read-from-row-major-array.vert index 9a633c5fe63..d6ade7c38b9 100644 --- a/reference/shaders-msl/vert/read-from-row-major-array.vert +++ b/reference/shaders-msl/vert/read-from-row-major-array.vert @@ -7,7 +7,7 @@ using namespace metal; struct Block { - float2x3 var[3][4]; + float3x4 var[3][4]; }; struct main0_out @@ -21,17 +21,13 @@ struct main0_in float4 a_position [[attribute(0)]]; }; -// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization. -float2x3 spvConvertFromRowMajor2x3(float2x3 m) -{ - return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2])); -} - +static inline __attribute__((always_inline)) float compare_float(thread const float& a, thread const float& b) { return float(abs(a - b) < 0.0500000007450580596923828125); } +static inline __attribute__((always_inline)) float compare_vec3(thread const float3& a, thread const float3& b) { float param = a.x; @@ -43,6 +39,7 @@ float compare_vec3(thread const float3& a, thread const float3& b) return (compare_float(param, param_1) * compare_float(param_2, param_3)) * compare_float(param_4, param_5); } +static inline __attribute__((always_inline)) float compare_mat2x3(thread const float2x3& a, thread const float2x3& b) { float3 param = a[0]; @@ -57,7 +54,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant Block& _104 [[buffer(0 main0_out out = {}; out.gl_Position = in.a_position; float result = 1.0; - float2x3 param = spvConvertFromRowMajor2x3(_104.var[0][0]); + float2x3 param = transpose(float3x2(_104.var[0][0][0].xy, _104.var[0][0][1].xy, _104.var[0][0][2].xy)); float2x3 param_1 = float2x3(float3(2.0, 6.0, -6.0), float3(0.0, 5.0, 5.0)); result *= compare_mat2x3(param, param_1); out.v_vtxResult = result; diff --git a/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert b/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert index 97e97e6fd29..fad06d6afec 100644 --- 
a/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert +++ b/reference/shaders-msl/vert/resource-arrays-leaf.ios.vert @@ -22,7 +22,8 @@ struct constant_block #endif constant int arraySize = SPIRV_CROSS_CONSTANT_ID_0; -void doWork(device storage_block* (&storage)[2], constant constant_block* (&constants)[4], thread const array, 3> images) +static inline __attribute__((always_inline)) +void doWork(device storage_block* (&storage)[2], constant constant_block* (&constants)[4], thread const array, 3>& images) { storage[0]->baz = uint4(constants[3]->foo); storage[1]->quux = images[2].read(uint2(int2(constants[1]->bar))).xy; diff --git a/reference/shaders-msl/vert/return-array.force-native-array.vert b/reference/shaders-msl/vert/return-array.force-native-array.vert new file mode 100644 index 00000000000..4793b4aca9e --- /dev/null +++ b/reference/shaders-msl/vert/return-array.force-native-array.vert @@ -0,0 +1,154 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +template +inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T 
(&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToDevice1(device T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromConstantToDevice1(device T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromStackToDevice1(device T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromThreadGroupToDevice1(device T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToStack1(thread T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +inline void spvArrayCopyFromDeviceToThreadGroup1(threadgroup T (&dst)[A], device const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +constant float4 _20[2] = { float4(10.0), float4(20.0) }; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vInput0 [[attribute(0)]]; + float4 vInput1 [[attribute(1)]]; +}; + +static inline __attribute__((always_inline)) +void test(thread float4 (&spvReturnValue)[2]) +{ + spvArrayCopyFromConstantToStack1(spvReturnValue, _20); +} + +static inline __attribute__((always_inline)) +void test2(thread float4 (&spvReturnValue)[2], thread float4& vInput0, thread float4& vInput1) +{ + float4 foobar[2]; + foobar[0] = vInput0; + foobar[1] = vInput1; + spvArrayCopyFromStackToStack1(spvReturnValue, foobar); +} + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + float4 _42[2]; + test(_42); + float4 _44[2]; + test2(_44, in.vInput0, in.vInput1); + out.gl_Position = _42[0] + _44[1]; + return out; +} + diff --git 
a/reference/shaders-msl/vert/return-array.vert b/reference/shaders-msl/vert/return-array.vert index cd06fddaa80..dacb0ba3053 100644 --- a/reference/shaders-msl/vert/return-array.vert +++ b/reference/shaders-msl/vert/return-array.vert @@ -1,11 +1,50 @@ #pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; -constant float4 _20[2] = { float4(10.0), float4(20.0) }; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +constant spvUnsafeArray _20 = spvUnsafeArray({ float4(10.0), float4(20.0) }); struct main0_out { @@ -18,40 +57,25 @@ struct main0_in float4 vInput1 [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. 
-template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -void test(thread float4 (&SPIRV_Cross_return_value)[2]) +static inline __attribute__((always_inline)) +spvUnsafeArray test() { - spvArrayCopyFromConstant1(SPIRV_Cross_return_value, _20); + return _20; } -void test2(thread float4 (&SPIRV_Cross_return_value)[2], thread float4& vInput0, thread float4& vInput1) +static inline __attribute__((always_inline)) +spvUnsafeArray test2(thread float4& vInput0, thread float4& vInput1) { - float4 foobar[2]; + spvUnsafeArray foobar; foobar[0] = vInput0; foobar[1] = vInput1; - spvArrayCopyFromStack1(SPIRV_Cross_return_value, foobar); + return foobar; } vertex main0_out main0(main0_in in [[stage_in]]) { main0_out out = {}; - float4 _42[2]; - test(_42); - float4 _44[2]; - test2(_44, in.vInput0, in.vInput1); - out.gl_Position = _42[0] + _44[1]; + out.gl_Position = test()[0] + test2(in.vInput0, in.vInput1)[1]; return out; } diff --git a/reference/shaders-msl/vert/set_builtin_in_func.vert b/reference/shaders-msl/vert/set_builtin_in_func.vert index 2952748dc00..91057da2ba9 100644 --- a/reference/shaders-msl/vert/set_builtin_in_func.vert +++ b/reference/shaders-msl/vert/set_builtin_in_func.vert @@ -11,6 +11,7 @@ struct main0_out float gl_PointSize [[point_size]]; }; +static inline __attribute__((always_inline)) void write_outblock(thread float4& gl_Position, thread float& gl_PointSize) { gl_PointSize = 1.0; diff --git a/reference/shaders-msl/vert/sign-int-types.vert b/reference/shaders-msl/vert/sign-int-types.vert index 2f518b12911..f5f647d4589 100644 --- a/reference/shaders-msl/vert/sign-int-types.vert +++ b/reference/shaders-msl/vert/sign-int-types.vert @@ -5,6 +5,13 @@ using namespace metal; +// Implementation of the GLSL sign() function for integer types 
+template::value>::type> +inline T sign(T x) +{ + return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); +} + struct UBO { float4x4 uMVP; @@ -36,13 +43,6 @@ struct main0_in float4 aVertex [[attribute(0)]]; }; -// Implementation of the GLSL sign() function for integer types -template::value>::type> -T sign(T x) -{ - return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0)); -} - vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]) { main0_out out = {}; diff --git a/reference/shaders-msl/vert/signedness-mismatch.shader-inputs.vert b/reference/shaders-msl/vert/signedness-mismatch.shader-inputs.vert new file mode 100644 index 00000000000..56e00199cb1 --- /dev/null +++ b/reference/shaders-msl/vert/signedness-mismatch.shader-inputs.vert @@ -0,0 +1,74 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + ushort2 a [[attribute(0)]]; + uint3 b [[attribute(1)]]; + ushort c_0 [[attribute(2)]]; + ushort c_1 [[attribute(3)]]; + uint4 d_0 [[attribute(4)]]; + uint4 d_1 [[attribute(5)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + spvUnsafeArray c = {}; + spvUnsafeArray d = {}; + c[0] = in.c_0; + c[1] = in.c_1; + d[0] = in.d_0; + d[1] = in.d_1; + out.gl_Position = float4(float(int(short(in.a.x))), float(int(in.b.x)), float(uint(c[1])), float(d[0].w)); + return out; +} + diff --git a/reference/shaders-msl/vert/texture_buffer.vert b/reference/shaders-msl/vert/texture_buffer.vert index ee3956fad84..9d8b5c49f02 100644 --- a/reference/shaders-msl/vert/texture_buffer.vert +++ b/reference/shaders-msl/vert/texture_buffer.vert @@ -5,17 +5,18 @@ using namespace metal; -struct main0_out -{ - float4 gl_Position [[position]]; -}; - // Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) uint2 spvTexelBufferCoord(uint tc) { return uint2(tc % 4096, tc / 4096); } +struct main0_out +{ + float4 gl_Position [[position]]; +}; + vertex main0_out main0(texture2d uSamp [[texture(0)]], texture2d uSampo [[texture(1)]]) { main0_out out = {}; diff --git 
a/reference/shaders-msl/vert/uniform-struct-out-of-order-offests.vert b/reference/shaders-msl/vert/uniform-struct-out-of-order-offests.vert new file mode 100644 index 00000000000..4f71b205570 --- /dev/null +++ b/reference/shaders-msl/vert/uniform-struct-out-of-order-offests.vert @@ -0,0 +1,35 @@ +#include +#include + +using namespace metal; + +struct data_u_t +{ + int4 m1[3]; + uint m3; + uint3 m2; + int4 m0[8]; +}; + +struct main0_out +{ + float foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vtx_posn [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant data_u_t& data_u [[buffer(0)]]) +{ + main0_out out = {}; + out.gl_Position = in.vtx_posn; + int4 a = data_u.m1[1]; + uint3 b = data_u.m2; + int c = data_u.m0[4].x; + out.foo = float((uint3(a.xyz) + b).y * uint(c)); + return out; +} + diff --git a/reference/shaders-msl/vert/uniform-struct-packing-nested.vert b/reference/shaders-msl/vert/uniform-struct-packing-nested.vert new file mode 100644 index 00000000000..bfcae2a56c7 --- /dev/null +++ b/reference/shaders-msl/vert/uniform-struct-packing-nested.vert @@ -0,0 +1,57 @@ +#include +#include + +using namespace metal; + +typedef packed_float4 packed_rm_float4x4[4]; + +struct s0 +{ + float3x4 m0; + packed_int4 m1; + packed_rm_float4x4 m2; + packed_uint2 m3; +}; + +struct s1 +{ + float4x4 m0; + int m1; + char _m2_pad[12]; + packed_uint3 m2; + s0 m3; +}; + +struct data_u_t +{ + float4 m1[5]; + float2x4 m3; + int4 m4; + s1 m2; + float3x4 m0; +}; + +struct main0_out +{ + float foo [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 vtx_posn [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant data_u_t& data_u [[buffer(0)]]) +{ + main0_out out = {}; + out.gl_Position = in.vtx_posn; + float2 a = data_u.m1[3].xy; + int4 b = data_u.m4; + float2x3 c = transpose(float3x2(data_u.m0[0].xy, data_u.m0[1].xy, data_u.m0[2].xy)); + float3x4 d = 
transpose(float4x3(data_u.m2.m0[0].xyz, data_u.m2.m0[1].xyz, data_u.m2.m0[2].xyz, data_u.m2.m0[3].xyz)); + float4x4 e = transpose(float4x4(float4(data_u.m2.m3.m2[0]), float4(data_u.m2.m3.m2[1]), float4(data_u.m2.m3.m2[2]), float4(data_u.m2.m3.m2[3]))); + out.foo = (((a.y + float(b.z)) * c[1].z) * d[2].w) * e[3].w; + return out; +} + diff --git a/reference/shaders-msl/vert/unused-position.vert b/reference/shaders-msl/vert/unused-position.vert new file mode 100644 index 00000000000..7dc4672139c --- /dev/null +++ b/reference/shaders-msl/vert/unused-position.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; + float gl_PointSize [[point_size]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + out.gl_PointSize = 1.0; + return out; +} + diff --git a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp deleted file mode 100644 index 4ebab8c7346..00000000000 --- a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ /dev/null @@ -1,146 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct SSBO -{ - float FragColor; -}; - -inline uint4 spvSubgroupBallot(bool value) -{ - simd_vote vote = simd_ballot(value); - // simd_ballot() returns a 64-bit integer-like object, but - // SPIR-V callers expect a uint4. We must convert. - // FIXME: This won't include higher bits if Apple ever supports - // 128 lanes in an SIMD-group. 
- return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); -} - -inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) -{ - return !!extract_bits(ballot[bit / 32], bit % 32, 1); -} - -inline uint spvSubgroupBallotFindLSB(uint4 ballot) -{ - return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); -} - -inline uint spvSubgroupBallotFindMSB(uint4 ballot) -{ - return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); -} - -inline uint spvSubgroupBallotBitCount(uint4 ballot) -{ - return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); -} - -inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -template -inline bool spvSubgroupAllEqual(T value) -{ - return simd_all(value == simd_broadcast_first(value)); -} - -template<> -inline bool spvSubgroupAllEqual(bool value) -{ - return simd_all(value) || !simd_any(value); -} - -kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize 
[[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) -{ - uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); - uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); - uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); - uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = float(gl_SubgroupInvocationID); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device); - simdgroup_barrier(mem_flags::mem_threadgroup); - simdgroup_barrier(mem_flags::mem_texture); - bool elected = simd_is_first(); - _9.FragColor = float4(gl_SubgroupEqMask).x; - _9.FragColor = float4(gl_SubgroupGeMask).x; - _9.FragColor = float4(gl_SubgroupGtMask).x; 
- _9.FragColor = float4(gl_SubgroupLeMask).x; - _9.FragColor = float4(gl_SubgroupLtMask).x; - float4 broadcasted = simd_broadcast(float4(10.0), 8u); - float3 first = simd_broadcast_first(float3(20.0)); - uint4 ballot_value = spvSubgroupBallot(true); - bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); - bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); - uint bit_count = spvSubgroupBallotBitCount(ballot_value); - uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); - uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); - uint lsb = spvSubgroupBallotFindLSB(ballot_value); - uint msb = spvSubgroupBallotFindMSB(ballot_value); - uint shuffled = simd_shuffle(10u, 8u); - uint shuffled_xor = simd_shuffle_xor(30u, 8u); - uint shuffled_up = simd_shuffle_up(20u, 4u); - uint shuffled_down = simd_shuffle_down(20u, 4u); - bool has_all = simd_all(true); - bool has_any = simd_any(true); - bool has_equal = spvSubgroupAllEqual(0); - has_equal = spvSubgroupAllEqual(true); - float4 added = simd_sum(float4(20.0)); - int4 iadded = simd_sum(int4(20)); - float4 multiplied = simd_product(float4(20.0)); - int4 imultiplied = simd_product(int4(20)); - float4 lo = simd_min(float4(20.0)); - float4 hi = simd_max(float4(20.0)); - int4 slo = simd_min(int4(20)); - int4 shi = simd_max(int4(20)); - uint4 ulo = simd_min(uint4(20u)); - uint4 uhi = simd_max(uint4(20u)); - uint4 anded = simd_and(ballot_value); - uint4 ored = simd_or(ballot_value); - uint4 xored = simd_xor(ballot_value); - added = simd_prefix_inclusive_sum(added); - iadded = simd_prefix_inclusive_sum(iadded); - multiplied = simd_prefix_inclusive_product(multiplied); - imultiplied = simd_prefix_inclusive_product(imultiplied); - added = simd_prefix_exclusive_sum(multiplied); - multiplied = simd_prefix_exclusive_product(multiplied); - iadded = simd_prefix_exclusive_sum(imultiplied); - 
imultiplied = simd_prefix_exclusive_product(imultiplied); - added = quad_sum(added); - multiplied = quad_product(multiplied); - iadded = quad_sum(iadded); - imultiplied = quad_product(imultiplied); - lo = quad_min(lo); - hi = quad_max(hi); - ulo = quad_min(ulo); - uhi = quad_max(uhi); - slo = quad_min(slo); - shi = quad_max(shi); - anded = quad_and(anded); - ored = quad_or(ored); - xored = quad_xor(xored); - float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u); - float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u); - float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u); - float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u); -} - diff --git a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp deleted file mode 100644 index 84fcb9c3a92..00000000000 --- a/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include - -using namespace metal; - -struct SSBO -{ - float FragColor; -}; - -kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[quadgroups_per_threadgroup]], uint gl_SubgroupID [[quadgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_quadgroup]]) -{ - _9.FragColor = float(gl_NumSubgroups); - _9.FragColor = float(gl_SubgroupID); - _9.FragColor = float(gl_SubgroupSize); - _9.FragColor = float(gl_SubgroupInvocationID); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup | mem_flags::mem_texture); - simdgroup_barrier(mem_flags::mem_device); - simdgroup_barrier(mem_flags::mem_threadgroup); - simdgroup_barrier(mem_flags::mem_texture); - uint shuffled = quad_shuffle(10u, 8u); - uint shuffled_xor = quad_shuffle_xor(30u, 8u); - uint shuffled_up = quad_shuffle_up(20u, 
4u); - uint shuffled_down = quad_shuffle_down(20u, 4u); - float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u); - float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u); - float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u); - float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u); -} - diff --git a/reference/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag b/reference/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag new file mode 100644 index 00000000000..f0935f6dcf4 --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag @@ -0,0 +1,73 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex_0 [[user(locn1)]]; + float2 vTex_1 [[user(locn2)]]; + float2 vTex_2 [[user(locn3)]]; + float2 vTex_3 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]]) +{ + main0_out out = {}; + spvUnsafeArray vTex = {}; + vTex[0] = 
in.vTex_0; + vTex[1] = in.vTex_1; + vTex[2] = in.vTex_2; + vTex[3] = in.vTex_3; + const uint gl_ViewIndex = spvViewMask[0]; + out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]); + return out; +} + diff --git a/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag index 23c554940b8..67895e3e92c 100644 --- a/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag +++ b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag @@ -1,8 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + #include #include using namespace metal; +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + struct main0_out { float4 FragColor [[color(0)]]; @@ -20,7 +61,7 @@ struct main0_in fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) { main0_out out = {}; - float2 vTex[4] = {}; + spvUnsafeArray vTex = {}; vTex[0] = in.vTex_0; vTex[1] = in.vTex_1; vTex[2] = in.vTex_2; diff --git a/reference/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag 
b/reference/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..274cea2de15 --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag @@ -0,0 +1,24 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + bool _15 = gl_HelperInvocation; + gl_HelperInvocation = true, discard_fragment(); + if (!_15) + { + out.FragColor = float4(1.0, 0.0, 0.0, 1.0); + } + return out; +} + diff --git a/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag b/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..7af77d91f96 --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag @@ -0,0 +1,28 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +static inline __attribute__((always_inline)) +void foo(thread bool& gl_HelperInvocation) +{ + gl_HelperInvocation = true, discard_fragment(); +} + +static inline __attribute__((always_inline)) +void bar(thread bool& gl_HelperInvocation) +{ + bool _13 = gl_HelperInvocation; + bool helper = _13; +} + +fragment void main0() +{ + bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + foo(gl_HelperInvocation); + bar(gl_HelperInvocation); +} + diff --git a/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag b/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag new file mode 100644 index 00000000000..ad3734bdbe8 --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag @@ -0,0 +1,14 @@ +#include +#include + +using namespace metal; + +fragment void main0() +{ + 
bool gl_HelperInvocation = {}; + gl_HelperInvocation = simd_is_helper_thread(); + gl_HelperInvocation = true, discard_fragment(); + bool _9 = gl_HelperInvocation; + bool helper = _9; +} + diff --git a/reference/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag b/reference/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag deleted file mode 100644 index ec25d067872..00000000000 --- a/reference/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag +++ /dev/null @@ -1,36 +0,0 @@ -#include -#include - -using namespace metal; - -struct UBO -{ - float a[1]; - float2 b[2]; -}; - -struct UBOEnhancedLayout -{ - float c[1]; - float2 d[2]; - char _m2_pad[9976]; - float e; -}; - -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -struct main0_in -{ - int vIndex [[user(locn0)]]; -}; - -fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _17 [[buffer(0)]], constant UBOEnhancedLayout& _30 [[buffer(1)]]) -{ - main0_out out = {}; - out.FragColor = (_17.a[in.vIndex] + _30.c[in.vIndex]) + _30.e; - return out; -} - diff --git a/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag deleted file mode 100644 index affaf86d544..00000000000 --- a/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag +++ /dev/null @@ -1,143 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct main0_out -{ - float FragColor [[color(0)]]; -}; - -inline uint4 spvSubgroupBallot(bool value) -{ - simd_vote vote = simd_ballot(value); - // simd_ballot() returns a 64-bit integer-like object, but - // SPIR-V callers expect a uint4. We must convert. - // FIXME: This won't include higher bits if Apple ever supports - // 128 lanes in an SIMD-group. 
- return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); -} - -inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) -{ - return !!extract_bits(ballot[bit / 32], bit % 32, 1); -} - -inline uint spvSubgroupBallotFindLSB(uint4 ballot) -{ - return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); -} - -inline uint spvSubgroupBallotFindMSB(uint4 ballot) -{ - return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); -} - -inline uint spvSubgroupBallotBitCount(uint4 ballot) -{ - return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); -} - -inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) -{ - uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - return spvSubgroupBallotBitCount(ballot & mask); -} - -template -inline bool spvSubgroupAllEqual(T value) -{ - return simd_all(value == simd_broadcast_first(value)); -} - -template<> -inline bool spvSubgroupAllEqual(bool value) -{ - return simd_all(value) || !simd_any(value); -} - -fragment main0_out main0() -{ - main0_out out = {}; - uint gl_SubgroupSize = simd_sum(1); - uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1); - uint4 gl_SubgroupEqMask = 
gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); - uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); - uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); - uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); - uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); - out.FragColor = float(gl_SubgroupSize); - out.FragColor = float(gl_SubgroupInvocationID); - bool elected = simd_is_first(); - out.FragColor = float4(gl_SubgroupEqMask).x; - out.FragColor = float4(gl_SubgroupGeMask).x; - out.FragColor = float4(gl_SubgroupGtMask).x; - out.FragColor = float4(gl_SubgroupLeMask).x; - out.FragColor = float4(gl_SubgroupLtMask).x; - float4 broadcasted = simd_broadcast(float4(10.0), 8u); - float3 first = simd_broadcast_first(float3(20.0)); - uint4 ballot_value = spvSubgroupBallot(true); - bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); - bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); - uint bit_count = spvSubgroupBallotBitCount(ballot_value); - uint inclusive_bit_count = 
spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); - uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); - uint lsb = spvSubgroupBallotFindLSB(ballot_value); - uint msb = spvSubgroupBallotFindMSB(ballot_value); - uint shuffled = simd_shuffle(10u, 8u); - uint shuffled_xor = simd_shuffle_xor(30u, 8u); - uint shuffled_up = simd_shuffle_up(20u, 4u); - uint shuffled_down = simd_shuffle_down(20u, 4u); - bool has_all = simd_all(true); - bool has_any = simd_any(true); - bool has_equal = spvSubgroupAllEqual(0); - has_equal = spvSubgroupAllEqual(true); - float4 added = simd_sum(float4(20.0)); - int4 iadded = simd_sum(int4(20)); - float4 multiplied = simd_product(float4(20.0)); - int4 imultiplied = simd_product(int4(20)); - float4 lo = simd_min(float4(20.0)); - float4 hi = simd_max(float4(20.0)); - int4 slo = simd_min(int4(20)); - int4 shi = simd_max(int4(20)); - uint4 ulo = simd_min(uint4(20u)); - uint4 uhi = simd_max(uint4(20u)); - uint4 anded = simd_and(ballot_value); - uint4 ored = simd_or(ballot_value); - uint4 xored = simd_xor(ballot_value); - added = simd_prefix_inclusive_sum(added); - iadded = simd_prefix_inclusive_sum(iadded); - multiplied = simd_prefix_inclusive_product(multiplied); - imultiplied = simd_prefix_inclusive_product(imultiplied); - added = simd_prefix_exclusive_sum(multiplied); - multiplied = simd_prefix_exclusive_product(multiplied); - iadded = simd_prefix_exclusive_sum(imultiplied); - imultiplied = simd_prefix_exclusive_product(imultiplied); - added = quad_sum(added); - multiplied = quad_product(multiplied); - iadded = quad_sum(iadded); - imultiplied = quad_product(imultiplied); - lo = quad_min(lo); - hi = quad_max(hi); - ulo = quad_min(ulo); - uhi = quad_max(uhi); - slo = quad_min(slo); - shi = quad_max(shi); - anded = quad_and(anded); - ored = quad_or(ored); - xored = quad_xor(xored); - float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u); - float4 swap_vertical = 
quad_shuffle_xor(float4(20.0), 2u); - float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u); - float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u); - return out; -} - diff --git a/reference/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert new file mode 100644 index 00000000000..e36576b86f5 --- /dev/null +++ b/reference/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + const int gl_DeviceIndex = 0; + const uint gl_ViewIndex = 0; + out.gl_Position = float4(float(gl_DeviceIndex), float(int(gl_ViewIndex)), 0.0, 1.0); + return out; +} + diff --git a/reference/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert new file mode 100644 index 00000000000..cc4bcc42027 --- /dev/null +++ b/reference/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +vertex main0_out main0() +{ + main0_out out = {}; + const int gl_DeviceIndex = 0; + out.gl_Position = float4(float(gl_DeviceIndex)); + return out; +} + diff --git a/reference/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert new file mode 100644 index 00000000000..8959afe821e --- /dev/null +++ b/reference/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert @@ -0,0 +1,28 @@ +#include +#include + +using namespace metal; + +struct MVPs +{ + float4x4 MVP[2]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex 
main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]]) +{ + main0_out out = {}; + const uint gl_ViewIndex = spvViewMask[0]; + out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; + return out; +} + diff --git a/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert index c42e67211e7..20eff0a124f 100644 --- a/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert +++ b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert @@ -19,11 +19,11 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; - uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1]; - gl_InstanceIndex /= spvViewMask[1]; + uint gl_ViewIndex = spvViewMask[0] + (gl_InstanceIndex - gl_BaseInstance) % spvViewMask[1]; + gl_InstanceIndex = (gl_InstanceIndex - gl_BaseInstance) / spvViewMask[1] + gl_BaseInstance; out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; out.gl_Layer = gl_ViewIndex - spvViewMask[0]; return out; diff --git a/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert index f87d2a11adc..5152b6222ee 100644 --- a/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert +++ b/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert @@ -19,7 +19,7 @@ struct main0_in float4 Position [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex 
[[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]], uint gl_BaseInstance [[base_instance]]) { main0_out out = {}; const uint gl_ViewIndex = 0; diff --git a/reference/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert b/reference/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert index 53e26e4a8eb..86a0cea5bb0 100644 --- a/reference/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert +++ b/reference/shaders-msl/vulkan/vert/vulkan-vertex.vk.vert @@ -11,7 +11,7 @@ struct main0_out vertex main0_out main0(uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) { main0_out out = {}; - out.gl_Position = float4(1.0, 2.0, 3.0, 4.0) * float(gl_VertexIndex + gl_InstanceIndex); + out.gl_Position = float4(1.0, 2.0, 3.0, 4.0) * float(int(gl_VertexIndex) + int(gl_InstanceIndex)); return out; } diff --git a/reference/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp b/reference/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp new file mode 100644 index 00000000000..e4dfdb87a82 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp @@ -0,0 +1,24 @@ +#version 460 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer Output +{ + int myout; +} _5; + +int foo() +{ + return 12; +} + +void main() +{ + int _17 = foo(); + while (true) + { + _5.myout = _17; + return; + } + _5.myout = _17; +} + diff --git a/reference/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/reference/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..d36f5431088 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,25 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct T +{ + float c; +}; + +layout(binding = 0, std430) 
buffer SSBO1 +{ + T foo[]; +} _7; + +layout(binding = 1, std140) buffer SSBO2 +{ + T bar[]; +} _10; + +void main() +{ + T v = T(40.0); + _7.foo[10].c = v.c; + _10.bar[30].c = v.c; +} + diff --git a/reference/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp.vk index c2fb39907b0..5f480728e4e 100644 --- a/reference/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp.vk +++ b/reference/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp.vk @@ -33,8 +33,12 @@ void main() _4.u16 = uint16_t(_30); _4.f32 = float(_31); _4.f32 = float(int16_t(_32)); + _4.f32 = float(_29); + _4.f32 = float(int(_30)); _4.f32 = float(uint16_t(_31)); _4.f32 = float(_32); + _4.f32 = float(uint(_29)); + _4.f32 = float(_30); _4.s16 = int16_t(_33); _4.u16 = uint16_t(int16_t(_33)); _4.u16 = uint16_t(_33); diff --git a/reference/shaders-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000000..10a54fc8cf0 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + uint a; + uint b; +} _5; + +void main() +{ + uint _20 = atomicAdd(_5.b, 0u); + uint c = _20; + atomicExchange(_5.a, c); +} + diff --git a/reference/shaders-no-opt/asm/comp/basic.spv16.asm.comp b/reference/shaders-no-opt/asm/comp/basic.spv16.asm.comp new file mode 100644 index 00000000000..7c237d8abd5 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/basic.spv16.asm.comp @@ -0,0 +1,13 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float values[]; +} _3; + +void main() +{ + _3.values[gl_GlobalInvocationID.x] += 2.0; +} + diff --git 
a/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp new file mode 100644 index 00000000000..66a70f18486 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp @@ -0,0 +1,24 @@ +#version 450 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for FP16. +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + f16vec2 a; + float b; + float c; + f16vec2 d; +} _4; + +void main() +{ + _4.b = uintBitsToFloat(packFloat2x16(_4.a)); + _4.d = unpackFloat2x16(floatBitsToUint(_4.c)); +} + diff --git a/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp.vk b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp.vk new file mode 100644 index 00000000000..09eccf4b31e --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp.vk @@ -0,0 +1,25 @@ +#version 450 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. 
+#endif +#extension GL_EXT_shader_16bit_storage : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + f16vec2 a; + float b; + float c; + f16vec2 d; +} _4; + +void main() +{ + _4.b = uintBitsToFloat(packFloat2x16(_4.a)); + _4.d = unpackFloat2x16(floatBitsToUint(_4.c)); +} + diff --git a/reference/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/reference/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..f535ba7f49b --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + ivec4 ints; + uvec4 uints; +} _3; + +void main() +{ + ivec4 _19 = _3.ints; + uvec4 _20 = _3.uints; + _3.ints = bitCount(_19); + _3.uints = uvec4(bitCount(_19)); + _3.ints = bitCount(_20); + _3.uints = uvec4(bitCount(_20)); + _3.ints = bitfieldReverse(_19); + _3.uints = bitfieldReverse(_20); + _3.ints = bitfieldExtract(_19, 1, int(11u)); + _3.uints = uvec4(bitfieldExtract(ivec4(_20), int(11u), 1)); + _3.ints = ivec4(bitfieldExtract(uvec4(_19), 1, int(11u))); + _3.uints = bitfieldExtract(_20, int(11u), 1); + _3.ints = bitfieldInsert(_19, _19.wzyx, 1, int(11u)); + _3.uints = bitfieldInsert(_20, _20.wzyx, int(11u), 1); +} + diff --git a/reference/shaders-no-opt/asm/comp/bitscan.asm.comp b/reference/shaders-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..31a6234abb5 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + uvec4 u; + ivec4 i; +} _4; + +void main() +{ + uvec4 _19 = _4.u; + ivec4 _20 = _4.i; + _4.u = uvec4(findLSB(_19)); + _4.i = findLSB(_19); + _4.u = uvec4(findLSB(_20)); + _4.i = findLSB(_20); 
+ _4.u = uvec4(findMSB(_19)); + _4.i = findMSB(_19); + _4.u = uvec4(findMSB(uvec4(_20))); + _4.i = findMSB(uvec4(_20)); + _4.u = uvec4(findMSB(ivec4(_19))); + _4.i = findMSB(ivec4(_19)); + _4.u = uvec4(findMSB(_20)); + _4.i = findMSB(_20); +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..d700d613534 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + uint v; +} ssbos[]; + +void main() +{ + uint _24 = gl_GlobalInvocationID.z; + uint _25 = atomicAdd(ssbos[nonuniformEXT(_24)].v, 1u); +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..f082267f931 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp.vk @@ -0,0 +1,37 @@ +#version 450 +#if defined(GL_ARB_gpu_shader_int64) +#extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. 
+#endif +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer SomeBuffer; +layout(buffer_reference, buffer_reference_align = 16, std430) buffer SomeBuffer +{ + vec4 v; + uint64_t a; + uvec2 b; +}; + +layout(push_constant, std430) uniform Registers +{ + uint64_t address; + uvec2 address2; +} registers; + +void main() +{ + SomeBuffer _44 = SomeBuffer(registers.address); + SomeBuffer _45 = SomeBuffer(registers.address); + SomeBuffer _46 = SomeBuffer(registers.address2); + _44.v = vec4(1.0, 2.0, 3.0, 4.0); + _45.v = vec4(1.0, 2.0, 3.0, 4.0); + _46.v = vec4(1.0, 2.0, 3.0, 4.0); + _44.a = uint64_t(_44); + _45.a = uint64_t(_45); + _46.b = uvec2(_46); +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp.vk new file mode 100644 index 00000000000..12581cc3f8a --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Alias; +layout(buffer_reference) buffer _6; +layout(buffer_reference) buffer _7; +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer Alias +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer _6 +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) coherent writeonly buffer _7 +{ + vec4 v[]; +}; + +layout(push_constant, std430) uniform Registers +{ + Alias ro; + _6 rw; + _7 wo; +} registers; + +void main() +{ + registers.rw.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; + registers.wo.v[gl_GlobalInvocationID.x] = 
registers.ro.v[gl_GlobalInvocationID.x]; +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp.vk new file mode 100644 index 00000000000..06e620d2c9b --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp.vk @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference, buffer_reference_align = 8) buffer uvec4Pointer +{ + uvec4 value; +}; + +layout(push_constant, std430) uniform Push +{ + uvec4Pointer ptr; +} _4; + +void main() +{ + _4.ptr.value = uvec4(1u, 2u, 3u, 4u); +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp.vk new file mode 100644 index 00000000000..44427de81e6 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp.vk @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer uvec4Pointer +{ + uvec4 value; +}; + +void main() +{ +} + diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer-2.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer-2.asm.nocompat.vk.comp.vk index 0288931915c..f77142a7434 100644 --- a/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer-2.asm.nocompat.vk.comp.vk +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer-2.asm.nocompat.vk.comp.vk @@ -1,9 +1,13 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension 
GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_EXT_buffer_reference : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -layout(buffer_reference) buffer uintPointer +layout(buffer_reference, buffer_reference_align = 4) buffer uintPointer { uint value; }; diff --git a/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer.asm.nocompat.vk.comp.vk b/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer.asm.nocompat.vk.comp.vk index 9553199b462..6ba488be6b1 100644 --- a/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer.asm.nocompat.vk.comp.vk +++ b/reference/shaders-no-opt/asm/comp/buffer-reference-synthesized-pointer.asm.nocompat.vk.comp.vk @@ -1,9 +1,13 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_EXT_buffer_reference : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -layout(buffer_reference) buffer uint0_Pointer +layout(buffer_reference, buffer_reference_align = 4) buffer uint0_Pointer { uint value[]; }; diff --git a/reference/shaders-no-opt/asm/comp/constant-composite-undef.asm.comp b/reference/shaders-no-opt/asm/comp/constant-composite-undef.asm.comp index 279dede112e..d8f1f19b12e 100644 --- a/reference/shaders-no-opt/asm/comp/constant-composite-undef.asm.comp +++ b/reference/shaders-no-opt/asm/comp/constant-composite-undef.asm.comp @@ -1,13 +1,13 @@ #version 450 layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +float _15; + layout(binding = 0, std430) buffer Block { vec4 f; } block; -float _15; - void main() { block.f = vec4(0.100000001490116119384765625, 0.20000000298023223876953125, 0.300000011920928955078125, 0.0); diff --git a/reference/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp 
b/reference/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..1f43951a155 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 4, local_size_y = 4, local_size_z = 1) in; + +const int indexable[4] = int[](0, 1, 2, 3); +const int indexable_1[4] = int[](4, 5, 6, 7); + +layout(binding = 0, std430) buffer SSBO +{ + int values[]; +} _6; + +void main() +{ + _6.values[gl_GlobalInvocationID.x] = indexable[gl_LocalInvocationID.x] + indexable_1[gl_LocalInvocationID.y]; +} + diff --git a/reference/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp b/reference/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 00000000000..28b2d1d0e9a --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,45 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct B2 +{ + vec4 elem2; +}; + +struct C +{ + vec4 c; + B2 b2; + B2 b2_array[4]; +}; + +struct B1 +{ + vec4 elem1; +}; + +struct A +{ + vec4 a; + B1 b1; + B1 b1_array[4]; +}; + +layout(binding = 0, std430) buffer _8_3 +{ + A a_block; + C c_block; +} _3; + +void main() +{ + A _27; + _27.a = _3.c_block.c; + _27.b1.elem1 = _3.c_block.b2.elem2; + _27.b1_array[0].elem1 = _3.c_block.b2_array[0].elem2; + _27.b1_array[1].elem1 = _3.c_block.b2_array[1].elem2; + _27.b1_array[2].elem1 = _3.c_block.b2_array[2].elem2; + _27.b1_array[3].elem1 = _3.c_block.b2_array[3].elem2; + _3.a_block = _27; +} + diff --git a/reference/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/reference/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..77ea03495f2 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,7 @@ +#version 450 
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +void main() +{ +} + diff --git a/reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp b/reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp new file mode 100644 index 00000000000..83a9b83fa12 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp @@ -0,0 +1,34 @@ +#version 430 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _4_5 +{ + uint _m0[16]; +} _5; + +layout(binding = 1, std430) buffer _4_6 +{ + uint _m0[16]; +} _6; + +layout(binding = 2, std430) buffer _4_7 +{ + uint _m0[16]; +} _7; + +vec4 _88(vec4 _89) +{ + for (int _91 = 0; _91 < 16; _91++) + { + uint _163 = _6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + _5._m0[_91])))))))))))))))))))))))))))))))))))))))))))))))))))))))))))); + uint _225 = _6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + 
(_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + _163))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))); + _7._m0[_91] = _6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + (_6._m0[_91] + _225)))))))))))))); + } + return _89; +} + +void main() +{ + vec4 _87 = _88(vec4(uvec4(gl_GlobalInvocationID, 0u))); +} + diff --git a/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp b/reference/shaders-no-opt/asm/comp/extended-debug-extinst.invalid.asm.comp similarity index 100% rename from reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp rename to reference/shaders-no-opt/asm/comp/extended-debug-extinst.invalid.asm.comp diff --git a/reference/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp b/reference/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp new file mode 100644 index 00000000000..5a5f212faae --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp @@ -0,0 +1,51 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 1, std430) buffer _2_9 +{ + uint _m0[2]; +} _9; + +layout(binding = 0, std430) buffer _4_8 +{ + uint 
_m0[3]; +} _8; + +layout(binding = 2, std430) buffer _6_10 +{ + uint _m0[11]; +} _10; + +void main() +{ + uint _34 = 0u; + uint _35 = 0u; + uint _36 = 0u; + _10._m0[_34] = 8u; + _34++; + for (;;) + { + _10._m0[_34] = 9u; + _34++; + uint _44 = _35; + _35 = _44 + 1u; + if (_8._m0[_44] == 1u) + { + _10._m0[_34] = 12u; + _34++; + _36++; + _10._m0[_34] = 13u; + _34++; + _10._m0[_34] = 11u; + _34++; + continue; + } + else + { + break; + } + } + _10._m0[_34] = 10u; + _34++; +} + diff --git a/reference/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp b/reference/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp new file mode 100644 index 00000000000..73c7d367283 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp @@ -0,0 +1,411 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _4_12 +{ + uint _m0[1]; +} _12; + +layout(binding = 1, std430) buffer _4_13 +{ + uint _m0[1]; +} _13; + +layout(binding = 7, std430) buffer _4_19 +{ + uint _m0[1]; +} _19; + +layout(binding = 2, std430) buffer _6_14 +{ + uint _m0[2]; +} _14; + +layout(binding = 3, std430) buffer _6_15 +{ + uint _m0[2]; +} _15; + +layout(binding = 4, std430) buffer _6_16 +{ + uint _m0[2]; +} _16; + +layout(binding = 5, std430) buffer _6_17 +{ + uint _m0[2]; +} _17; + +layout(binding = 6, std430) buffer _6_18 +{ + uint _m0[2]; +} _18; + +layout(binding = 8, std430) buffer _8_20 +{ + uint _m0[3]; +} _20; + +layout(binding = 9, std430) buffer _10_21 +{ + uint _m0[37]; +} _21; + +void main() +{ + uint _70 = 0u; + uint _71 = 0u; + uint _72 = 0u; + uint _74 = 0u; + uint _75 = 0u; + uint _76 = 0u; + uint _77 = 0u; + uint _78 = 0u; + uint _79 = 0u; + uint _90 = ((gl_WorkGroupID.y * 1u) + (gl_WorkGroupID.z * 1u)) + gl_WorkGroupID.x; + uint _111 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _71 = 
(_90 * 1u) + (gl_LocalInvocationIndex * 1u); + _72 = (_90 * 1u) + (gl_LocalInvocationIndex * 1u); + _74 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _75 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _76 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _77 = (_90 * 2u) + (gl_LocalInvocationIndex * 2u); + _78 = (_90 * 1u) + (gl_LocalInvocationIndex * 1u); + _79 = (_90 * 3u) + (gl_LocalInvocationIndex * 3u); + _70 = (_90 * 37u) + (gl_LocalInvocationIndex * 37u); + _21._m0[_70] = 8u; + uint _123 = _70 + 1u; + _71++; + uint _136; + uint _233; + uint _234; + uint _241; + uint _242; + uint _73 = _111; + uint _129 = _123; + for (;;) + { + _21._m0[_129] = 9u; + _136 = _129 + 1u; + uint _141; + for (;;) + { + _21._m0[_136] = 12u; + _141 = _136 + 1u; + break; + } + uint _148; + uint _149; + uint _162; + uint _163; + for (;;) + { + _21._m0[_141] = 13u; + _148 = _141 + 1u; + _149 = _75; + _21._m0[_148] = 17u; + _75 = _149 + 1u; + uint _158; + if (_16._m0[_149] == 1u) + { + _158 = _148 + 1u; + _21._m0[_158] = 19u; + _162 = _158 + 1u; + _163 = _74; + break; + } + if (true) + { + _141 = 666u; + continue; + } + else + { + _162 = 666u; + _163 = 666u; + break; + } + } + _21._m0[_162] = 15u; + uint _165 = _162 + 1u; + _74 = _163 + 1u; + uint _174; + uint _178; + uint _179; + if (_15._m0[_163] == 1u) + { + _178 = _165; + _179 = _76; + _21._m0[_178] = 21u; + uint _181 = _178 + 1u; + uint _184 = _179 + 1u; + _76 = _184; + uint _186; + _186 = _181; + uint _191; + for (;;) + { + _21._m0[_186] = 23u; + uint _189 = _186 + 1u; + _191 = _189; + break; + } + uint _199; + uint _200; + uint _216; + uint _217; + uint _224; + uint _225; + for (;;) + { + _21._m0[_191] = 24u; + uint _195 = _191 + 1u; + uint _196 = _79; + _199 = _195; + _200 = _196; + _21._m0[_199] = 28u; + uint _202 = _199 + 1u; + uint _204 = _20._m0[_200]; + uint _205 = _200 + 1u; + _79 = _205; + uint _208; + uint _212; + bool _198_ladder_break = false; + switch (_204) + { + default: + { + _208 = _202; + _21._m0[_208] = 30u; 
+ uint _210 = _208 + 1u; + uint _211 = _77; + _224 = _210; + _225 = _211; + _198_ladder_break = true; + break; + } + case 1u: + { + _212 = _202; + break; + } + } + if (_198_ladder_break) + { + break; + } + _21._m0[_212] = 29u; + uint _214 = _212 + 1u; + uint _215 = _78; + _216 = _214; + _217 = _215; + _21._m0[_216] = 27u; + uint _192 = _216 + 1u; + uint _220 = _19._m0[_217]; + uint _222 = _217 + 1u; + _78 = _222; + uint _223 = _77; + if (_220 == 1u) + { + _191 = _192; + continue; + } + else + { + _224 = _192; + _225 = _223; + break; + } + } + _21._m0[_224] = 26u; + uint _227 = _224 + 1u; + uint _229 = _18._m0[_225]; + bool _230 = _229 == 1u; + uint _231 = _225 + 1u; + _77 = _231; + uint _232 = _73; + if (_230) + { + _233 = _227; + _234 = _232; + _21._m0[_233] = 11u; + uint _130 = _233 + 1u; + uint _237 = _14._m0[_234]; + uint _239 = _234 + 1u; + _73 = _239; + if (_237 == 1u) + { + _129 = _130; + continue; + } + else + { + _241 = _130; + _242 = _72; + break; + } + } + else + { + } + } + else + { + _174 = _165; + _21._m0[_174] = 22u; + _178 = _174 + 1u; + _179 = _76; + _21._m0[_178] = 21u; + uint _181 = _178 + 1u; + uint _184 = _179 + 1u; + _76 = _184; + uint _186; + _186 = _181; + uint _191; + for (;;) + { + _21._m0[_186] = 23u; + uint _189 = _186 + 1u; + _191 = _189; + break; + } + uint _199; + uint _200; + uint _216; + uint _217; + uint _224; + uint _225; + for (;;) + { + _21._m0[_191] = 24u; + uint _195 = _191 + 1u; + uint _196 = _79; + _199 = _195; + _200 = _196; + _21._m0[_199] = 28u; + uint _202 = _199 + 1u; + uint _204 = _20._m0[_200]; + uint _205 = _200 + 1u; + _79 = _205; + uint _208; + uint _212; + bool _198_ladder_break = false; + switch (_204) + { + default: + { + _208 = _202; + _21._m0[_208] = 30u; + uint _210 = _208 + 1u; + uint _211 = _77; + _224 = _210; + _225 = _211; + _198_ladder_break = true; + break; + } + case 1u: + { + _212 = _202; + break; + } + } + if (_198_ladder_break) + { + break; + } + _21._m0[_212] = 29u; + uint _214 = _212 + 1u; + uint 
_215 = _78; + _216 = _214; + _217 = _215; + _21._m0[_216] = 27u; + uint _192 = _216 + 1u; + uint _220 = _19._m0[_217]; + uint _222 = _217 + 1u; + _78 = _222; + uint _223 = _77; + if (_220 == 1u) + { + _191 = _192; + continue; + } + else + { + _224 = _192; + _225 = _223; + break; + } + } + _21._m0[_224] = 26u; + uint _227 = _224 + 1u; + uint _229 = _18._m0[_225]; + bool _230 = _229 == 1u; + uint _231 = _225 + 1u; + _77 = _231; + uint _232 = _73; + if (_230) + { + _233 = _227; + _234 = _232; + _21._m0[_233] = 11u; + uint _130 = _233 + 1u; + uint _237 = _14._m0[_234]; + uint _239 = _234 + 1u; + _73 = _239; + if (_237 == 1u) + { + _129 = _130; + continue; + } + else + { + _241 = _130; + _242 = _72; + break; + } + } + else + { + } + } + _233 = 666u; + _234 = 666u; + _21._m0[_233] = 11u; + uint _130 = _233 + 1u; + uint _237 = _14._m0[_234]; + uint _239 = _234 + 1u; + _73 = _239; + if (_237 == 1u) + { + _129 = _130; + continue; + } + else + { + _241 = _130; + _242 = _72; + break; + } + } + _21._m0[_241] = 10u; + _72 = _242 + 1u; + uint _251; + uint _254; + switch (_13._m0[_242]) + { + case 1u: + { + _254 = 666u; + break; + } + default: + { + _251 = _241 + 1u; + _21._m0[_251] = 32u; + _254 = _251 + 1u; + break; + } + } + _21._m0[_254] = 31u; +} + diff --git a/reference/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp b/reference/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 00000000000..dc0956c3453 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,33 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct _8 +{ + float _m0; + float _m1; +}; + +struct _15 +{ + float _m0; + int _m1; +}; + +layout(binding = 0, std430) buffer _3_4 +{ + float _m0; + int _m1; +} _4; + +void main() +{ + _8 _23; + _23._m0 = modf(20.0, _23._m1); + _15 _24; + _24._m0 = frexp(40.0, _24._m1); + _4._m0 = _23._m0; + _4._m0 = _23._m1; + _4._m0 = _24._m0; + 
_4._m1 = _24._m1; +} + diff --git a/reference/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..c6c1ea3e266 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp.vk @@ -0,0 +1,12 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, r32ui) uniform uimage2D uImage[]; + +void main() +{ + uint _26 = gl_GlobalInvocationID.z; + uint _31 = imageAtomicAdd(uImage[nonuniformEXT(_26)], ivec2(gl_GlobalInvocationID.xy), 1u); +} + diff --git a/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp b/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp new file mode 100644 index 00000000000..57587ebfa28 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp @@ -0,0 +1,29 @@ +#version 450 + +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11u +#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12u +#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13u +#endif +const uint _4 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14u +#endif +const uint _5 = SPIRV_CROSS_CONSTANT_ID_4; + +layout(local_size_x = 3, local_size_y = SPIRV_CROSS_CONSTANT_ID_1, local_size_z = SPIRV_CROSS_CONSTANT_ID_2) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 values[]; +} _8; + +void main() +{ + _8.values[gl_GlobalInvocationID.x] += vec4(2.0); +} + diff --git a/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp.vk new file mode 100644 index 00000000000..0073fbee0cf --- /dev/null +++ 
b/reference/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp.vk @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 3, local_size_y_id = 1, local_size_z_id = 2) in; + +layout(constant_id = 3) const uint _4 = 13u; +layout(constant_id = 4) const uint _5 = 14u; + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + vec4 values[]; +} _8; + +void main() +{ + _8.values[gl_GlobalInvocationID.x] += vec4(2.0); +} + diff --git a/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp b/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp new file mode 100644 index 00000000000..5c2a09d3d4c --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp @@ -0,0 +1,31 @@ +#version 450 + +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11 +#endif +const int _10 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12 +#endif +const int _11 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13 +#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14 +#endif +const uint _29 = (uint(int(gl_WorkGroupSize.x)) + 3u); +const uvec3 _30 = uvec3(_29, int(gl_WorkGroupSize.y), 2u); + +layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_3, local_size_y = SPIRV_CROSS_CONSTANT_ID_4, local_size_z = 2) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 values[]; +} _8; + +void main() +{ + _8.values[gl_GlobalInvocationID.x] = ((((_8.values[gl_GlobalInvocationID.x] + vec4(2.0)) + vec3(_30).xyzz) * float(int(gl_WorkGroupSize.x))) * float(int(gl_WorkGroupSize.y))) * float(int(2u)); +} + diff --git a/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp.vk new file mode 100644 index 00000000000..b6a78bdf1cd --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp.vk @@ -0,0 +1,22 @@ +#version 450 +layout(local_size_x_id = 3, 
local_size_y_id = 4, local_size_z = 2) in; + +layout(constant_id = 1) const int _10 = 11; +layout(constant_id = 2) const int _11 = 12; +const uint _29 = (uint(int(gl_WorkGroupSize.x)) + 3u); +const uvec3 _30 = uvec3(_29, int(gl_WorkGroupSize.y), 2u); + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + vec4 values[]; +} _8; + +void main() +{ + vec3 _38 = vec3(_30); + float _41 = float(int(gl_WorkGroupSize.x)); + float _42 = float(int(gl_WorkGroupSize.y)); + float _43 = float(int(2u)); + _8.values[gl_GlobalInvocationID.x] = ((((_8.values[gl_GlobalInvocationID.x] + vec4(2.0)) + _38.xyzz) * _41) * _42) * _43; +} + diff --git a/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp b/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp new file mode 100644 index 00000000000..c34852f79c3 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp @@ -0,0 +1,107 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 4, std430) buffer _2_12 +{ + uint _m0[1]; +} _12; + +layout(binding = 1, std430) buffer _2_9 +{ + uint _m0[1]; +} _9; + +layout(binding = 2, std430) buffer _2_10 +{ + uint _m0[1]; +} _10; + +layout(binding = 3, std430) buffer _2_11 +{ + uint _m0[1]; +} _11; + +layout(binding = 0, std430) buffer _4_8 +{ + uint _m0[2]; +} _8; + +layout(binding = 5, std430) buffer _6_13 +{ + uint _m0[11]; +} _13; + +void main() +{ + uint _43 = 0u; + uint _44 = 0u; + uint _45 = 0u; + uint _46 = 0u; + uint _47 = 0u; + _13._m0[0u] = 8u; + uint _50 = 0u + 1u; + uint _42 = _50; + for (;;) + { + _13._m0[_42] = 9u; + _42++; + uint _55 = _43; + _43 = _55 + 1u; + if (_8._m0[_55] == 1u) + { + _13._m0[_42] = 12u; + _42++; + return; + } + else + { + _13._m0[_42] = 13u; + _42++; + uint _70 = _44; + _44 = _70 + 1u; + if (_9._m0[_70] == 1u) + { + _13._m0[_42] = 11u; + _42++; + _13._m0[_42] = 14u; + _42++; + _45++; + do + { + _13._m0[_42] 
= 16u; + _42++; + break; + } while(false); + _13._m0[_42] = 15u; + _42++; + uint _94 = _46; + _46 = _94 + 1u; + if (_11._m0[_94] == 1u) + { + } + else + { + _13._m0[_42] = 19u; + _42++; + } + _13._m0[_42] = 17u; + _42++; + uint _108 = _47; + _47 = _108 + 1u; + if (_12._m0[_108] == 1u) + { + continue; + } + else + { + break; + } + } + else + { + break; + } + } + } +} + diff --git a/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp b/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp new file mode 100644 index 00000000000..08f3b44e0b7 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp @@ -0,0 +1,108 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 4, std430) buffer _2_12 +{ + uint _m0[1]; +} _12; + +layout(binding = 1, std430) buffer _2_9 +{ + uint _m0[1]; +} _9; + +layout(binding = 2, std430) buffer _2_10 +{ + uint _m0[1]; +} _10; + +layout(binding = 3, std430) buffer _2_11 +{ + uint _m0[1]; +} _11; + +layout(binding = 0, std430) buffer _4_8 +{ + uint _m0[2]; +} _8; + +layout(binding = 5, std430) buffer _6_13 +{ + uint _m0[11]; +} _13; + +void main() +{ + uint _43 = 0u; + uint _44 = 0u; + uint _45 = 0u; + uint _46 = 0u; + uint _47 = 0u; + uint _115; + _13._m0[_115] = 8u; + uint _50 = _115 + 1u; + uint _42 = _50; + for (;;) + { + _13._m0[_42] = 9u; + _42++; + uint _55 = _43; + _43 = _55 + 1u; + if (_8._m0[_55] == 1u) + { + _13._m0[_42] = 12u; + _42++; + return; + } + else + { + _13._m0[_42] = 13u; + _42++; + uint _70 = _44; + _44 = _70 + 1u; + if (_9._m0[_70] == 1u) + { + _13._m0[_42] = 11u; + _42++; + _13._m0[_42] = 14u; + _42++; + _45++; + do + { + _13._m0[_42] = 16u; + _42++; + break; + } while(false); + _13._m0[_42] = 15u; + _42++; + uint _94 = _46; + _46 = _94 + 1u; + if (_11._m0[_94] == 1u) + { + } + else + { + _13._m0[_42] = 19u; + _42++; + } + _13._m0[_42] = 17u; + _42++; + uint _108 = _47; + _47 = _108 
+ 1u; + if (_12._m0[_108] == 1u) + { + continue; + } + else + { + break; + } + } + else + { + break; + } + } + } +} + diff --git a/reference/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp b/reference/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp new file mode 100644 index 00000000000..ca8e58cfc75 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp @@ -0,0 +1,12 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +void main() +{ + uint i = 0u; + for (;;) + { + break; + } +} + diff --git a/reference/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp b/reference/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp new file mode 100644 index 00000000000..7de95ae6b4e --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp @@ -0,0 +1,52 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std140) uniform UBO +{ + int v; +} _6; + +void main() +{ + uint count = 0u; + for (int i = 0; i < 4; i++) + { + bool _31_ladder_break = false; + do + { + bool _33_ladder_break = false; + do + { + bool _35_ladder_break = false; + do + { + if (_6.v == 20) + { + _35_ladder_break = true; + _33_ladder_break = true; + _31_ladder_break = true; + break; + } + break; + } while(false); + if (_35_ladder_break) + { + break; + } + break; + } while(false); + if (_33_ladder_break) + { + break; + } + count++; + break; + } while(false); + if (_31_ladder_break) + { + break; + } + count++; + } +} + diff --git a/reference/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..9b7de0622f8 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp.vk @@ -0,0 +1,55 @@ +#version 450 +#extension 
GL_EXT_buffer_reference : require +#extension GL_EXT_nonuniform_qualifier : require +#extension GL_KHR_shader_subgroup_ballot : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) restrict readonly buffer SSBO_Offsets +{ + uvec2 _m0[]; +} _7; + +layout(push_constant, std430) uniform RootConstants +{ + uint _m0; + uint _m1; + uint _m2; + uint _m3; + uint _m4; + uint _m5; + uint _m6; + uint _m7; +} registers; + +layout(set = 1, binding = 0) uniform samplerBuffer _8[]; +layout(set = 4, binding = 0, r32f) uniform imageBuffer _9[]; +layout(set = 4, binding = 0, r32ui) uniform uimageBuffer _10[]; + +void main() +{ + uint _61 = registers._m4 + 2u; + uint _64 = subgroupBroadcastFirst(_61); + uint _71 = subgroupBroadcastFirst(registers._m4); + uint _76 = registers._m1 + 1u; + uint _79 = subgroupBroadcastFirst(_76); + uint _88 = gl_GlobalInvocationID.x + 4u; + uint _99 = gl_GlobalInvocationID.x + 1024u; + imageStore(_9[registers._m4], int((_99 < _7._m0[_71].y) ? (_99 + _7._m0[_71].x) : 4294967295u), vec4(imageLoad(_9[registers._m4], int((_88 < _7._m0[_71].y) ? (_88 + _7._m0[_71].x) : 4294967295u)))); + uint _106 = gl_GlobalInvocationID.x + 2u; + uint _117 = gl_GlobalInvocationID.x + 2048u; + imageStore(_9[registers._m4], int((_117 < _7._m0[_71].y) ? (_117 + _7._m0[_71].x) : 4294967295u), vec4(texelFetch(_8[_76], int((_106 < _7._m0[_79].y) ? (_106 + _7._m0[_79].x) : 4294967295u)))); + uint _130 = imageAtomicAdd(_10[_61], int((gl_GlobalInvocationID.x < _7._m0[_64].y) ? (gl_GlobalInvocationID.x + _7._m0[_64].x) : 4294967295u), 40u); + uint _137 = imageAtomicCompSwap(_10[_61], int((gl_GlobalInvocationID.y < _7._m0[_64].y) ? (gl_GlobalInvocationID.y + _7._m0[_64].x) : 4294967295u), 40u, 50u); + imageStore(_9[registers._m4], int((0u < _7._m0[_71].y) ? (0u + _7._m0[_71].x) : 4294967295u), vec4(float(_7._m0[_71].y))); + imageStore(_9[registers._m4], int((1u < _7._m0[_71].y) ? 
(1u + _7._m0[_71].x) : 4294967295u), vec4(float(_7._m0[_79].y))); + uint _11 = registers._m4 + (gl_GlobalInvocationID.z + 0u); + imageStore(_9[nonuniformEXT(_11)], int((_99 < _7._m0[_11].y) ? (_99 + _7._m0[_11].x) : 4294967295u), vec4(imageLoad(_9[nonuniformEXT(_11)], int((_88 < _7._m0[_11].y) ? (_88 + _7._m0[_11].x) : 4294967295u)))); + uint _13 = registers._m1 + (gl_GlobalInvocationID.z + 0u); + imageStore(_9[nonuniformEXT(_11)], int((_117 < _7._m0[_11].y) ? (_117 + _7._m0[_11].x) : 4294967295u), vec4(texelFetch(_8[nonuniformEXT(_13)], int((_88 < _7._m0[_13].y) ? (_88 + _7._m0[_13].x) : 4294967295u)))); + uint _15 = registers._m4 + (gl_GlobalInvocationID.z + 0u); + uint _209 = imageAtomicAdd(_10[nonuniformEXT(_15)], int((gl_GlobalInvocationID.y < _7._m0[_15].y) ? (gl_GlobalInvocationID.y + _7._m0[_15].x) : 4294967295u), 40u); + uint _215 = imageAtomicCompSwap(_10[nonuniformEXT(_15)], int((gl_GlobalInvocationID.y < _7._m0[_15].y) ? (gl_GlobalInvocationID.y + _7._m0[_15].x) : 4294967295u), 40u, 70u); + imageStore(_9[registers._m4], int((2u < _7._m0[_71].y) ? (2u + _7._m0[_71].x) : 4294967295u), vec4(float(_7._m0[_11].y))); + imageStore(_9[registers._m4], int((3u < _7._m0[_71].y) ? 
(3u + _7._m0[_71].x) : 4294967295u), vec4(float(_7._m0[_13].y))); +} + diff --git a/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp similarity index 86% rename from reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp rename to reference/shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp index 9ae8d6fd7f3..f8650b5e5c2 100644 --- a/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp +++ b/reference/shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp @@ -5,20 +5,15 @@ layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture; void main() { + int _27_copy; int _30; _30 = 7; - int _27_copy; for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy) { if (5.0 > float(_27)) { break; } - else - { - continue; - } - continue; } imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0)); } diff --git a/reference/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp.vk b/reference/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp.vk new file mode 100644 index 00000000000..fa46c715bac --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp.vk @@ -0,0 +1,28 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_tracing : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) readonly buffer Buf +{ + uvec2 vas[1024]; +} _3; + +layout(push_constant, std430) uniform Registers +{ + uint index; +} _4; + +rayQueryEXT rq; + +void main() +{ + uvec2 _41; + do + { + uvec2 va = _3.vas[_4.index]; + _41 = _3.vas[_4.index]; + } while (false); + rayQueryInitializeEXT(rq, accelerationStructureEXT(_41), 0u, 0u, vec3(0.0), 0.0, vec3(0.0), 0.0); +} + 
diff --git a/reference/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp.vk b/reference/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp.vk new file mode 100644 index 00000000000..ccbbc02ff44 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp.vk @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_ray_query : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +float _16; +vec3 _17; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT RTAS; + +void main() +{ + rayQueryEXT _19; + rayQueryInitializeEXT(_19, RTAS, 2u, 255u, _17, _16, _17, _16); +} + diff --git a/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp b/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp new file mode 100644 index 00000000000..e7b9dbf3377 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp @@ -0,0 +1,44 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +const int A = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1 +#endif +const int A_1 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 2 +#endif +const int A_2 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 3 +#endif +const int A_3 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 4 +#endif +const int A_4 = SPIRV_CROSS_CONSTANT_ID_4; +#ifndef SPIRV_CROSS_CONSTANT_ID_5 +#define SPIRV_CROSS_CONSTANT_ID_5 5 +#endif +const int A_5 = SPIRV_CROSS_CONSTANT_ID_5; +const int A_6 = (A - A_1); +const int A_7 = (A_6 - A_2); +const int A_8 = (A_7 - A_3); +const int A_9 = (A_8 - A_4); +const int A_10 = (A_9 - A_5); 
+const int A_11 = (A_10 + A_5); + +layout(binding = 0, std430) buffer SSBO +{ + int values[]; +} _5; + +void main() +{ + _5.values[gl_GlobalInvocationID.x] = A_11; +} + diff --git a/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp.vk b/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp.vk new file mode 100644 index 00000000000..c31d0787d80 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp.vk @@ -0,0 +1,26 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(constant_id = 0) const int A = 0; +layout(constant_id = 1) const int A_1 = 1; +layout(constant_id = 2) const int A_2 = 2; +layout(constant_id = 3) const int A_3 = 3; +layout(constant_id = 4) const int A_4 = 4; +layout(constant_id = 5) const int A_5 = 5; +const int A_6 = (A - A_1); +const int A_7 = (A_6 - A_2); +const int A_8 = (A_7 - A_3); +const int A_9 = (A_8 - A_4); +const int A_10 = (A_9 - A_5); +const int A_11 = (A_10 + A_5); + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + int values[]; +} _5; + +void main() +{ + _5.values[gl_GlobalInvocationID.x] = A_11; +} + diff --git a/reference/shaders-no-opt/asm/comp/spec-constant-op-convert-sign.asm.comp b/reference/shaders-no-opt/asm/comp/spec-constant-op-convert-sign.asm.comp index c6aa711f650..50ca0fbdbc4 100644 --- a/reference/shaders-no-opt/asm/comp/spec-constant-op-convert-sign.asm.comp +++ b/reference/shaders-no-opt/asm/comp/spec-constant-op-convert-sign.asm.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; #ifndef SPIRV_CROSS_CONSTANT_ID_0 diff --git a/reference/opt/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp similarity index 100% rename from reference/opt/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp rename to reference/shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp diff --git a/reference/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp.vk b/reference/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp.vk new file mode 100644 index 00000000000..22834fa8e72 --- /dev/null +++ b/reference/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp.vk @@ -0,0 +1,36 @@ +#version 450 +#extension GL_KHR_shader_subgroup_ballot : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform usamplerBuffer _4; +layout(set = 0, binding = 0, r32ui) uniform writeonly uimageBuffer _5; + +uvec4 WaveMatch(uint _45) +{ + uvec4 _52; + for (;;) + { + bool _51 = _45 == subgroupBroadcastFirst(_45); + _52 = subgroupBallot(_51); + if (_51) + { + break; + } + else + { + continue; + } + } + return _52; +} + +void main() +{ + uvec4 _32 = WaveMatch(texelFetch(_4, int(gl_GlobalInvocationID.x)).x); + uint _37 = gl_GlobalInvocationID.x * 4u; + imageStore(_5, int(_37), uvec4(_32.x)); + imageStore(_5, int(_37 + 1u), uvec4(_32.y)); + imageStore(_5, int(_37 + 2u), uvec4(_32.z)); + imageStore(_5, int(_37 + 3u), uvec4(_32.w)); +} + diff --git a/reference/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag b/reference/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag new file mode 100644 index 00000000000..eb1cf0ca940 --- /dev/null +++ b/reference/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag @@ -0,0 +1,118 @@ +#version 320 es +precision mediump float; +precision highp int; + +layout(binding = 1, 
std140) uniform buf1 +{ + highp vec2 resolution; +} _9; + +layout(binding = 0, std140) uniform buf0 +{ + highp vec2 injectionSwitch; +} _13; + +layout(location = 0) out highp vec4 _GLF_color; + +bool checkSwap(highp float a, highp float b) +{ + bool _153 = gl_FragCoord.y < (_9.resolution.y / 2.0); + highp float _160; + if (_153) + { + _160 = a; + } + else + { + highp float _159 = 0.0; + _160 = _159; + } + bool _147; + do + { + highp float _168; + if (_153) + { + _168 = b; + } + else + { + highp float _167 = 0.0; + _168 = _167; + } + if (_153) + { + _147 = _160 > _168; + } + if (true) + { + break; + } + else + { + break; + } + } while(false); + highp float _180; + if (_153) + { + highp float _179 = 0.0; + _180 = _179; + } + else + { + _180 = a; + } + highp float _186; + if (_153) + { + highp float _185 = 0.0; + _186 = _185; + } + else + { + _186 = b; + } + if (!_153) + { + _147 = _180 < _186; + } + return _147; +} + +void main() +{ + highp float data[10]; + for (int i = 0; i < 10; i++) + { + data[i] = float(10 - i) * _13.injectionSwitch.y; + } + for (int i_1 = 0; i_1 < 9; i_1++) + { + for (int j = 0; j < 10; j++) + { + if (j < (i_1 + 1)) + { + continue; + } + highp float param = data[i_1]; + highp float param_1 = data[j]; + bool doSwap = checkSwap(param, param_1); + if (doSwap) + { + highp float temp = data[i_1]; + data[i_1] = data[j]; + data[j] = temp; + } + } + } + if (gl_FragCoord.x < (_9.resolution.x / 2.0)) + { + _GLF_color = vec4(data[0] / 10.0, data[5] / 10.0, data[9] / 10.0, 1.0); + } + else + { + _GLF_color = vec4(data[5] / 10.0, data[9] / 10.0, data[0] / 10.0, 1.0); + } +} + diff --git a/reference/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/reference/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..b2d8919aa96 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,63 @@ +#version 450 + +struct anon_aa +{ + int foo; +}; + +struct anon_ab 
+{ + int foo; +}; + +struct anon_a +{ + anon_aa _aa; + anon_ab ab; +}; + +struct anon_ba +{ + int foo; +}; + +struct anon_bb +{ + int foo; +}; + +struct anon_b +{ + anon_ba _ba; + anon_bb bb; +}; + +struct anon_ca +{ + int foo; +}; + +struct anon_c +{ + anon_ca _ca; +}; + +struct anon_da +{ + int foo; +}; + +struct anon_d +{ + anon_da da; +}; + +struct anon_e +{ + int a; +}; + +void main() +{ +} + diff --git a/reference/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag b/reference/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag new file mode 100644 index 00000000000..40f6ee714b1 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag @@ -0,0 +1,28 @@ +#version 450 + +layout(binding = 0, std140) uniform uBuffer +{ + vec4 color; +} x_12; + +layout(location = 0) out vec4 fragColor; +const vec4 _2_init = vec4(0.0); + +void main() +{ + fragColor = _2_init; + gl_SampleMask[0] = 0; + fragColor = x_12.color; + gl_SampleMask[0] = int(uint(6)); + gl_SampleMask[0] = int(uint(gl_SampleMask[0])); + uint _30_unrolled[1]; + for (int i = 0; i < int(1); i++) + { + _30_unrolled[i] = int(gl_SampleMask[i]); + } + for (int i = 0; i < int(1); i++) + { + gl_SampleMask[i] = int(_30_unrolled[i]); + } +} + diff --git a/reference/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag b/reference/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag new file mode 100644 index 00000000000..a6f3e694418 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + vec4 _17; + _17 = vec4(1.0); + FragColor = _17; +} + diff --git a/reference/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag b/reference/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag new file mode 100644 index 00000000000..5fa822b39f2 --- /dev/null +++ 
b/reference/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag @@ -0,0 +1,28 @@ +#version 310 es +precision mediump float; +precision highp int; + +struct PSInput +{ + highp vec4 color; + highp vec2 uv; +}; + +uniform mediump sampler2D SPIRV_Cross_CombinedtexSamp; + +layout(location = 0) in highp vec4 in_var_COLOR; +layout(location = 1) in highp vec2 in_var_TEXCOORD0; +layout(location = 0) out highp vec4 out_var_SV_TARGET; + +highp vec4 src_PSMain(PSInput _input) +{ + vec4 a = _input.color * texture(SPIRV_Cross_CombinedtexSamp, _input.uv); + return a; +} + +void main() +{ + PSInput param_var_input = PSInput(in_var_COLOR, in_var_TEXCOORD0); + out_var_SV_TARGET = src_PSMain(param_var_input); +} + diff --git a/reference/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag b/reference/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag new file mode 100644 index 00000000000..140a336debe --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag @@ -0,0 +1,71 @@ +#version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif + +struct MyConsts +{ + uint opt; +}; + +uvec4 _37; + +layout(binding = 3, std140) uniform type_scene +{ + MyConsts myConsts; +} scene; + +uniform sampler2D SPIRV_Cross_CombinedtexTablemySampler[1]; + +layout(location = 1) out uint out_var_SV_TARGET1; + +void main() +{ + uint _42; + bool _47; + float _55; + do + { + _42 = _37.y & 16777215u; + _47 = scene.myConsts.opt != 0u; + SPIRV_CROSS_BRANCH + if (_47) + { + _55 = 1.0; + break; + } + else + { + _55 = textureLod(SPIRV_Cross_CombinedtexTablemySampler[_42], 
vec2(0.0), 0.0).x; + break; + } + break; // unreachable workaround + } while(false); + float _66; + do + { + SPIRV_CROSS_BRANCH + if (_47) + { + _66 = 1.0; + break; + } + else + { + _66 = textureLod(SPIRV_Cross_CombinedtexTablemySampler[_42], vec2(0.0), 0.0).x; + break; + } + break; // unreachable workaround + } while(false); + out_var_SV_TARGET1 = uint(cross(vec3(-1.0, -1.0, _55), vec3(1.0, 1.0, _66)).x); +} + diff --git a/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag b/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag new file mode 100644 index 00000000000..f01a3282f10 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag @@ -0,0 +1,27 @@ +#version 450 + +layout(binding = 0, std430) readonly buffer SSBO +{ + float values0[]; +} _5; + +layout(binding = 1, std430) readonly buffer SSBO1 +{ + float values1[]; +} _7; + +layout(location = 0) out vec2 FragColor; + +void main() +{ + vec2 _27; + _27 = vec2(0.0); + vec2 _39; + vec2 _40; + vec2 _41; + for (int _30 = 0; _30 < 16; _39 = _27 * _27, _40 = _39, _40.x = _5.values0[_30], _41 = _40, _41.y = _7.values1[_30], _27 += _41, _30++) + { + } + FragColor = _27; +} + diff --git a/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag b/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag new file mode 100644 index 00000000000..37b66f8c3d0 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag @@ -0,0 +1,29 @@ +#version 450 + +layout(binding = 0, std430) readonly buffer SSBO +{ + float values0[]; +} _5; + +layout(binding = 1, std430) readonly buffer SSBO1 +{ + float values1[]; +} _7; + +layout(location = 0) out vec2 FragColor; + +void main() +{ + vec2 _27; + _27 = vec2(0.0); + vec2 _42; + for (int _30 = 0; _30 < 16; _27 += _42, _30++) + { + vec2 _40 = _27 * _27; + _40.x = _5.values0[_30]; + _42 = _40; + _42.y = 
_7.values1[_30]; + } + FragColor = _27; +} + diff --git a/reference/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/reference/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..11c1f4ca3c4 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,73 @@ +#version 310 es +precision mediump float; +precision highp int; + +vec4 _32; + +layout(location = 0) in vec4 vInput; +layout(location = 0) out vec4 FragColor; + +void main() +{ + vec4 _37 = vInput; + highp vec4 _38 = _37; + _38.x = 1.0; + _38.y = 2.0; + _38.z = 3.0; + _38.w = 4.0; + FragColor = _38; + vec4 _6 = _37; + _6.x = 1.0; + _6.y = 2.0; + _6.z = 3.0; + _6.w = 4.0; + FragColor = _6; + highp vec4 _42 = _37; + _42.x = 1.0; + vec4 _10 = _42; + _10.y = 2.0; + highp vec4 _43 = _10; + _43.z = 3.0; + vec4 _11 = _43; + _11.w = 4.0; + FragColor = _11; + highp vec4 _44 = _37; + _44.x = 1.0; + highp vec4 _45 = _44; + _45.y = 2.0; + vec4 mp_copy_45 = _45; + highp vec4 _46 = _45; + _46.z = 3.0; + highp vec4 _47 = _46; + _47.w = 4.0; + vec4 mp_copy_47 = _47; + FragColor = _47 + _44; + FragColor = mp_copy_47 + mp_copy_45; + highp vec4 _49; + _49.x = 1.0; + _49.y = 2.0; + _49.z = 3.0; + _49.w = 4.0; + FragColor = _49; + highp vec4 _53 = vec4(0.0); + _53.x = 1.0; + FragColor = _53; + highp vec4 _54[2] = vec4[](vec4(0.0), vec4(0.0)); + _54[1].z = 1.0; + _54[0].w = 2.0; + FragColor = _54[0]; + FragColor = _54[1]; + highp mat4 _58 = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + _58[1].z = 1.0; + _58[2].w = 2.0; + FragColor = _58[0]; + FragColor = _58[1]; + FragColor = _58[2]; + FragColor = _58[3]; + highp vec4 PHI; + PHI = _46; + highp vec4 _65 = PHI; + _65.w = 4.0; + FragColor = _65; +} + diff --git a/reference/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag.vk b/reference/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag.vk new file mode 100644 index 
00000000000..adde5fcbf46 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag.vk @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) flat in int vA; +layout(location = 0) out vec4 FragColor; + +vec4 foobar(int a) +{ + if (a < 0) + { + demote; + } + return vec4(10.0); +} + +void main() +{ + int param = vA; + vec4 _25 = foobar(param); + FragColor = vec4(10.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag b/reference/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag new file mode 100644 index 00000000000..0fe71f64b44 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) flat in int vA; +layout(location = 0) out vec4 FragColor; + +vec4 foobar(int a) +{ + if (a < 0) + { + discard; + } + return vec4(10.0); +} + +void main() +{ + int param = vA; + vec4 _25 = foobar(param); + FragColor = vec4(10.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag b/reference/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag new file mode 100644 index 00000000000..2024c302efd --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag @@ -0,0 +1,37 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out highp vec4 _GLF_color; + +void main() +{ + for (;;) + { + bool _32; + for (;;) + { + if (gl_FragCoord.x != gl_FragCoord.x) + { + _32 = true; + break; + } + if (false) + { + continue; + } + else + { + _32 = false; + break; + } + } + if (_32) + { + break; + } + _GLF_color = vec4(1.0, 0.0, 0.0, 1.0); + break; + } +} + diff --git a/reference/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag b/reference/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag new file mode 100644 index 
00000000000..b03d5a4d7b1 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag @@ -0,0 +1,67 @@ +#version 450 + +vec4 _32; + +layout(binding = 0, std140) uniform type_gCBuffarrayIndex +{ + uint gArrayIndex; +} gCBuffarrayIndex; + +uniform sampler2D SPIRV_Cross_Combinedg_textureArray0SPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_Combinedg_textureArray1SPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_Combinedg_textureArray2SPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_Combinedg_textureArray3SPIRV_Cross_DummySampler; + +layout(location = 0) out vec4 out_var_SV_TARGET; + +void main() +{ + vec4 _80; + do + { + vec4 _77; + bool _78; + switch (gCBuffarrayIndex.gArrayIndex) + { + case 0u: + { + _77 = texelFetch(SPIRV_Cross_Combinedg_textureArray0SPIRV_Cross_DummySampler, ivec3(int(gl_FragCoord.x), int(gl_FragCoord.y), 0).xy, 0); + _78 = true; + break; + } + case 1u: + { + _77 = texelFetch(SPIRV_Cross_Combinedg_textureArray1SPIRV_Cross_DummySampler, ivec3(int(gl_FragCoord.x), int(gl_FragCoord.y), 0).xy, 0); + _78 = true; + break; + } + case 2u: + { + _77 = texelFetch(SPIRV_Cross_Combinedg_textureArray2SPIRV_Cross_DummySampler, ivec3(int(gl_FragCoord.x), int(gl_FragCoord.y), 0).xy, 0); + _78 = true; + break; + } + case 3u: + { + _77 = texelFetch(SPIRV_Cross_Combinedg_textureArray3SPIRV_Cross_DummySampler, ivec3(int(gl_FragCoord.x), int(gl_FragCoord.y), 0).xy, 0); + _78 = true; + break; + } + default: + { + _77 = _32; + _78 = false; + break; + } + } + if (_78) + { + _80 = _77; + break; + } + _80 = vec4(0.0, 1.0, 0.0, 1.0); + break; + } while(false); + out_var_SV_TARGET = _80; +} + diff --git a/reference/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/reference/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..0d3958b5b08 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,31 @@ +#version 450 + +struct 
EmptyStructTest +{ + int empty_struct_member; +}; + +struct EmptyStruct2Test +{ + EmptyStructTest _m0; +}; + +float GetValue(EmptyStruct2Test self) +{ + return 0.0; +} + +float GetValue_1(EmptyStruct2Test self) +{ + return 0.0; +} + +void main() +{ + EmptyStructTest _25 = EmptyStructTest(0); + EmptyStruct2Test emptyStruct; + float value = GetValue(emptyStruct); + value = GetValue_1(EmptyStruct2Test(_25)); + value = GetValue_1(EmptyStruct2Test(EmptyStructTest(0))); +} + diff --git a/reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.frag b/reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.invalid.frag similarity index 100% rename from reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.frag rename to reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.invalid.frag diff --git a/reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.frag b/reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.invalid.frag similarity index 100% rename from reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.frag rename to reference/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.invalid.frag diff --git a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag index 01797173f1b..874bc6de137 100644 --- a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag +++ b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag @@ -61,6 +61,15 @@ struct Params vec4 LqmatFarTilingFactor; }; +VertexOutput _121; +SurfaceInput _122; +vec2 _123; +vec4 _124; +Surface _125; +vec4 _192; +vec4 _219; +vec4 _297; + layout(binding = 0, std140) uniform CB0 { Globals CB0; @@ -86,51 +95,31 @@ layout(location = 7) in vec4 IN_PosLightSpace_Reflectance; layout(location = 8) in 
float IN_studIndex; layout(location = 0) out vec4 _entryPointOutput; -VertexOutput _121; -SurfaceInput _122; -vec2 _123; -vec4 _124; -Surface _125; -vec4 _192; -vec4 _219; -vec4 _297; - void main() { - VertexOutput _128 = _121; + VertexOutput _128; _128.HPosition = gl_FragCoord; - VertexOutput _130 = _128; - _130.Uv_EdgeDistance1 = IN_Uv_EdgeDistance1; - VertexOutput _132 = _130; - _132.UvStuds_EdgeDistance2 = IN_UvStuds_EdgeDistance2; - VertexOutput _134 = _132; - _134.Color = IN_Color; - VertexOutput _136 = _134; - _136.LightPosition_Fog = IN_LightPosition_Fog; - VertexOutput _138 = _136; - _138.View_Depth = IN_View_Depth; - VertexOutput _140 = _138; - _140.Normal_SpecPower = IN_Normal_SpecPower; - VertexOutput _142 = _140; - _142.Tangent = IN_Tangent; - VertexOutput _144 = _142; - _144.PosLightSpace_Reflectance = IN_PosLightSpace_Reflectance; - VertexOutput _146 = _144; - _146.studIndex = IN_studIndex; - SurfaceInput _147 = _122; + _128.Uv_EdgeDistance1 = IN_Uv_EdgeDistance1; + _128.UvStuds_EdgeDistance2 = IN_UvStuds_EdgeDistance2; + _128.Color = IN_Color; + _128.LightPosition_Fog = IN_LightPosition_Fog; + _128.View_Depth = IN_View_Depth; + _128.Normal_SpecPower = IN_Normal_SpecPower; + _128.Tangent = IN_Tangent; + _128.PosLightSpace_Reflectance = IN_PosLightSpace_Reflectance; + _128.studIndex = IN_studIndex; + SurfaceInput _147; _147.Color = IN_Color; - SurfaceInput _149 = _147; - _149.Uv = IN_Uv_EdgeDistance1.xy; - SurfaceInput _151 = _149; - _151.UvStuds = IN_UvStuds_EdgeDistance2.xy; - SurfaceInput _156 = _151; - _156.UvStuds.y = (fract(_151.UvStuds.y) + IN_studIndex) * 0.25; - float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y; + _147.Uv = IN_Uv_EdgeDistance1.xy; + _147.UvStuds = IN_UvStuds_EdgeDistance2.xy; + _147.UvStuds.y = (fract(_147.UvStuds.y) + IN_studIndex) * 0.25; + float _160 = clamp(1.0 - (_128.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0); + float _163 = _128.View_Depth.w * 
_19.CB0.RefractionBias_FadeDistance_GlowFactor.y; float _165 = clamp(1.0 - _163, 0.0, 1.0); vec2 _166 = IN_Uv_EdgeDistance1.xy * 1.0; bool _173; vec4 _193; - do + for (;;) { _173 = 0.0 == 0.0; if (_173) @@ -141,15 +130,14 @@ void main() else { float _180 = 1.0 / (1.0 - 0.0); - _193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0))); + _193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((_160 * _180) - (0.0 * _180), 0.0, 1.0))); break; } _193 = _192; break; - } while (false); - vec4 _194 = _193 * 1.0; + } vec4 _220; - do + for (;;) { if (_173) { @@ -164,18 +152,20 @@ void main() } _220 = _219; break; - } while (false); + } vec2 _223 = vec2(1.0); vec2 _224 = (_220.wy * 2.0) - _223; vec3 _232 = vec3(_224, sqrt(clamp(1.0 + dot(-_224, _224), 0.0, 1.0))); - vec2 _240 = (texture(SPIRV_Cross_CombinedNormalDetailMapTextureNormalDetailMapSampler, _166 * 0.0).wy * 2.0) - _223; + vec4 _237 = texture(SPIRV_Cross_CombinedNormalDetailMapTextureNormalDetailMapSampler, _166 * 0.0); + vec2 _240 = (_237.wy * 2.0) - _223; vec2 _252 = _232.xy + (vec3(_240, sqrt(clamp(1.0 + dot(-_240, _240), 0.0, 1.0))).xy * 0.0); vec3 _253 = vec3(_252.x, _252.y, _232.z); vec2 _255 = _253.xy * _165; vec3 _256 = vec3(_255.x, _255.y, _253.z); - vec3 _271 = ((IN_Color.xyz * _194.xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (texture(SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler, _156.UvStuds).x * 2.0); + vec4 _268 = texture(SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler, _147.UvStuds); + vec3 _271 = ((IN_Color.xyz * (_193 * 1.0).xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (_268.x * 2.0); vec4 _298; - do + for (;;) { if (0.75 == 
0.0) { @@ -190,23 +180,19 @@ void main() } _298 = _297; break; - } while (false); + } vec2 _303 = mix(vec2(0.800000011920928955078125, 120.0), (_298.xy * vec2(2.0, 256.0)) + vec2(0.0, 0.00999999977648258209228515625), vec2(_165)); - Surface _304 = _125; + Surface _304; _304.albedo = _271; - Surface _305 = _304; - _305.normal = _256; + _304.normal = _256; float _306 = _303.x; - Surface _307 = _305; - _307.specular = _306; + _304.specular = _306; float _308 = _303.y; - Surface _309 = _307; - _309.gloss = _308; + _304.gloss = _308; float _312 = (_298.xy.y * _165) * 0.0; - Surface _313 = _309; - _313.reflectance = _312; - vec4 _318 = vec4(_271, _146.Color.w); - vec3 _329 = normalize(((IN_Tangent * _313.normal.x) + (cross(IN_Normal_SpecPower.xyz, IN_Tangent) * _313.normal.y)) + (IN_Normal_SpecPower.xyz * _313.normal.z)); + _304.reflectance = _312; + vec4 _318 = vec4(_271, _128.Color.w); + vec3 _329 = normalize(((IN_Tangent * _304.normal.x) + (cross(IN_Normal_SpecPower.xyz, IN_Tangent) * _304.normal.y)) + (IN_Normal_SpecPower.xyz * _304.normal.z)); vec3 _332 = -_19.CB0.Lamp0Dir; float _333 = dot(_329, _332); float _357 = clamp(dot(step(_19.CB0.LightConfig3.xyz, abs(IN_LightPosition_Fog.xyz - _19.CB0.LightConfig2.xyz)), vec3(1.0)), 0.0, 1.0); @@ -214,15 +200,14 @@ void main() vec2 _376 = texture(SPIRV_Cross_CombinedShadowMapTextureShadowMapSampler, IN_PosLightSpace_Reflectance.xyz.xy).xy; float _392 = (1.0 - (((step(_376.x, IN_PosLightSpace_Reflectance.xyz.z) * clamp(9.0 - (20.0 * abs(IN_PosLightSpace_Reflectance.xyz.z - 0.5)), 0.0, 1.0)) * _376.y) * _19.CB0.OutlineBrightness_ShadowInfo.w)) * _368.w; vec3 _403 = mix(_318.xyz, texture(SPIRV_Cross_CombinedEnvironmentMapTextureEnvironmentMapSampler, reflect(-IN_View_Depth.xyz, _329)).xyz, vec3(_312)); - vec4 _404 = vec4(_403.x, _403.y, _403.z, _318.w); - vec3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * max(-_333, 0.0))) * _392)) + _368.xyz) * _404.xyz) + 
(_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(clamp(dot(_329, normalize(_332 + normalize(IN_View_Depth.xyz))), 0.0, 1.0), _308))); - vec4 _425 = vec4(_422.x, _422.y, _422.z, _124.w); - _425.w = _404.w; + vec3 _422 = (((_19.CB0.AmbientColor + (((_19.CB0.Lamp0Color * clamp(_333, 0.0, 1.0)) + (_19.CB0.Lamp1Color * max(-_333, 0.0))) * _392)) + _368.xyz) * vec4(_403.x, _403.y, _403.z, _318.w).xyz) + (_19.CB0.Lamp0Color * (((step(0.0, _333) * _306) * _392) * pow(clamp(dot(_329, normalize(_332 + normalize(IN_View_Depth.xyz))), 0.0, 1.0), _308))); + vec4 _423 = vec4(_422.x, _422.y, _422.z, _124.w); + _423.w = vec4(_403.x, _403.y, _403.z, _318.w).w; vec2 _435 = min(IN_Uv_EdgeDistance1.wz, IN_UvStuds_EdgeDistance2.wz); float _439 = min(_435.x, _435.y) / _163; - vec3 _445 = _425.xyz * clamp((clamp((_163 * _19.CB0.OutlineBrightness_ShadowInfo.x) + _19.CB0.OutlineBrightness_ShadowInfo.y, 0.0, 1.0) * (1.5 - _439)) + _439, 0.0, 1.0); - vec4 _446 = vec4(_445.x, _445.y, _445.z, _425.w); - vec3 _453 = mix(_19.CB0.FogColor, _446.xyz, vec3(clamp(_146.LightPosition_Fog.w, 0.0, 1.0))); + vec3 _445 = _423.xyz * clamp((clamp((_163 * _19.CB0.OutlineBrightness_ShadowInfo.x) + _19.CB0.OutlineBrightness_ShadowInfo.y, 0.0, 1.0) * (1.5 - _439)) + _439, 0.0, 1.0); + vec4 _446 = vec4(_445.x, _445.y, _445.z, _423.w); + vec3 _453 = mix(_19.CB0.FogColor, _446.xyz, vec3(clamp(_128.LightPosition_Fog.w, 0.0, 1.0))); _entryPointOutput = vec4(_453.x, _453.y, _453.z, _446.w); } diff --git a/reference/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag b/reference/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag new file mode 100644 index 00000000000..ed853d0125c --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag @@ -0,0 +1,263 @@ +#version 320 es +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH 
[[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif +precision mediump float; +precision highp int; + +layout(binding = 0, std140) uniform buf0 +{ + highp vec2 resolution; +} _7; + +layout(location = 0) out highp vec4 _GLF_color; +int map[256]; +highp mat2x4 _60 = mat2x4(vec4(0.0), vec4(0.0)); + +void main() +{ + int _65 = 256 - 14; + int _68 = -_65; + highp vec2 pos = gl_FragCoord.xy / _7.resolution; + ivec2 ipos = ivec2(int(pos.x * 16.0), int(pos.y * 16.0)); + int i = 0; + for (; i < 256; i++) + { + map[i] = 0; + } + ivec2 p = ivec2(0); + int v = 0; + bool canwalk = true; + do + { + v++; + int directions = 0; + bool _98 = p.x > 0; + bool _111; + if (_98) + { + _111 = map[(p.x - 2) + (p.y * 16)] == 0; + } + else + { + _111 = _98; + } + if (_111) + { + directions++; + } + bool _118 = p.y > 0; + bool _131; + if (_118) + { + _131 = map[p.x + ((p.y - 2) * 16)] == 0; + } + else + { + _131 = _118; + } + if (_131) + { + directions++; + } + bool _138 = p.x < 14; + bool _151; + if (_138) + { + _151 = map[(p.x + 2) + (p.y * 16)] == 0; + } + else + { + _151 = _138; + } + if (_151) + { + directions++; + } + int _156 = 256 - _68; + bool _159 = p.y < 14; + bool _172; + if (_159) + { + _172 = map[p.x + ((p.y + 2) * 16)] == 0; + } + else + { + _172 = _159; + } + if (_172) + { + directions++; + } + if (directions == 0) + { + canwalk = false; + i = 0; + for (;;) + { + int _186 = i; + if (_186 < 8) + { + int j = 0; + _60 = mat2x4(vec4(0.0), vec4(0.0)); + if (false) + { + int _216 = i; + i = _216 + 1; + continue; + } + else + { + SPIRV_CROSS_UNROLL + for (; j < 8; j++) + { + if (map[(j * 2) + ((i * 2) * 16)] == 0) + { + p.x = j * 2; + p.y = i * 2; + canwalk = true; + } + } + int _216 = i; + i = _216 + 1; + continue; + } + } + else + { + break; + } + } + map[p.x + (p.y * 16)] = 1; + } + else + { + int d = v % 
directions; + v += directions; + bool _232 = d >= 0; + bool _238; + if (_232) + { + _238 = p.x > 0; + } + else + { + _238 = _232; + } + bool _251; + if (_238) + { + _251 = map[(p.x - 2) + (p.y * 16)] == 0; + } + else + { + _251 = _238; + } + if (_251) + { + d--; + map[p.x + (p.y * 16)] = 1; + map[(p.x - 1) + (p.y * 16)] = 1; + map[(p.x - 2) + (p.y * 16)] = 1; + p.x -= 2; + } + bool _284 = d >= 0; + bool _290; + if (_284) + { + _290 = p.y > 0; + } + else + { + _290 = _284; + } + bool _303; + if (_290) + { + _303 = map[p.x + ((p.y - 2) * 16)] == 0; + } + else + { + _303 = _290; + } + if (_303) + { + d--; + map[p.x + (p.y * 16)] = 1; + map[p.x + ((p.y - 1) * 16)] = 1; + map[p.x + ((p.y - 2) * 16)] = 1; + p.y -= 2; + } + bool _336 = d >= 0; + bool _342; + if (_336) + { + _342 = p.x < 14; + } + else + { + _342 = _336; + } + bool _355; + if (_342) + { + _355 = map[(p.x + 2) + (p.y * 16)] == 0; + } + else + { + _355 = _342; + } + if (_355) + { + d--; + map[p.x + (p.y * 16)] = 1; + map[(p.x + 1) + (p.y * 16)] = 1; + map[(p.x + 2) + (p.y * 16)] = 1; + p.x += 2; + } + bool _388 = d >= 0; + bool _394; + if (_388) + { + _394 = p.y < 14; + } + else + { + _394 = _388; + } + bool _407; + if (_394) + { + _407 = map[p.x + ((p.y + 2) * 16)] == 0; + } + else + { + _407 = _394; + } + if (_407) + { + d--; + map[p.x + (p.y * 16)] = 1; + map[p.x + ((p.y + 1) * 16)] = 1; + map[p.x + ((p.y + 2) * 16)] = 1; + p.y += 2; + } + } + if (map[(ipos.y * 16) + ipos.x] == 1) + { + _GLF_color = vec4(1.0); + return; + } + } while (canwalk); + _GLF_color = vec4(0.0, 0.0, 0.0, 1.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag b/reference/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag new file mode 100644 index 00000000000..6522c651c8f --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag @@ -0,0 +1,54 @@ +#version 310 es +precision mediump float; +precision highp int; + +const mat4 _34[4] = 
mat4[](mat4(vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0)), mat4(vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0)), mat4(vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0)), mat4(vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0))); + +layout(location = 0) out highp vec4 _GLF_color; + +void main() +{ + for (;;) + { + if (gl_FragCoord.x < 10.0) + { + _GLF_color = vec4(1.0, 0.0, 0.0, 1.0); + break; + } + for (int _46 = 0; _46 < 4; _46++) + { + int _53; + _53 = 0; + bool _56; + for (;;) + { + _56 = _53 < 4; + if (_56) + { + if (distance(vec2(1.0), vec2(1.0) / vec2(_34[int(_56)][_46].w)) < 1.0) + { + _GLF_color = vec4(1.0); + int _54 = _53 + 1; + _53 = _54; + continue; + } + else + { + int _54 = _53 + 1; + _53 = _54; + continue; + } + int _54 = _53 + 1; + _53 = _54; + continue; + } + else + { + break; + } + } + } + break; + } +} + diff --git a/reference/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag b/reference/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag new file mode 100644 index 00000000000..4ce9b253578 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag @@ -0,0 +1,13 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out highp vec4 _GLF_color; + +void main() +{ + mediump uvec4 _4 = uvec4(bitCount(uvec4(1u))); + uvec4 hp_copy_4 = _4; + _GLF_color = ldexp(vec4(1.0), ivec4(hp_copy_4)); +} + diff --git a/reference/shaders/asm/frag/loop-merge-to-continue.asm.frag b/reference/shaders-no-opt/asm/frag/loop-merge-to-continue.asm.invalid.frag similarity index 100% rename from reference/shaders/asm/frag/loop-merge-to-continue.asm.frag rename to reference/shaders-no-opt/asm/frag/loop-merge-to-continue.asm.invalid.frag diff --git a/reference/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag.vk b/reference/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag.vk new file mode 100644 index 00000000000..d2f964674f0 --- /dev/null +++ 
b/reference/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag.vk @@ -0,0 +1,20 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(set = 0, binding = 0, std430) readonly buffer SSBO +{ + uint indices[]; +} _8; + +layout(set = 0, binding = 0) uniform sampler2D uSamplers[]; +layout(set = 1, binding = 0) uniform sampler2D uSampler; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; + +void main() +{ + FragColor = textureLod(uSamplers[nonuniformEXT(_8.indices[10])], vUV, 0.0); + FragColor += textureLod(uSampler, vUV, float(_8.indices[int(gl_FragCoord.y)])); +} + diff --git a/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk b/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk index 5f7ddeee17a..289f576f6dd 100644 --- a/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk +++ b/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk @@ -24,7 +24,7 @@ void main() int i = vIndex; int _59 = i + 10; int _64 = i + 40; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(_59)], uSamps[nonuniformEXT(_64)]), vUV); + FragColor = texture(nonuniformEXT(sampler2D(uSamplers[_59], uSamps[_64])), vUV); int _71 = i + 10; FragColor = texture(uCombinedSamplers[nonuniformEXT(_71)], vUV); int _77 = i + 20; diff --git a/reference/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag.vk b/reference/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag.vk new file mode 100644 index 00000000000..2d98ec5fdf2 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag.vk @@ -0,0 +1,24 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(set = 0, binding = 3, std430) buffer SSBO +{ + uint counter; + vec4 v[]; +} ssbos[]; + +layout(location = 0) flat in int vIndex; +layout(location = 0) out vec4 
FragColor; + +void main() +{ + int i = vIndex; + int _42 = i + 60; + int _45 = i + 70; + ssbos[nonuniformEXT(_42)].v[_45] = vec4(20.0); + int _48 = i + 100; + uint _49 = atomicAdd(ssbos[nonuniformEXT(_48)].counter, 100u); + int _51 = i; + FragColor.z += float(int(uint(ssbos[nonuniformEXT(_51)].v.length()))); +} + diff --git a/reference/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/reference/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..1041f711f8a --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,8 @@ +#version 450 + +const float _3_init = 0.5; +void main() +{ + gl_FragDepth = _3_init; +} + diff --git a/reference/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag b/reference/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..59bac994549 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,30 @@ +#version 450 + +int uninit_int = 0; +ivec4 uninit_vector = ivec4(0); +mat4 uninit_matrix = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + +struct Foo +{ + int a; +}; + +Foo uninit_foo = Foo(0); + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +void main() +{ + int _39 = 0; + if (vColor.x > 10.0) + { + _39 = 10; + } + else + { + _39 = 20; + } + FragColor = vColor; +} + diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag new file mode 100644 index 00000000000..7ba3fd5581e --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag @@ -0,0 +1,51 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() 
+#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! +#endif + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _31 = int(gl_FragCoord.x); + _7.values1[_31]++; +} + +void callee() +{ + int _39 = int(gl_FragCoord.x); + _9.values0[_39]++; + callee2(); +} + +void spvMainInterlockedBody() +{ + callee(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + SPIRV_Cross_beginInvocationInterlock(); + spvMainInterlockedBody(); + SPIRV_Cross_endInvocationInterlock(); +} diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag new file mode 100644 index 00000000000..3575e02c8b0 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag @@ -0,0 +1,65 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment 
Shader Interlock/Ordering extension missing! +#endif + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 2, std430) buffer _12_13 +{ + uint _m0[]; +} _13; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _44 = int(gl_FragCoord.x); + _7.values1[_44]++; +} + +void callee() +{ + int _52 = int(gl_FragCoord.x); + _9.values0[_52]++; + callee2(); + if (true) + { + } +} + +void _35() +{ + _13._m0[int(gl_FragCoord.x)] = 4u; +} + +void spvMainInterlockedBody() +{ + callee(); + _35(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + SPIRV_Cross_beginInvocationInterlock(); + spvMainInterlockedBody(); + SPIRV_Cross_endInvocationInterlock(); +} diff --git a/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag new file mode 100644 index 00000000000..806eedf9fbe --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag @@ -0,0 +1,61 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _37 = int(gl_FragCoord.x); + _7.values1[_37]++; +} + +void callee() +{ + int _45 = int(gl_FragCoord.x); + _9.values0[_45]++; + callee2(); +} + +void _29() +{ +} + +void _31() +{ +} + +void spvMainInterlockedBody() +{ + callee(); + _29(); + _31(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + SPIRV_Cross_beginInvocationInterlock(); + spvMainInterlockedBody(); + SPIRV_Cross_endInvocationInterlock(); +} diff --git a/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag new file mode 100644 index 00000000000..6078efae88d --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag @@ -0,0 +1,23 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +void main() +{ + float a = vColor.x; + highp float b = vColor.y; + int i = 0; + float _14; + highp float hp_copy_14; + float _15; + highp float hp_copy_15; + for (; i < 4; i++, _14 = a, hp_copy_14 = _14, _15 = a * _14, hp_copy_15 = _15, b += (hp_copy_15 * hp_copy_14)) + { + FragColor += vec4(1.0); + } + FragColor += vec4(b); +} + diff --git a/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag new file mode 100644 index 00000000000..58de92a45b7 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag @@ -0,0 +1,19 @@ +#version 310 es +precision mediump float; +precision highp int; + 
+layout(location = 0) in float vColor; +layout(location = 0) out float FragColor; + +void main() +{ + float b; + highp float hp_copy_b; + do + { + b = vColor * vColor; + hp_copy_b = b; + } while (false); + FragColor = hp_copy_b * hp_copy_b; +} + diff --git a/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag new file mode 100644 index 00000000000..b0b3a8dbe91 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag @@ -0,0 +1,53 @@ +#version 320 es +precision mediump float; +precision highp int; + +layout(binding = 0, std140) uniform UBO +{ + float mediump_float; + highp float highp_float; +} ubo; + +layout(location = 0) out vec4 FragColor0; +layout(location = 1) out vec4 FragColor1; +layout(location = 2) out vec4 FragColor2; +layout(location = 3) out vec4 FragColor3; +layout(location = 0) in vec4 V4; + +void main() +{ + vec4 V4_value0 = V4; + highp vec4 hp_copy_V4_value0 = V4_value0; + float V1_value0 = V4.x; + highp float hp_copy_V1_value0 = V1_value0; + float V1_value2 = V4_value0.z; + highp float hp_copy_V1_value2 = V1_value2; + float ubo_mp0 = ubo.mediump_float; + highp float hp_copy_ubo_mp0 = ubo_mp0; + highp float ubo_hp0 = ubo.highp_float; + float mp_copy_ubo_hp0 = ubo_hp0; + highp vec4 _48 = hp_copy_V4_value0 - vec4(3.0); + vec4 mp_copy_48 = _48; + FragColor0 = V4_value0 + vec4(3.0); + FragColor1 = _48; + FragColor2 = mp_copy_48 * vec4(3.0); + float _21 = V1_value0 + 3.0; + float float_0_weird = 3.0 - mp_copy_ubo_hp0; + highp float hp_copy_float_0_weird = float_0_weird; + highp float _49 = hp_copy_V1_value0 - hp_copy_float_0_weird; + float mp_copy_49 = _49; + FragColor3 = vec4(_21, _49, mp_copy_49 * mp_copy_ubo_hp0, 3.0); + highp float _51 = hp_copy_V1_value2 - hp_copy_ubo_mp0; + float mp_copy_51 = _51; + FragColor3 = vec4(V4_value0.z + ubo_mp0, _51, mp_copy_51 * mp_copy_ubo_hp0, 3.0); + FragColor0 = 
sin(hp_copy_V4_value0); + FragColor1 = sin(V4_value0); + float phi_mp; + highp float phi_hp; + phi_mp = _21; + phi_hp = _49; + highp float hp_copy_phi_mp = phi_mp; + float mp_copy_phi_hp = phi_hp; + FragColor2 = vec4(phi_mp + phi_mp, hp_copy_phi_mp + hp_copy_phi_mp, mp_copy_phi_hp + mp_copy_phi_hp, phi_hp + phi_hp); +} + diff --git a/reference/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag b/reference/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..52f0c616617 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out float FragColor; + +float _mat3(float a) +{ + return a + 1.0; +} + +float _RESERVED_IDENTIFIER_FIXUP_gl_Foo(int a) +{ + return float(a) + 1.0; +} + +void main() +{ + float param = 2.0; + int param_1 = 4; + FragColor = _mat3(param) + _RESERVED_IDENTIFIER_FIXUP_gl_Foo(param_1); +} + diff --git a/reference/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag b/reference/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag new file mode 100644 index 00000000000..5d75a44a334 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag @@ -0,0 +1,17 @@ +#version 450 + +layout(location = 0) out vec4 _RESERVED_IDENTIFIER_FIXUP_spvFoo; +layout(location = 1) out vec4 SPIRV_Cross_blah; +layout(location = 2) out vec4 _40Bar; +layout(location = 3) out vec4 _m40; +layout(location = 4) out vec4 _underscore_foo_bar_meep_; + +void main() +{ + _RESERVED_IDENTIFIER_FIXUP_spvFoo = vec4(0.0); + SPIRV_Cross_blah = vec4(1.0); + _40Bar = vec4(2.0); + _m40 = vec4(3.0); + _underscore_foo_bar_meep_ = vec4(4.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag b/reference/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..d74286a1536 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 
+1,19 @@ +#version 450 + +struct _15 +{ + float _m0; +}; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = false ? vec4(1.0, 1.0, 0.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0); + FragColor = vec4(false); + FragColor = mix(vec4(0.0, 0.0, 0.0, 1.0), vec4(1.0, 1.0, 0.0, 1.0), bvec4(false, true, false, true)); + FragColor = vec4(bvec4(false, true, false, true)); + _15 _32 = false ? _15(0.0) : _15(1.0); + float _33[2] = true ? float[](0.0, 1.0) : float[](1.0, 0.0); +} + diff --git a/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag b/reference/shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag similarity index 88% rename from reference/shaders/asm/frag/selection-merge-to-continue.asm.frag rename to reference/shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag index 82b5973f8af..edbce0ccafb 100644 --- a/reference/shaders/asm/frag/selection-merge-to-continue.asm.frag +++ b/reference/shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag @@ -11,12 +11,10 @@ void main() if (v0.x == 20.0) { FragColor += vec4(v0[i & 3]); - continue; } else { FragColor += vec4(v0[i & 1]); - continue; } } } diff --git a/reference/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag b/reference/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag new file mode 100644 index 00000000000..540978c4340 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require + +struct ResType +{ + uint _m0; + vec4 _m1; +}; + +layout(binding = 0) uniform sampler2D uSamp; + +layout(location = 0) in vec2 vUV; + +void main() +{ + uint _30; + vec4 _31; + _30 = sparseTextureARB(uSamp, vUV, _31); + ResType _26 = ResType(_30, _31); + vec4 texel = _26._m1; + bool ret = sparseTexelsResidentARB(int(_26._m0)); +} + diff --git 
a/reference/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag.vk b/reference/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag.vk new file mode 100644 index 00000000000..130cab7d1ad --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag.vk @@ -0,0 +1,24 @@ +#version 450 +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require + +layout(location = 0) flat in int index; +layout(location = 0) out uint FragColor; + +void main() +{ + uint _17 = uint(index); + FragColor = uint(subgroupMin(index)); + FragColor = uint(subgroupMax(int(_17))); + FragColor = subgroupMin(uint(index)); + FragColor = subgroupMax(_17); + FragColor = uint(subgroupInclusiveMax(index)); + FragColor = uint(subgroupInclusiveMin(int(_17))); + FragColor = subgroupExclusiveMax(uint(index)); + FragColor = subgroupExclusiveMin(_17); + FragColor = uint(subgroupClusteredMin(index, 4u)); + FragColor = uint(subgroupClusteredMax(int(_17), 4u)); + FragColor = subgroupClusteredMin(uint(index), 4u); + FragColor = subgroupClusteredMax(_17, 4u); +} + diff --git a/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag new file mode 100644 index 00000000000..8a918c035c6 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag @@ -0,0 +1,32 @@ +#version 450 + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#else +#error No extensions available to emulate requested subgroup feature. 
+#endif + +layout(location = 0) flat in uint INDEX; +layout(location = 0) out uvec4 SV_Target; + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif defined(GL_NV_shader_thread_group) +uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); } +#elif defined(GL_ARB_shader_ballot) +uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); } +#endif + +void main() +{ + uvec4 _21 = subgroupBallot(INDEX < 100u); + SV_Target.x = _21.x; + SV_Target.y = _21.y; + SV_Target.z = _21.z; + SV_Target.w = _21.w; +} + diff --git a/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk new file mode 100644 index 00000000000..ed5933f3128 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_KHR_shader_subgroup_ballot : require + +layout(location = 0) flat in uint INDEX; +layout(location = 0) out uvec4 SV_Target; + +void main() +{ + uvec4 _21 = subgroupBallot(INDEX < 100u); + SV_Target.x = _21.x; + SV_Target.y = _21.y; + SV_Target.z = _21.z; + SV_Target.w = _21.w; +} + diff --git a/reference/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/reference/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from reference/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to reference/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/reference/shaders/asm/frag/switch-merge-to-continue.asm.frag b/reference/shaders-no-opt/asm/frag/switch-merge-to-continue.asm.invalid.frag similarity index 100% rename from reference/shaders/asm/frag/switch-merge-to-continue.asm.frag rename to reference/shaders-no-opt/asm/frag/switch-merge-to-continue.asm.invalid.frag diff --git a/reference/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag 
b/reference/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag new file mode 100644 index 00000000000..3315180965f --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag @@ -0,0 +1,88 @@ +#version 450 + +struct _4 +{ + uint _m0; + int _m1; +}; + +struct _5 +{ + int _m0; + int _m1; +}; + +_4 _16; +int _21; + +layout(location = 0) flat in int _2; +layout(location = 0) out int _3; + +void main() +{ + bool _25 = false; + do + { + _5 _26; + _26._m0 = 0; + _26._m1 = 10; + _4 _35; + _35 = _16; + int _39; + _4 _36; + bool _59; + int _38 = 0; + for (;;) + { + if (_26._m0 < _26._m1) + { + int _27 = _26._m0; + int _28 = _26._m0 + int(1u); + _26._m0 = _28; + _36 = _4(1u, _27); + } + else + { + _4 _48 = _35; + _48._m0 = 0u; + _36 = _48; + } + bool _45_ladder_break = false; + switch (int(_36._m0)) + { + case 0: + { + _3 = _38; + _25 = true; + _59 = true; + _45_ladder_break = true; + break; + } + default: + { + _59 = false; + _45_ladder_break = true; + break; + } + case 1: + { + break; + } + } + if (_45_ladder_break) + { + break; + } + _39 = _38 + _2; + _35 = _36; + _38 = _39; + continue; + } + if (_59) + { + break; + } + break; + } while(false); +} + diff --git a/reference/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag b/reference/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag new file mode 100644 index 00000000000..c9ddbe6899b --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag @@ -0,0 +1,26 @@ +#version 310 es +precision mediump float; +precision highp int; + +vec2 _19; + +layout(location = 0) out highp vec4 _GLF_color; + +void main() +{ + highp vec2 _30; + do + { + if (gl_FragCoord.x != gl_FragCoord.x) + { + _30 = _19; + break; + } + highp vec2 _29; + _29.y = _19.y; + _30 = _29; + break; + } while(false); + _GLF_color = vec4(_30, 1.0, 1.0); +} + diff --git 
a/reference/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag b/reference/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag new file mode 100644 index 00000000000..0fe71f64b44 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) flat in int vA; +layout(location = 0) out vec4 FragColor; + +vec4 foobar(int a) +{ + if (a < 0) + { + discard; + } + return vec4(10.0); +} + +void main() +{ + int param = vA; + vec4 _25 = foobar(param); + FragColor = vec4(10.0); +} + diff --git a/reference/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag b/reference/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag new file mode 100644 index 00000000000..d62ccb83866 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(binding = 0) uniform sampler2DMS uSamp; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + ivec2 _28 = ivec2(gl_FragCoord.xy); + FragColor.x = texelFetch(uSamp, _28, int(0u)).x; + FragColor.y = texelFetch(uSamp, _28, int(1u)).x; + FragColor.z = texelFetch(uSamp, _28, int(2u)).x; + FragColor.w = texelFetch(uSamp, _28, int(3u)).x; +} + diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000000..61122bbd3ab --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(location = 0) in vec4 A; +layout(location = 1) in vec4 B; +layout(location = 0) out vec4 FragColor; + +vec4 test_vector() +{ + bvec4 le = not(greaterThanEqual(A, B)); + bvec4 leq = not(greaterThan(A, B)); + bvec4 ge = not(lessThanEqual(A, B)); + bvec4 geq = not(lessThan(A, B)); + bvec4 eq = not(notEqual(A, B)); + bvec4 neq = notEqual(A, B); + neq = notEqual(A, B); + return ((((vec4(le) + 
vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq); +} + +float test_scalar() +{ + bool le = !(A.x >= B.x); + bool leq = !(A.x > B.x); + bool ge = !(A.x <= B.x); + bool geq = !(A.x < B.x); + bool eq = !(A.x != B.x); + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void main() +{ + FragColor = test_vector() + vec4(test_scalar()); +} + diff --git a/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000000..24db7c9f881 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(location = 0) in vec4 A; +layout(location = 1) in vec4 B; +layout(location = 0) out vec4 FragColor; + +vec4 test_vector() +{ + bvec4 le = lessThan(A, B); + bvec4 leq = lessThanEqual(A, B); + bvec4 ge = greaterThan(A, B); + bvec4 geq = greaterThanEqual(A, B); + bvec4 eq = equal(A, B); + bvec4 neq = notEqual(A, B); + neq = notEqual(A, B); + return ((((vec4(le) + vec4(leq)) + vec4(ge)) + vec4(geq)) + vec4(eq)) + vec4(neq); +} + +float test_scalar() +{ + bool le = A.x < B.x; + bool leq = A.x <= B.x; + bool ge = A.x > B.x; + bool geq = A.x >= B.x; + bool eq = A.x == B.x; + bool neq = A.x != B.x; + return ((((float(le) + float(leq)) + float(ge)) + float(geq)) + float(eq)) + float(neq); +} + +void main() +{ + FragColor = test_vector() + vec4(test_scalar()); +} + diff --git a/reference/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag b/reference/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag new file mode 100644 index 00000000000..d4f3acae097 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag @@ -0,0 +1,27 @@ +#version 450 + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +const int omap_r = 
SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1 +#endif +const int omap_g = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 2 +#endif +const int omap_b = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 3 +#endif +const int omap_a = SPIRV_CROSS_CONSTANT_ID_3; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = vec4(vColor[omap_r], vColor[omap_g], vColor[omap_b], vColor[omap_a]); +} + diff --git a/reference/shaders-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag b/reference/shaders-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag index b32d1874856..b6d3bc84900 100644 --- a/reference/shaders-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag +++ b/reference/shaders-no-opt/asm/frag/vector-shuffle-undef-index.asm.frag @@ -1,10 +1,10 @@ #version 450 +vec4 undef; + layout(location = 0) out vec4 FragColor; layout(location = 0) in vec4 vFloat; -vec4 undef; - void main() { FragColor = vec4(undef.x, vFloat.y, 0.0, vFloat.w) + vec4(vFloat.z, vFloat.y, 0.0, vFloat.w); diff --git a/reference/opt/shaders/asm/geom/store-uint-layer.invalid.asm.geom b/reference/shaders-no-opt/asm/geom/store-uint-layer.invalid.asm.geom similarity index 100% rename from reference/opt/shaders/asm/geom/store-uint-layer.invalid.asm.geom rename to reference/shaders-no-opt/asm/geom/store-uint-layer.invalid.asm.geom diff --git a/reference/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp b/reference/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp new file mode 100644 index 00000000000..bd2a3c2736e --- /dev/null +++ b/reference/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp @@ -0,0 +1,89 @@ +#version 450 + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1u +#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1u 
+#endif +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1u +#endif + +layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = SPIRV_CROSS_CONSTANT_ID_1, local_size_z = SPIRV_CROSS_CONSTANT_ID_2) in; + +layout(binding = 0, std430) buffer _4_6 +{ + float _m0[]; +} _6; + +layout(binding = 1, std430) buffer _4_7 +{ + float _m0[]; +} _7; + +uvec3 _28 = gl_WorkGroupSize; + +void main() +{ + float _44_copy; + float _46; + uint _47; + float _63; + uint _65; + float _36 = _6._m0[0u]; + uint _39 = 0u; + float _44; + for (;;) + { + _44 = _36; + _46 = _6._m0[35u]; + _47 = 0u; + for (;;) + { + uint _48 = _47 + 1u; + float _45 = _6._m0[_48]; + _6._m0[_47] = ((_46 + _44) + _45) / 3.0; + if (!(_47 < 34u)) + { + break; + } + else + { + _44_copy = _44; + _44 = _45; + _46 = _44_copy; + _47 = _48; + } + } + _6._m0[35u] = (_36 + (_44 + _6._m0[35u])) / 3.0; + if (!(_39 < 5u)) + { + _63 = _6._m0[0u]; + _65 = 1u; + break; + } + else + { + _36 = _6._m0[0u]; + _39++; + continue; + } + } + float _64; + for (;;) + { + _64 = (_63 < _6._m0[_65]) ? 
_6._m0[_65] : _63; + if (!(_65 < 35u)) + { + break; + } + else + { + _63 = _64; + _65++; + } + } + _7._m0[gl_GlobalInvocationID.x] = _64; +} + diff --git a/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk b/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..5040aa46964 --- /dev/null +++ b/reference/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh.vk @@ -0,0 +1,44 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 8, triangles) out; + +struct _12 +{ + float _m0; +}; + +layout(location = 1) out vec4 B[24]; +layout(location = 3) perprimitiveEXT out vec4 C[8]; +shared float _9[64]; +taskPayloadSharedEXT _12 _11; + +void main() +{ + _9[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); + barrier(); + SetMeshOutputsEXT(24u, 8u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = _9[gl_LocalInvocationIndex]; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = _9[gl_LocalInvocationIndex]; + float _63 = _11._m0 + _9[gl_LocalInvocationIndex ^ 1u]; + B[gl_LocalInvocationIndex].x = _63; + B[gl_LocalInvocationIndex].y = _63; + B[gl_LocalInvocationIndex].z = _63; + B[gl_LocalInvocationIndex].w = _63; + if (gl_LocalInvocationIndex < 8u) + { + uint _71 = gl_LocalInvocationIndex * 3u; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(_71, _71 + 1u, _71 + 2u); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_LocalInvocationIndex & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex); + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_LocalInvocationIndex); + uint _81 = gl_LocalInvocationIndex ^ 2u; + C[gl_LocalInvocationIndex].x = _9[_81]; + C[gl_LocalInvocationIndex].y = _9[_81]; + C[gl_LocalInvocationIndex].z = _9[_81]; + C[gl_LocalInvocationIndex].w = _9[_81]; + } +} + diff --git a/reference/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen.vk b/reference/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen.vk new file mode 100644 index 00000000000..a72a7cf48aa --- /dev/null +++ b/reference/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen.vk @@ -0,0 +1,19 @@ +#version 460 +#extension GL_EXT_ray_tracing : require +#extension GL_EXT_nonuniform_qualifier : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as[]; +layout(location = 0) rayPayloadEXT float payload; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + vec4 col = vec4(0.0, 0.0, 0.0, 1.0); + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + uint _62 = gl_LaunchIDEXT.x; + traceRayEXT(as[nonuniformEXT(_62)], 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + col.y = payload; + imageStore(image, ivec2(gl_LaunchIDEXT.xy), col); +} + diff --git a/reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task.vk b/reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task.vk new file mode 100644 index 00000000000..1d491e7014b --- /dev/null +++ b/reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + 
vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); +} + diff --git a/reference/shaders-no-opt/asm/temporary.zero-initialize.asm.frag b/reference/shaders-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..1b8e8cd3295 --- /dev/null +++ b/reference/shaders-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,28 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) flat in mediump int vA; +layout(location = 1) flat in mediump int vB; + +void main() +{ + FragColor = vec4(0.0); + mediump int _10 = 0; + mediump int _15 = 0; + for (mediump int _16 = 0, _17 = 0; _16 < vA; _17 = _15, _16 += _10) + { + if ((vA + _16) == 20) + { + _15 = 50; + } + else + { + _15 = ((vB + _16) == 40) ? 
60 : _17; + } + _10 = _15 + 10; + FragColor += vec4(1.0); + } +} + diff --git a/reference/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc b/reference/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc new file mode 100644 index 00000000000..13e1d3294b1 --- /dev/null +++ b/reference/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc @@ -0,0 +1,82 @@ +#version 450 +layout(vertices = 4) out; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +} gl_out[4]; + +layout(location = 0) patch out vert +{ + float v0; + float v1; +} _5; + +layout(location = 2) patch out vert_patch +{ + float v2; + float v3; +} patches[2]; + +layout(location = 6) patch out float v2; +layout(location = 7) out float v3[4]; +layout(location = 8) out vert2 +{ + float v4; + float v5; +} verts[4]; + +const vec4 _3_0_init[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); +const float _3_1_init[4] = float[](0.0, 0.0, 0.0, 0.0); +const float _3_2_init[4][1] = float[][](float[](0.0), float[](0.0), float[](0.0), float[](0.0)); +const float _3_3_init[4][1] = float[][](float[](0.0), float[](0.0), float[](0.0), float[](0.0)); +const float _6_0_init[2] = float[](0.0, 0.0); +const float _6_1_init[2] = float[](0.0, 0.0); +const float _7_init = 0.0; +const float _8_init[4] = float[](0.0, 0.0, 0.0, 0.0); +const float _9_0_init[4] = float[](0.0, 0.0, 0.0, 0.0); +const float _9_1_init[4] = float[](0.0, 0.0, 0.0, 0.0); + +void main() +{ + gl_out[gl_InvocationID].gl_Position = _3_0_init[gl_InvocationID]; + gl_out[gl_InvocationID].gl_PointSize = _3_1_init[gl_InvocationID]; + gl_out[gl_InvocationID].gl_ClipDistance = _3_2_init[gl_InvocationID]; + gl_out[gl_InvocationID].gl_CullDistance = _3_3_init[gl_InvocationID]; + if (gl_InvocationID == 0) + { + _5.v0 = 0.0; + } + if (gl_InvocationID == 0) + { + _5.v1 = 0.0; + } + if (gl_InvocationID == 0) + { + patches[0].v2 = _6_0_init[0]; + } + if 
(gl_InvocationID == 0) + { + patches[1].v2 = _6_0_init[1]; + } + if (gl_InvocationID == 0) + { + patches[0].v3 = _6_1_init[0]; + } + if (gl_InvocationID == 0) + { + patches[1].v3 = _6_1_init[1]; + } + if (gl_InvocationID == 0) + { + v2 = _7_init; + } + v3[gl_InvocationID] = _8_init[gl_InvocationID]; + verts[gl_InvocationID].v4 = _9_0_init[gl_InvocationID]; + verts[gl_InvocationID].v5 = _9_1_init[gl_InvocationID]; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + diff --git a/reference/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc b/reference/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc new file mode 100644 index 00000000000..3412f1cf5c5 --- /dev/null +++ b/reference/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc @@ -0,0 +1,73 @@ +#version 450 +layout(vertices = 3) out; + +layout(binding = 0, std140) uniform cb1_struct +{ + vec4 _m0[1]; +} cb0_0; + +layout(location = 0) in vec4 v0[]; +layout(location = 1) in vec4 v1[]; +layout(location = 2) in vec3 vicp0[]; +layout(location = 3) out vec3 vocp0[3]; +layout(location = 4) in vec4 vicp1[]; +layout(location = 5) out vec4 vocp1[3]; +vec4 opc[4]; +vec4 vicp[2][3]; +vec4 _48; +vec4 _49; +vec4 _50; +vec4 _56; + +void fork0_epilogue(vec4 _87, vec4 _88, vec4 _89) +{ + gl_TessLevelOuter[0u] = _87.x; + gl_TessLevelOuter[1u] = _88.x; + gl_TessLevelOuter[2u] = _89.x; +} + +void fork0(uint vForkInstanceId) +{ + vec4 r0; + r0.x = uintBitsToFloat(vForkInstanceId); + opc[floatBitsToInt(r0.x)].x = cb0_0._m0[0u].x; + _48 = opc[0u]; + _49 = opc[1u]; + _50 = opc[2u]; + fork0_epilogue(_48, _49, _50); +} + +void fork1_epilogue(vec4 _109) +{ + gl_TessLevelInner[0u] = _109.x; +} + +void fork1() +{ + opc[3u].x = cb0_0._m0[0u].x; + _56 = opc[3u]; + fork1_epilogue(_56); +} + +void main() +{ + vec4 _126_unrolled[3]; + for (int i = 0; i < int(3); i++) + { + _126_unrolled[i] = v0[i]; + } + vicp[0u] = _126_unrolled; + vec4 _127_unrolled[3]; + for (int i = 0; i < int(3); i++) + { + _127_unrolled[i] = 
v1[i]; + } + vicp[1u] = _127_unrolled; + vocp0[gl_InvocationID] = vicp0[gl_InvocationID]; + vocp1[gl_InvocationID] = vicp1[gl_InvocationID]; + fork0(0u); + fork0(1u); + fork0(2u); + fork1(); +} + diff --git a/reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc similarity index 100% rename from reference/opt/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc rename to reference/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc diff --git a/reference/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc b/reference/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc new file mode 100644 index 00000000000..ebd2d8aeac3 --- /dev/null +++ b/reference/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc @@ -0,0 +1,24 @@ +#version 450 +layout(vertices = 4) out; + +const float _5_init[2] = float[](0.0, 0.0); +const float _6_init[4] = float[](0.0, 0.0, 0.0, 0.0); +void main() +{ + if (gl_InvocationID == 0) + { + gl_TessLevelInner = _5_init; + } + if (gl_InvocationID == 0) + { + gl_TessLevelOuter = _6_init; + } + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 2.0; + gl_TessLevelOuter[0] = 3.0; + gl_TessLevelOuter[1] = 4.0; + gl_TessLevelOuter[2] = 5.0; + gl_TessLevelOuter[3] = 6.0; +} + diff --git a/reference/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert b/reference/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..6060888d81f --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,24 @@ +#version 450 + +struct Foo +{ + float c; + float d; +}; + +layout(location = 0) out Vert +{ + float a; + float b; +} _3; + +layout(location = 2) out Foo foo; +const Foo _4_init = Foo(0.0, 0.0); + +void main() +{ + _3.a = 0.0; + _3.b = 0.0; + foo = _4_init; 
+} + diff --git a/reference/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert b/reference/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..b449f080575 --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,14 @@ +#version 450 + +out float gl_ClipDistance[1]; +out float gl_CullDistance[1]; + +void main() +{ + gl_Position = vec4(0.0); + gl_PointSize = 0.0; + gl_ClipDistance = float[](0.0); + gl_CullDistance = float[](0.0); + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert b/reference/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert new file mode 100644 index 00000000000..03271409b76 --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert @@ -0,0 +1,35 @@ +#version 450 + +struct Struct_vec4 +{ + vec4 m0; +}; + +layout(binding = 0, std140) uniform UBO +{ + Struct_vec4 m0; + Struct_vec4 m1; +} ubo_binding_0; + +layout(location = 0) out VertexOut +{ + Struct_vec4 m0; + Struct_vec4 m1; +} output_location_0; + +layout(location = 2) out Struct_vec4 output_location_2; +layout(location = 3) out Struct_vec4 output_location_3; + +void main() +{ + Struct_vec4 c; + c.m0 = ubo_binding_0.m0.m0; + Struct_vec4 b; + b.m0 = ubo_binding_0.m1.m0; + gl_Position = c.m0 + b.m0; + output_location_0.m0 = c; + output_location_0.m1 = b; + output_location_2 = c; + output_location_3 = b; +} + diff --git a/reference/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert b/reference/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert new file mode 100644 index 00000000000..280399b44d9 --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert @@ -0,0 +1,33 @@ +#version 100 + +struct Struct_vec4 +{ + vec4 m0; +}; + +struct UBO +{ + Struct_vec4 m0; + Struct_vec4 m1; +}; + +uniform UBO ubo_binding_0; + 
+varying vec4 output_location_0_m0_m0; +varying vec4 output_location_0_m1_m0; +varying vec4 output_location_2_m0; +varying vec4 output_location_3_m0; + +void main() +{ + Struct_vec4 c; + c.m0 = ubo_binding_0.m0.m0; + Struct_vec4 b; + b.m0 = ubo_binding_0.m1.m0; + gl_Position = c.m0 + b.m0; + output_location_0_m0_m0 = c.m0; + output_location_0_m1_m0 = b.m0; + output_location_2_m0 = c.m0; + output_location_3_m0 = b.m0; +} + diff --git a/reference/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert b/reference/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert new file mode 100644 index 00000000000..a1fe3e50acc --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert @@ -0,0 +1,7 @@ +#version 450 + +void main() +{ + gl_Position = (vec4(1.0, 2.0, 3.0, 4.0) + vec4(5.0, 6.0, 7.0, 8.0)) + (vec4(1.0, 2.0, 3.0, 4.0) + vec4(4.0, 3.0, 8.0, 2.0)); +} + diff --git a/reference/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert.vk b/reference/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert.vk new file mode 100644 index 00000000000..b90912d1cc2 --- /dev/null +++ b/reference/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert.vk @@ -0,0 +1,10 @@ +#version 450 +#extension GL_EXT_debug_printf : require + +void main() +{ + debugPrintfEXT("Foo %f %f", 1.0, 2.0); + vec4 _17 = vec4(0.0, 0.0, 0.0, 1.0); + gl_Position = vec4(0.0, 0.0, 0.0, 1.0); +} + diff --git a/reference/shaders/comp/bitcast-16bit-1.invalid.comp b/reference/shaders-no-opt/comp/bitcast-16bit-1.invalid.comp similarity index 78% rename from reference/shaders/comp/bitcast-16bit-1.invalid.comp rename to reference/shaders-no-opt/comp/bitcast-16bit-1.invalid.comp index 501f97955fc..85fdcdba7dd 100644 --- a/reference/shaders/comp/bitcast-16bit-1.invalid.comp +++ b/reference/shaders-no-opt/comp/bitcast-16bit-1.invalid.comp @@ -6,8 +6,12 @@ #else #error No extension available for FP16. 
#endif -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. #endif diff --git a/reference/opt/shaders/comp/bitcast-16bit-2.invalid.comp b/reference/shaders-no-opt/comp/bitcast-16bit-2.invalid.comp similarity index 52% rename from reference/opt/shaders/comp/bitcast-16bit-2.invalid.comp rename to reference/shaders-no-opt/comp/bitcast-16bit-2.invalid.comp index bddc16d62bc..506d4e55780 100644 --- a/reference/opt/shaders/comp/bitcast-16bit-2.invalid.comp +++ b/reference/shaders-no-opt/comp/bitcast-16bit-2.invalid.comp @@ -1,6 +1,10 @@ #version 450 -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. 
#endif @@ -31,9 +35,13 @@ layout(binding = 2, std140) uniform UBO void main() { uint ident = gl_GlobalInvocationID.x; - i16vec2 _47 = unpackInt2x16(_29.inputs[ident].x) + float16BitsToInt16(_40.const0.xy); - _21.outputs[ident] = i16vec4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w); - i16vec2 _66 = i16vec2(unpackUint2x16(uint(_29.inputs[ident].y)) - float16BitsToUint16(_40.const0.zw)); - _21.outputs[ident] = i16vec4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y); + int _33 = _29.inputs[ident].x; + i16vec2 _47 = unpackInt2x16(_33) + float16BitsToInt16(_40.const0.xy); + _21.outputs[ident].x = _47.x; + _21.outputs[ident].y = _47.y; + int _57 = _29.inputs[ident].y; + i16vec2 _67 = i16vec2(unpackUint2x16(uint(_57)) - float16BitsToUint16(_40.const0.zw)); + _21.outputs[ident].z = _67.x; + _21.outputs[ident].w = _67.y; } diff --git a/reference/shaders-no-opt/comp/glsl.std450.comp b/reference/shaders-no-opt/comp/glsl.std450.comp new file mode 100644 index 00000000000..d2628a9ab62 --- /dev/null +++ b/reference/shaders-no-opt/comp/glsl.std450.comp @@ -0,0 +1,112 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct ResType +{ + float _m0; + int _m1; +}; + +layout(binding = 0, std430) buffer SSBO +{ + float res; + int ires; + uint ures; + vec4 f32; + ivec4 s32; + uvec4 u32; + mat2 m2; + mat3 m3; + mat4 m4; +} _19; + +void main() +{ + _19.res = round(_19.f32.x); + _19.res = roundEven(_19.f32.x); + _19.res = trunc(_19.f32.x); + _19.res = abs(_19.f32.x); + _19.ires = abs(_19.s32.x); + _19.res = sign(_19.f32.x); + _19.ires = sign(_19.s32.x); + _19.res = floor(_19.f32.x); + _19.res = ceil(_19.f32.x); + _19.res = fract(_19.f32.x); + _19.res = radians(_19.f32.x); + _19.res = degrees(_19.f32.x); + _19.res = sin(_19.f32.x); + _19.res = cos(_19.f32.x); + _19.res = tan(_19.f32.x); + _19.res = asin(_19.f32.x); + _19.res = acos(_19.f32.x); + _19.res = atan(_19.f32.x); + _19.res = sinh(_19.f32.x); + _19.res = cosh(_19.f32.x); + 
_19.res = tanh(_19.f32.x); + _19.res = asinh(_19.f32.x); + _19.res = acosh(_19.f32.x); + _19.res = atanh(_19.f32.x); + _19.res = atan(_19.f32.x, _19.f32.y); + _19.res = pow(_19.f32.x, _19.f32.y); + _19.res = exp(_19.f32.x); + _19.res = log(_19.f32.x); + _19.res = exp2(_19.f32.x); + _19.res = log2(_19.f32.x); + _19.res = sqrt(_19.f32.x); + _19.res = inversesqrt(_19.f32.x); + _19.res = length(_19.f32.x); + _19.res = distance(_19.f32.x, _19.f32.y); + _19.res = normalize(_19.f32.x); + _19.res = faceforward(_19.f32.x, _19.f32.y, _19.f32.z); + _19.res = reflect(_19.f32.x, _19.f32.y); + _19.res = refract(_19.f32.x, _19.f32.y, _19.f32.z); + _19.res = length(_19.f32.xy); + _19.res = distance(_19.f32.xy, _19.f32.zw); + vec2 v2 = normalize(_19.f32.xy); + v2 = faceforward(_19.f32.xy, _19.f32.yz, _19.f32.zw); + v2 = reflect(_19.f32.xy, _19.f32.zw); + v2 = refract(_19.f32.xy, _19.f32.yz, _19.f32.w); + vec3 v3 = cross(_19.f32.xyz, _19.f32.yzw); + _19.res = determinant(_19.m2); + _19.res = determinant(_19.m3); + _19.res = determinant(_19.m4); + _19.m2 = inverse(_19.m2); + _19.m3 = inverse(_19.m3); + _19.m4 = inverse(_19.m4); + float tmp; + float _287 = modf(_19.f32.x, tmp); + _19.res = _287; + _19.res = min(_19.f32.x, _19.f32.y); + _19.ures = min(_19.u32.x, _19.u32.y); + _19.ires = min(_19.s32.x, _19.s32.y); + _19.res = max(_19.f32.x, _19.f32.y); + _19.ures = max(_19.u32.x, _19.u32.y); + _19.ires = max(_19.s32.x, _19.s32.y); + _19.res = clamp(_19.f32.x, _19.f32.y, _19.f32.z); + _19.ures = clamp(_19.u32.x, _19.u32.y, _19.u32.z); + _19.ires = clamp(_19.s32.x, _19.s32.y, _19.s32.z); + _19.res = mix(_19.f32.x, _19.f32.y, _19.f32.z); + _19.res = step(_19.f32.x, _19.f32.y); + _19.res = smoothstep(_19.f32.x, _19.f32.y, _19.f32.z); + _19.res = fma(_19.f32.x, _19.f32.y, _19.f32.z); + ResType _387; + _387._m0 = frexp(_19.f32.x, _387._m1); + int itmp = _387._m1; + _19.res = _387._m0; + _19.res = ldexp(_19.f32.x, itmp); + _19.ures = packSnorm4x8(_19.f32); + _19.ures = packUnorm4x8(_19.f32); + 
_19.ures = packSnorm2x16(_19.f32.xy); + _19.ures = packUnorm2x16(_19.f32.xy); + _19.ures = packHalf2x16(_19.f32.xy); + v2 = unpackSnorm2x16(_19.u32.x); + v2 = unpackUnorm2x16(_19.u32.x); + v2 = unpackHalf2x16(_19.u32.x); + vec4 v4 = unpackSnorm4x8(_19.u32.x); + v4 = unpackUnorm4x8(_19.u32.x); + _19.s32 = findLSB(_19.s32); + _19.s32 = findLSB(_19.u32); + _19.s32 = findMSB(_19.s32); + _19.s32 = findMSB(_19.u32); +} + diff --git a/reference/shaders-no-opt/comp/illegal-struct-name.asm.comp b/reference/shaders-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..885dcb3baa5 --- /dev/null +++ b/reference/shaders-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,22 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct Foo +{ + float _abs; +}; + +layout(binding = 0, std430) buffer SSBO +{ + Foo foo; + Foo foo2; +} _7; + +void main() +{ + Foo f; + f._abs = _7.foo._abs; + int _abs = 10; + _7.foo2._abs = f._abs; +} + diff --git a/reference/shaders-no-opt/comp/image-load-formatted.comp b/reference/shaders-no-opt/comp/image-load-formatted.comp new file mode 100644 index 00000000000..e11b8febfa7 --- /dev/null +++ b/reference/shaders-no-opt/comp/image-load-formatted.comp @@ -0,0 +1,12 @@ +#version 450 +#extension GL_EXT_shader_image_load_formatted : require +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(binding = 0) uniform image2D img; + +void main() +{ + vec4 v = imageLoad(img, ivec2(gl_GlobalInvocationID.xy)); + imageStore(img, ivec2(gl_GlobalInvocationID.xy), v + vec4(1.0)); +} + diff --git a/reference/opt/shaders/comp/inout-struct.invalid.comp b/reference/shaders-no-opt/comp/inout-struct.invalid.comp similarity index 100% rename from reference/opt/shaders/comp/inout-struct.invalid.comp rename to reference/shaders-no-opt/comp/inout-struct.invalid.comp diff --git a/reference/shaders-no-opt/comp/int16min-literal.comp b/reference/shaders-no-opt/comp/int16min-literal.comp new file 
mode 100644 index 00000000000..f4bae2fec60 --- /dev/null +++ b/reference/shaders-no-opt/comp/int16min-literal.comp @@ -0,0 +1,36 @@ +#version 450 +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) +#extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for Int16. +#endif +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for FP16. +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std140) uniform UBO +{ + float16_t b; +} _12; + +layout(binding = 1, std430) buffer SSBO +{ + float16_t a; +} _24; + +void main() +{ + int16_t v = float16BitsToInt16(_12.b); + v ^= (-32768s); + _24.a = int16BitsToFloat16(v); +} + diff --git a/reference/shaders-no-opt/comp/int64min-literal.comp b/reference/shaders-no-opt/comp/int64min-literal.comp new file mode 100644 index 00000000000..63bd0fdaf2d --- /dev/null +++ b/reference/shaders-no-opt/comp/int64min-literal.comp @@ -0,0 +1,29 @@ +#version 450 +#if defined(GL_ARB_gpu_shader_int64) +#extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std140) uniform UBO +{ + float b; +} _12; + +layout(binding = 1, std430) buffer SSBO +{ + float a; +} _32; + +void main() +{ + double b2 = double(_12.b); + int64_t v = doubleBitsToInt64(b2); + v ^= int64_t(0x8000000000000000ul); + double a2 = int64BitsToDouble(v); + _32.a = float(a2); +} + diff --git a/reference/shaders-no-opt/comp/intmin-literal.comp b/reference/shaders-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..5a4896f9070 --- /dev/null +++ b/reference/shaders-no-opt/comp/intmin-literal.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 1, std430) buffer SSBO +{ + float a; +} _9; + +layout(binding = 0, std140) uniform UBO +{ + float b; +} _14; + +void main() +{ + _9.a = intBitsToFloat(floatBitsToInt(_14.b) ^ int(0x80000000)); +} + diff --git a/reference/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp b/reference/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp new file mode 100644 index 00000000000..5b4cb886b4e --- /dev/null +++ b/reference/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp @@ -0,0 +1,22 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer STO +{ + uint data[]; +} ssbo; + +void main() +{ + while (true) + { + ssbo.data[0]++; + if (ssbo.data[2] != 0u) + { + ssbo.data[5]++; + continue; + } + break; + } +} + diff --git a/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp b/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp new file mode 100644 index 00000000000..78ebc26c522 --- /dev/null +++ b/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + int v[]; +} _23; + 
+void main() +{ + for (int i = 0; i < 4; i++) + { + _23.v[i] += 10; + } +} + diff --git a/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp b/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp new file mode 100644 index 00000000000..8b6a0321044 --- /dev/null +++ b/reference/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + int v[]; +} _64; + +void main() +{ + for (int i = 0; i < 4; i++) + { + _64.v[i] += 10; + } +} + diff --git a/reference/shaders-no-opt/comp/loop.comp b/reference/shaders-no-opt/comp/loop.comp index 049a30669cd..2ba731cdc7d 100644 --- a/reference/shaders-no-opt/comp/loop.comp +++ b/reference/shaders-no-opt/comp/loop.comp @@ -7,11 +7,6 @@ layout(binding = 0, std430) readonly buffer SSBO vec4 in_data[]; } _24; -layout(binding = 1, std430) writeonly buffer SSBO2 -{ - vec4 out_data[]; -} _177; - void main() { uint ident = gl_GlobalInvocationID.x; @@ -83,23 +78,5 @@ void main() k += 10; continue; } - k = 0; - do - { - k++; - } while (k > 10); - int l = 0; - for (;;) - { - if (l == 5) - { - l++; - continue; - } - idat += vec4(1.0); - l++; - continue; - } - _177.out_data[ident] = idat; } diff --git a/reference/shaders-no-opt/comp/return.comp b/reference/shaders-no-opt/comp/return.comp index 4be20e93e41..4802be2244a 100644 --- a/reference/shaders-no-opt/comp/return.comp +++ b/reference/shaders-no-opt/comp/return.comp @@ -21,7 +21,8 @@ void main() return; } } - for (int i = 0; i < 20; i++) + int i = 0; + while (i < 20) { if (i == 10) { diff --git a/reference/opt/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp b/reference/shaders-no-opt/comp/shader_ballot_nonuniform_invocations.invalid.comp similarity index 100% rename from reference/opt/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp rename to 
reference/shaders-no-opt/comp/shader_ballot_nonuniform_invocations.invalid.comp diff --git a/reference/shaders-no-opt/comp/specialization-constant-evaluation.comp b/reference/shaders-no-opt/comp/specialization-constant-evaluation.comp new file mode 100644 index 00000000000..695835968af --- /dev/null +++ b/reference/shaders-no-opt/comp/specialization-constant-evaluation.comp @@ -0,0 +1,321 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1 +#endif +const int SONE = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 2 +#endif +const int STWO = SPIRV_CROSS_CONSTANT_ID_3; +const int _10 = (SONE + STWO); +const uint _13 = (uint(_10) + 0u); +#ifndef SPIRV_CROSS_CONSTANT_ID_5 +#define SPIRV_CROSS_CONSTANT_ID_5 1u +#endif +const uint UONE = SPIRV_CROSS_CONSTANT_ID_5; +const uint _15 = (_13 + UONE); +#ifndef SPIRV_CROSS_CONSTANT_ID_6 +#define SPIRV_CROSS_CONSTANT_ID_6 2u +#endif +const uint UTWO = SPIRV_CROSS_CONSTANT_ID_6; +const uint IADD = (_15 + UTWO); +const uint _19 = (IADD - 5u); +const uint _28 = (uint(SONE) + 0u); +const uint ISUB = (UTWO - _28); +const uint IMUL = (UTWO * UTWO); +const uint _37 = (IMUL - 3u); +const uint UDIV = (UTWO / UTWO); +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 -2 +#endif +const int SNEG_TWO = SPIRV_CROSS_CONSTANT_ID_4; +const int SDIV = (STWO / SNEG_TWO); +const int _52 = (SDIV + 2); +#ifndef SPIRV_CROSS_CONSTANT_ID_7 +#define SPIRV_CROSS_CONSTANT_ID_7 -3 +#endif +const int SNEG_THREE = SPIRV_CROSS_CONSTANT_ID_7; +const int SMOD = (STWO % SNEG_THREE); +const int _66 = (SMOD + 2); +const uint UMOD = (IADD % IMUL); +const uint _73 = (UMOD - 1u); +const uint LSHL = (IADD << ISUB); +const uint _81 = (LSHL - 11u); +const uint RSHL = (IADD >> ISUB); +const uint _89 = (RSHL - 2u); +const int _95 = int(IADD + 0u); +const int _96 = (-_95); +const int _97 = (-SDIV); +const 
int RSHA = (_96 >> _97); +const int _100 = (RSHA + 4); +const bool IEQ = (IADD == ISUB); +const int _109 = IEQ ? 2 : 1; +const bool INEQ = (IADD != ISUB); +const int _116 = INEQ ? 1 : 2; +const bool ULT = (IADD < ISUB); +const int _123 = ULT ? 2 : 1; +const bool ULE = (IADD <= ISUB); +const int _130 = ULE ? 2 : 1; +const bool UGT = (IADD > ISUB); +const int _137 = UGT ? 1 : 2; +const bool UGE = (IADD >= ISUB); +const int _144 = UGE ? 1 : 2; +const bool SLT = (SMOD < 1); +const int _151 = SLT ? 1 : 2; +const bool SLE = (SMOD <= 1); +const int _158 = SLE ? 1 : 2; +const bool SGT = (SMOD > 1); +const int _165 = SGT ? 2 : 1; +const bool SGE = (SMOD >= 1); +const int _172 = SGE ? 2 : 1; +const bool LOR = (IEQ || SLT); +const int _179 = LOR ? 1 : 2; +const bool LAND = (IEQ && SLT); +const int _186 = LAND ? 2 : 1; +const bool LNOT = (!LOR); +const int _193 = LNOT ? 2 : 1; +const uint AND = (IADD & IADD); +const uint _200 = (AND - 5u); +const uint OR = (IADD | ISUB); +const uint _208 = (OR - 6u); +const uint XOR = (IADD ^ IADD); +const uint _215 = (XOR + 1u); +const uint NOT = (~XOR); +const uint _223 = (NOT - 4294967294u); +const bool LEQ = (LAND == LNOT); +const int _230 = LEQ ? 1 : 2; +const bool LNEQ = (LAND != LNOT); +const int _237 = LNEQ ? 2 : 1; +const uint SEL = IEQ ? 
IADD : ISUB; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 true +#endif +const bool TRUE = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 false +#endif +const bool FALSE = SPIRV_CROSS_CONSTANT_ID_1; + +layout(binding = 0, std430) buffer SSBO_IAdd +{ + float val[_19]; + float dummy; +} IAdd; + +layout(binding = 1, std430) buffer SSBO_ISub +{ + float val[ISUB]; + float dummy; +} ISub; + +layout(binding = 2, std430) buffer SSBO_IMul +{ + float val[_37]; + float dummy; +} IMul; + +layout(binding = 3, std430) buffer SSBO_UDiv +{ + float val[UDIV]; + float dummy; +} UDiv; + +layout(binding = 4, std430) buffer SSBO_SDiv +{ + float val[_52]; + float dummy; +} SDiv; + +layout(binding = 5, std430) buffer SSBO_SRem +{ + float val[1]; + float dummy; +} SRem; + +layout(binding = 6, std430) buffer SSBO_SMod +{ + float val[_66]; + float dummy; +} SMod; + +layout(binding = 7, std430) buffer SSBO_UMod +{ + float val[_73]; + float dummy; +} UMod; + +layout(binding = 8, std430) buffer SSBO_LShl +{ + float val[_81]; + float dummy; +} LShl; + +layout(binding = 9, std430) buffer SSBO_RShl +{ + float val[_89]; + float dummy; +} RShl; + +layout(binding = 10, std430) buffer SSBO_RSha +{ + float val[_100]; + float dummy; +} RSha; + +layout(binding = 11, std430) buffer SSBO_IEq +{ + float val[_109]; + float dummy; +} IEq; + +layout(binding = 12, std430) buffer SSBO_INeq +{ + float val[_116]; + float dummy; +} INeq; + +layout(binding = 13, std430) buffer SSBO_Ult +{ + float val[_123]; + float dummy; +} Ult; + +layout(binding = 14, std430) buffer SSBO_Ule +{ + float val[_130]; + float dummy; +} Ule; + +layout(binding = 15, std430) buffer SSBO_Ugt +{ + float val[_137]; + float dummy; +} Ugt; + +layout(binding = 16, std430) buffer SSBO_Uge +{ + float val[_144]; + float dummy; +} Uge; + +layout(binding = 17, std430) buffer SSBO_Slt +{ + float val[_151]; + float dummy; +} Slt; + +layout(binding = 18, std430) buffer SSBO_Sle 
+{ + float val[_158]; + float dummy; +} Sle; + +layout(binding = 19, std430) buffer SSBO_Sgt +{ + float val[_165]; + float dummy; +} Sgt; + +layout(binding = 20, std430) buffer SSBO_Sge +{ + float val[_172]; + float dummy; +} Sge; + +layout(binding = 21, std430) buffer SSBO_Lor +{ + float val[_179]; + float dummy; +} Lor; + +layout(binding = 22, std430) buffer SSBO_Land +{ + float val[_186]; + float dummy; +} Land; + +layout(binding = 23, std430) buffer SSBO_Lnot +{ + float val[_193]; + float dummy; +} Lnot; + +layout(binding = 24, std430) buffer SSBO_And +{ + float val[_200]; + float dummy; +} And; + +layout(binding = 24, std430) buffer SSBO_Or +{ + float val[_208]; + float dummy; +} Or; + +layout(binding = 24, std430) buffer SSBO_Xor +{ + float val[_215]; + float dummy; +} Xor; + +layout(binding = 25, std430) buffer SSBO_Not +{ + float val[_223]; + float dummy; +} Not; + +layout(binding = 26, std430) buffer SSBO_Leq +{ + float val[_230]; + float dummy; +} Leq; + +layout(binding = 27, std430) buffer SSBO_Lneq +{ + float val[_237]; + float dummy; +} Lneq; + +layout(binding = 28, std430) buffer SSBO_Sel +{ + float val[SEL]; + float dummy; +} Sel; + +void main() +{ + IAdd.val[0] = 0.0; + ISub.val[0] = 0.0; + IMul.val[0] = 0.0; + UDiv.val[0] = 0.0; + SDiv.val[0] = 0.0; + SRem.val[0] = 0.0; + SMod.val[0] = 0.0; + UMod.val[0] = 0.0; + LShl.val[0] = 0.0; + RShl.val[0] = 0.0; + RSha.val[0] = 0.0; + IEq.val[0] = 0.0; + INeq.val[0] = 0.0; + Ult.val[0] = 0.0; + Ule.val[0] = 0.0; + Ugt.val[0] = 0.0; + Uge.val[0] = 0.0; + Slt.val[0] = 0.0; + Sle.val[0] = 0.0; + Sgt.val[0] = 0.0; + Sge.val[0] = 0.0; + Lor.val[0] = 0.0; + Land.val[0] = 0.0; + Lnot.val[0] = 0.0; + And.val[0] = 0.0; + Or.val[0] = 0.0; + Xor.val[0] = 0.0; + Not.val[0] = 0.0; + Leq.val[0] = 0.0; + Lneq.val[0] = 0.0; + Sel.val[0] = 0.0; +} + diff --git a/reference/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk b/reference/shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk 
similarity index 84% rename from reference/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk rename to reference/shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk index d67e0beeb65..a037b301ca7 100644 --- a/reference/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk +++ b/reference/shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp.vk @@ -44,48 +44,6 @@ struct Content S4 m3s[8]; }; -struct S0_1 -{ - vec2 a[1]; - float b; -}; - -struct S1_1 -{ - vec3 a; - float b; -}; - -struct S2_1 -{ - vec3 a[1]; - float b; -}; - -struct S3_1 -{ - vec2 a; - float b; -}; - -struct S4_1 -{ - vec2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - S4_1 m3s[8]; -}; - layout(set = 0, binding = 1, scalar) restrict buffer SSBO1 { Content content; @@ -104,9 +62,9 @@ layout(set = 0, binding = 1, scalar) restrict buffer SSBO1 layout(set = 0, binding = 0, std140) restrict buffer SSBO0 { - Content_1 content; - Content_1 content1[2]; - Content_1 content2; + Content content; + Content content1[2]; + Content content2; mat2 m0; mat2 m1; mat2x3 m2[4]; diff --git a/reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk b/reference/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp.vk similarity index 92% rename from reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk rename to reference/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp.vk index 6d288574f74..f3fa6dd00c3 100644 --- a/reference/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp.vk +++ b/reference/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp.vk @@ -62,6 +62,9 @@ void main() uvec4 anded = subgroupAnd(ballot_value); uvec4 ored = subgroupOr(ballot_value); uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42u))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42u))); + bvec4 
xored_b = subgroupXor(equal(ballot_value, uvec4(42u))); added = subgroupInclusiveAdd(added); iadded = subgroupInclusiveAdd(iadded); multiplied = subgroupInclusiveMul(multiplied); @@ -102,6 +105,9 @@ void main() anded = subgroupClusteredAnd(anded, 4u); ored = subgroupClusteredOr(ored, 4u); xored = subgroupClusteredXor(xored, 4u); + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); diff --git a/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp b/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp new file mode 100644 index 00000000000..00b3fa7e1fc --- /dev/null +++ b/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp @@ -0,0 +1,401 @@ +#version 450 + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#else +#error No extensions available to emulate requested subgroup feature. 
+#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5)) +#extension GL_AMD_gpu_shader_int64 : enable +#extension GL_NV_gpu_shader5 : enable +#extension GL_AMD_gcn_shader : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#else +#error No extensions available to emulate requested subgroup feature. 
+#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#elif defined(GL_NV_shader_thread_shuffle) +#extension GL_NV_shader_thread_shuffle : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#endif + +#if defined(GL_KHR_shader_subgroup_vote) +#extension GL_KHR_shader_subgroup_vote : require +#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5)) +#extension GL_AMD_gpu_shader_int64 : enable +#extension GL_NV_gpu_shader5 : enable +#extension GL_AMD_gcn_shader : require +#elif defined(GL_NV_gpu_shader_5) +#extension GL_NV_gpu_shader_5 : require +#elif defined(GL_ARB_shader_group_vote) +#extension GL_ARB_shader_group_vote : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#elif defined(GL_AMD_gcn_shader) && (defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5)) +#extension GL_AMD_gpu_shader_int64 : enable +#extension GL_NV_gpu_shader5 : enable +#extension GL_AMD_gcn_shader : require +#else +#error No extensions available to emulate requested subgroup feature. 
+#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#extension GL_KHR_shader_subgroup_basic : require +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#extension GL_KHR_shader_subgroup_ballot : require +#elif defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#elif defined(GL_ARB_shader_ballot) && defined(GL_ARB_shader_int64) +#extension GL_ARB_shader_int64 : enable +#extension GL_ARB_shader_ballot : require +#else +#error No extensions available to emulate requested subgroup feature. +#endif + +#if defined(GL_NV_shader_thread_group) +#extension GL_NV_shader_thread_group : require +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float FragColor; +} _9; + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif defined(GL_NV_shader_thread_group) +#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u) +#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u) +#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u) +#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u) +#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u) +#elif defined(GL_ARB_shader_ballot) +#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u) +#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u) +#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u) +#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u) +#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u) +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#elif defined(GL_NV_shader_thread_group) +#define gl_SubgroupSize gl_WarpSizeNV +#elif defined(GL_ARB_shader_ballot) +#define gl_SubgroupSize gl_SubGroupSizeARB +#elif defined(GL_AMD_gcn_shader) +#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD) +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#elif 
defined(GL_NV_shader_thread_group) +#define gl_SubgroupInvocationID gl_ThreadInWarpNV +#elif defined(GL_ARB_shader_ballot) +#define gl_SubgroupInvocationID gl_SubGroupInvocationARB +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#elif defined(GL_NV_shader_thread_group) +#define gl_SubgroupID gl_WarpIDNV +#endif + +#if defined(GL_KHR_shader_subgroup_basic) +#elif defined(GL_NV_shader_thread_group) +#define gl_NumSubgroups gl_WarpsPerSMNV +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif defined(GL_ARB_shader_ballot) +int subgroupBroadcastFirst(int value) { return readFirstInvocationARB(value); } +ivec2 subgroupBroadcastFirst(ivec2 value) { return readFirstInvocationARB(value); } +ivec3 subgroupBroadcastFirst(ivec3 value) { return readFirstInvocationARB(value); } +ivec4 subgroupBroadcastFirst(ivec4 value) { return readFirstInvocationARB(value); } +uint subgroupBroadcastFirst(uint value) { return readFirstInvocationARB(value); } +uvec2 subgroupBroadcastFirst(uvec2 value) { return readFirstInvocationARB(value); } +uvec3 subgroupBroadcastFirst(uvec3 value) { return readFirstInvocationARB(value); } +uvec4 subgroupBroadcastFirst(uvec4 value) { return readFirstInvocationARB(value); } +float subgroupBroadcastFirst(float value) { return readFirstInvocationARB(value); } +vec2 subgroupBroadcastFirst(vec2 value) { return readFirstInvocationARB(value); } +vec3 subgroupBroadcastFirst(vec3 value) { return readFirstInvocationARB(value); } +vec4 subgroupBroadcastFirst(vec4 value) { return readFirstInvocationARB(value); } +double subgroupBroadcastFirst(double value) { return readFirstInvocationARB(value); } +dvec2 subgroupBroadcastFirst(dvec2 value) { return readFirstInvocationARB(value); } +dvec3 subgroupBroadcastFirst(dvec3 value) { return readFirstInvocationARB(value); } +dvec4 subgroupBroadcastFirst(dvec4 value) { return readFirstInvocationARB(value); } +int subgroupBroadcast(int value, uint id) { return readInvocationARB(value, id); } +ivec2 
subgroupBroadcast(ivec2 value, uint id) { return readInvocationARB(value, id); } +ivec3 subgroupBroadcast(ivec3 value, uint id) { return readInvocationARB(value, id); } +ivec4 subgroupBroadcast(ivec4 value, uint id) { return readInvocationARB(value, id); } +uint subgroupBroadcast(uint value, uint id) { return readInvocationARB(value, id); } +uvec2 subgroupBroadcast(uvec2 value, uint id) { return readInvocationARB(value, id); } +uvec3 subgroupBroadcast(uvec3 value, uint id) { return readInvocationARB(value, id); } +uvec4 subgroupBroadcast(uvec4 value, uint id) { return readInvocationARB(value, id); } +float subgroupBroadcast(float value, uint id) { return readInvocationARB(value, id); } +vec2 subgroupBroadcast(vec2 value, uint id) { return readInvocationARB(value, id); } +vec3 subgroupBroadcast(vec3 value, uint id) { return readInvocationARB(value, id); } +vec4 subgroupBroadcast(vec4 value, uint id) { return readInvocationARB(value, id); } +double subgroupBroadcast(double value, uint id) { return readInvocationARB(value, id); } +dvec2 subgroupBroadcast(dvec2 value, uint id) { return readInvocationARB(value, id); } +dvec3 subgroupBroadcast(dvec3 value, uint id) { return readInvocationARB(value, id); } +dvec4 subgroupBroadcast(dvec4 value, uint id) { return readInvocationARB(value, id); } +#elif defined(GL_NV_shader_thread_shuffle) +int subgroupBroadcastFirst(int value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +ivec2 subgroupBroadcastFirst(ivec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +ivec3 subgroupBroadcastFirst(ivec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +ivec4 subgroupBroadcastFirst(ivec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +uint subgroupBroadcastFirst(uint value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +uvec2 subgroupBroadcastFirst(uvec2 value) { return 
shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +uvec3 subgroupBroadcastFirst(uvec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +uvec4 subgroupBroadcastFirst(uvec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +float subgroupBroadcastFirst(float value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +vec2 subgroupBroadcastFirst(vec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +vec3 subgroupBroadcastFirst(vec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +vec4 subgroupBroadcastFirst(vec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +double subgroupBroadcastFirst(double value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +dvec2 subgroupBroadcastFirst(dvec2 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +dvec3 subgroupBroadcastFirst(dvec3 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +dvec4 subgroupBroadcastFirst(dvec4 value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); } +int subgroupBroadcast(int value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +ivec2 subgroupBroadcast(ivec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +ivec3 subgroupBroadcast(ivec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +ivec4 subgroupBroadcast(ivec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +uint subgroupBroadcast(uint value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +uvec2 subgroupBroadcast(uvec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +uvec3 subgroupBroadcast(uvec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +uvec4 subgroupBroadcast(uvec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } 
+float subgroupBroadcast(float value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +vec2 subgroupBroadcast(vec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +vec3 subgroupBroadcast(vec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +vec4 subgroupBroadcast(vec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +double subgroupBroadcast(double value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +dvec2 subgroupBroadcast(dvec2 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +dvec3 subgroupBroadcast(dvec3 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +dvec4 subgroupBroadcast(dvec4 value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); } +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif defined(GL_NV_shader_thread_group) +uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); } +uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); } +#else +uint subgroupBallotFindLSB(uvec4 value) +{ + int firstLive = findLSB(value.x); + return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32)); +} +uint subgroupBallotFindMSB(uvec4 value) +{ + int firstLive = findMSB(value.y); + return uint(firstLive != -1 ? 
(firstLive + 32) : findMSB(value.x)); +} +#endif + +#if defined(GL_KHR_shader_subgroup_vote) +#elif defined(GL_AMD_gcn_shader) +bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); } +bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; } +bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || b == ballotAMD(true); } +#elif defined(GL_NV_gpu_shader_5) +bool subgroupAll(bool value) { return allThreadsNV(value); } +bool subgroupAny(bool value) { return anyThreadNV(value); } +bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); } +#elif defined(GL_ARB_shader_group_vote) +bool subgroupAll(bool v) { return allInvocationsARB(v); } +bool subgroupAny(bool v) { return anyInvocationARB(v); } +bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); } +#endif + +#ifndef GL_KHR_shader_subgroup_vote +#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return subgroupAllEqual(subgroupBroadcastFirst(value) == value); } +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(int) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec2) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec3) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(ivec4) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uint) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uvec2) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uvec3) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(uvec4) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(float) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(vec2) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(vec3) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(vec4) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(double) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec2) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec3) +_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(dvec4) +#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND +#endif + +#if defined(GL_KHR_shader_subgroup_ballot) +#elif 
defined(GL_NV_shader_thread_group) +uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); } +#elif defined(GL_ARB_shader_ballot) +uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); } +#endif + +#ifndef GL_KHR_shader_subgroup_basic +bool subgroupElect() +{ + uvec4 activeMask = subgroupBallot(true); + uint firstLive = subgroupBallotFindLSB(activeMask); + return gl_SubgroupInvocationID == firstLive; +} +#endif + +#ifndef GL_KHR_shader_subgroup_basic +void subgroupBarrier() { memoryBarrierShared(); } +#endif + +#ifndef GL_KHR_shader_subgroup_basic +void subgroupMemoryBarrier() { groupMemoryBarrier(); } +void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); } +void subgroupMemoryBarrierShared() { memoryBarrierShared(); } +void subgroupMemoryBarrierImage() { groupMemoryBarrier(); } +#endif + +#ifndef GL_KHR_shader_subgroup_ballot +bool subgroupInverseBallot(uvec4 value) +{ + return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u))); +} +uint subgroupBallotInclusiveBitCount(uvec4 value) +{ + uvec2 v = value.xy & gl_SubgroupLeMask.xy; + ivec2 c = bitCount(v); +#ifdef GL_NV_shader_thread_group + return uint(c.x); +#else + return uint(c.x + c.y); +#endif +} +uint subgroupBallotExclusiveBitCount(uvec4 value) +{ + uvec2 v = value.xy & gl_SubgroupLtMask.xy; + ivec2 c = bitCount(v); +#ifdef GL_NV_shader_thread_group + return uint(c.x); +#else + return uint(c.x + c.y); +#endif +} +#endif + +#ifndef GL_KHR_shader_subgroup_ballot +uint subgroupBallotBitCount(uvec4 value) +{ + ivec2 c = bitCount(value.xy); +#ifdef GL_NV_shader_thread_group + return uint(c.x); +#else + return uint(c.x + c.y); +#endif +} +#endif + +#ifndef GL_KHR_shader_subgroup_ballot +bool subgroupBallotBitExtract(uvec4 value, uint index) +{ +#ifdef GL_NV_shader_thread_group + uint shifted = value.x >> index; +#else + uint shifted = value[index >> 5u] >> (index & 0x1fu); +#endif + return (shifted & 1u) != 0u; +} +#endif + +void main() +{ + 
_9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + subgroupMemoryBarrier(); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + _9.FragColor = vec4(gl_SubgroupEqMask).x; + _9.FragColor = vec4(gl_SubgroupGeMask).x; + _9.FragColor = vec4(gl_SubgroupGtMask).x; + _9.FragColor = vec4(gl_SubgroupLeMask).x; + _9.FragColor = vec4(gl_SubgroupLtMask).x; + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal_bool = subgroupAllEqual(true); + bool has_equal_T = subgroupAllEqual(uvec3(5u)); +} + diff --git a/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp.vk b/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp.vk new file mode 100644 index 00000000000..61aa2f7a561 --- /dev/null +++ b/reference/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp.vk @@ -0,0 +1,45 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + float FragColor; +} _9; + +void main() +{ 
+ _9.FragColor = float(gl_NumSubgroups); + _9.FragColor = float(gl_SubgroupID); + _9.FragColor = float(gl_SubgroupSize); + _9.FragColor = float(gl_SubgroupInvocationID); + subgroupMemoryBarrier(); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + _9.FragColor = vec4(gl_SubgroupEqMask).x; + _9.FragColor = vec4(gl_SubgroupGeMask).x; + _9.FragColor = vec4(gl_SubgroupGtMask).x; + _9.FragColor = vec4(gl_SubgroupLeMask).x; + _9.FragColor = vec4(gl_SubgroupLtMask).x; + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal_bool = subgroupAllEqual(true); + bool has_equal_T = subgroupAllEqual(uvec3(5u)); +} + diff --git a/reference/shaders-no-opt/comp/trivial-select-cast-vector.comp b/reference/shaders-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..92573ffdc86 --- /dev/null +++ b/reference/shaders-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,15 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer A +{ + vec3 a; + vec3 b; +} _14; + +void main() +{ + bvec3 c = lessThan(_14.b, vec3(1.0)); + _14.a = mix(vec3(1.0, 0.0, 0.0), vec3(0.0, 0.0, 1.0), c); +} + diff --git 
a/reference/shaders-no-opt/comp/trivial-select-matrix.spv14.comp b/reference/shaders-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..dd227e89d68 --- /dev/null +++ b/reference/shaders-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer A +{ + mat3 a; + float b; +} _14; + +void main() +{ + bool c = _14.b < 1.0; + _14.a = c ? mat3(vec3(1.0), vec3(1.0), vec3(1.0)) : mat3(vec3(0.0), vec3(0.0), vec3(0.0)); + _14.a = c ? mat3(vec3(1.0, 0.0, 0.0), vec3(0.0, 1.0, 0.0), vec3(0.0, 0.0, 1.0)) : mat3(vec3(0.0), vec3(0.0), vec3(0.0)); +} + diff --git a/reference/shaders/frag/16bit-constants.frag b/reference/shaders-no-opt/frag/16bit-constants.invalid.frag similarity index 68% rename from reference/shaders/frag/16bit-constants.frag rename to reference/shaders-no-opt/frag/16bit-constants.invalid.frag index 57d8256138b..a5c0a6a17cd 100644 --- a/reference/shaders/frag/16bit-constants.frag +++ b/reference/shaders-no-opt/frag/16bit-constants.invalid.frag @@ -6,8 +6,12 @@ #else #error No extension available for FP16. #endif -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. 
#endif diff --git a/reference/opt/shaders/desktop-only/frag/fp16.invalid.desktop.frag b/reference/shaders-no-opt/frag/fp16.invalid.desktop.frag similarity index 100% rename from reference/opt/shaders/desktop-only/frag/fp16.invalid.desktop.frag rename to reference/shaders-no-opt/frag/fp16.invalid.desktop.frag diff --git a/reference/shaders-no-opt/frag/frag-fully-covered.frag b/reference/shaders-no-opt/frag/frag-fully-covered.frag new file mode 100644 index 00000000000..0f22a7de2a3 --- /dev/null +++ b/reference/shaders-no-opt/frag/frag-fully-covered.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_NV_conservative_raster_underestimation : require + +layout(location = 0) out vec4 FragColor; + +void main() +{ + if (!gl_FragFullyCoveredNV) + { + discard; + } + FragColor = vec4(1.0); +} + diff --git a/reference/opt/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk b/reference/shaders-no-opt/frag/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk similarity index 100% rename from reference/opt/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk rename to reference/shaders-no-opt/frag/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk diff --git a/reference/shaders/amd/fs.invalid.frag b/reference/shaders-no-opt/frag/fs.invalid.frag similarity index 100% rename from reference/shaders/amd/fs.invalid.frag rename to reference/shaders-no-opt/frag/fs.invalid.frag index aecf69eba7a..8548a8733f1 100644 --- a/reference/shaders/amd/fs.invalid.frag +++ b/reference/shaders-no-opt/frag/fs.invalid.frag @@ -1,6 +1,6 @@ #version 450 -#extension GL_AMD_shader_fragment_mask : require #extension GL_AMD_shader_explicit_vertex_parameter : require +#extension GL_AMD_shader_fragment_mask : require layout(binding = 0) uniform sampler2DMS texture1; diff --git a/reference/shaders-no-opt/frag/image-gather.frag b/reference/shaders-no-opt/frag/image-gather.frag new file mode 100644 index 00000000000..1baccdfa534 --- /dev/null +++ 
b/reference/shaders-no-opt/frag/image-gather.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(binding = 0) uniform sampler2D uSamp; +layout(binding = 1) uniform sampler2DShadow uSampShadow; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec3 vUV; + +void main() +{ + FragColor = textureGather(uSamp, vUV.xy); + FragColor += textureGather(uSamp, vUV.xy, 1); + FragColor += textureGather(uSampShadow, vUV.xy, vUV.z); +} + diff --git a/reference/shaders-no-opt/frag/modf-non-function-purity-analysis.frag b/reference/shaders-no-opt/frag/modf-non-function-purity-analysis.frag new file mode 100644 index 00000000000..3a4e0866439 --- /dev/null +++ b/reference/shaders-no-opt/frag/modf-non-function-purity-analysis.frag @@ -0,0 +1,18 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 1) out vec4 vo1; +layout(location = 0) out vec4 vo0; + +vec4 modf_inner() +{ + vec4 _16 = modf(v, vo1); + return _16; +} + +void main() +{ + vec4 _20 = modf_inner(); + vo0 = _20; +} + diff --git a/reference/opt/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag b/reference/shaders-no-opt/frag/multi-dimensional.desktop.invalid.flatten_dim.frag similarity index 100% rename from reference/opt/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag rename to reference/shaders-no-opt/frag/multi-dimensional.desktop.invalid.flatten_dim.frag diff --git a/reference/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag.vk b/reference/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag.vk new file mode 100644 index 00000000000..ab58862ffd4 --- /dev/null +++ b/reference/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(set = 0, binding = 0) uniform texture2D uTex[]; +layout(set = 1, binding = 0) uniform sampler Immut; + +layout(location = 0) out vec4 FragColor; +layout(location = 1) flat in int vIndex; +layout(location = 0) in vec2 
vUV; + +void main() +{ + FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); +} + diff --git a/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag new file mode 100644 index 00000000000..a4a962e163d --- /dev/null +++ b/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag @@ -0,0 +1,46 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _14; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _35; + +void callee2() +{ + int _25 = int(gl_FragCoord.x); + _14.values1[_25]++; +} + +void callee() +{ + int _38 = int(gl_FragCoord.x); + _35.values0[_38]++; + callee2(); +} + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + callee(); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/opt/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk b/reference/shaders-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk similarity index 100% rename from reference/opt/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk rename to reference/shaders-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk diff --git a/reference/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag b/reference/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag new file mode 100644 index 00000000000..df0daa79de3 --- /dev/null +++ b/reference/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag @@ -0,0 +1,46 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require +#extension GL_ARB_sparse_texture_clamp : require + +struct ResType +{ + int _m0; + vec4 _m1; +}; + +layout(binding = 0) uniform sampler2D uSamp; + +layout(location = 0) in vec2 vUV; + +void main() +{ + int _66; + vec4 _67; + _66 = sparseTextureClampARB(uSamp, vUV, 1.0, _67, 2.0); + ResType _25 = ResType(_66, _67); + vec4 texel = _25._m1; + int code = _25._m0; + texel = textureClampARB(uSamp, vUV, 1.0, 2.0); + int _68; + vec4 _69; + _68 = sparseTextureOffsetClampARB(uSamp, vUV, ivec2(1, 2), 1.0, _69, 2.0); + ResType _37 = ResType(_68, _69); + texel = _37._m1; + code = _37._m0; + texel = textureOffsetClampARB(uSamp, vUV, ivec2(1, 2), 1.0, 2.0); + int _70; + vec4 _71; + _70 = sparseTextureGradClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), 1.0, _71); + ResType _47 = 
ResType(_70, _71); + texel = _47._m1; + code = _47._m0; + texel = textureGradClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), 1.0); + int _72; + vec4 _73; + _72 = sparseTextureGradOffsetClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), ivec2(-1, -2), 1.0, _73); + ResType _58 = ResType(_72, _73); + texel = _58._m1; + code = _58._m0; + texel = textureGradOffsetClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), ivec2(-1, -2), 1.0); +} + diff --git a/reference/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag b/reference/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag new file mode 100644 index 00000000000..7faa226430b --- /dev/null +++ b/reference/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag @@ -0,0 +1,105 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require +#extension GL_ARB_sparse_texture_clamp : require + +struct ResType +{ + int _m0; + vec4 _m1; +}; + +layout(binding = 0) uniform sampler2D uSamp; +layout(binding = 1) uniform sampler2DMS uSampMS; +layout(binding = 2, rgba8) uniform readonly image2D uImage; +layout(binding = 3, rgba8) uniform readonly image2DMS uImageMS; + +layout(location = 0) in vec2 vUV; + +void main() +{ + int _144; + vec4 _145; + _144 = sparseTextureARB(uSamp, vUV, _145); + ResType _24 = ResType(_144, _145); + vec4 texel = _24._m1; + bool ret = sparseTexelsResidentARB(_24._m0); + int _146; + vec4 _147; + _146 = sparseTextureARB(uSamp, vUV, _147, 1.10000002384185791015625); + ResType _31 = ResType(_146, _147); + texel = _31._m1; + ret = sparseTexelsResidentARB(_31._m0); + int _148; + vec4 _149; + _148 = sparseTextureLodARB(uSamp, vUV, 1.0, _149); + ResType _38 = ResType(_148, _149); + texel = _38._m1; + ret = sparseTexelsResidentARB(_38._m0); + int _150; + vec4 _151; + _150 = sparseTextureOffsetARB(uSamp, vUV, ivec2(1), _151); + ResType _47 = ResType(_150, _151); + texel = _47._m1; + ret = sparseTexelsResidentARB(_47._m0); + int _152; + vec4 _153; + _152 = sparseTextureOffsetARB(uSamp, vUV, ivec2(2), _153, 0.5); + ResType _56 = 
ResType(_152, _153); + texel = _56._m1; + ret = sparseTexelsResidentARB(_56._m0); + int _154; + vec4 _155; + _154 = sparseTexelFetchARB(uSamp, ivec2(vUV), 1, _155); + ResType _64 = ResType(_154, _155); + texel = _64._m1; + ret = sparseTexelsResidentARB(_64._m0); + int _156; + vec4 _157; + _156 = sparseTexelFetchARB(uSampMS, ivec2(vUV), 2, _157); + ResType _76 = ResType(_156, _157); + texel = _76._m1; + ret = sparseTexelsResidentARB(_76._m0); + int _158; + vec4 _159; + _158 = sparseTexelFetchOffsetARB(uSamp, ivec2(vUV), 1, ivec2(2, 3), _159); + ResType _86 = ResType(_158, _159); + texel = _86._m1; + ret = sparseTexelsResidentARB(_86._m0); + int _160; + vec4 _161; + _160 = sparseTextureLodOffsetARB(uSamp, vUV, 1.5, ivec2(2, 3), _161); + ResType _93 = ResType(_160, _161); + texel = _93._m1; + ret = sparseTexelsResidentARB(_93._m0); + int _162; + vec4 _163; + _162 = sparseTextureGradARB(uSamp, vUV, vec2(1.0), vec2(3.0), _163); + ResType _102 = ResType(_162, _163); + texel = _102._m1; + ret = sparseTexelsResidentARB(_102._m0); + int _164; + vec4 _165; + _164 = sparseTextureGradOffsetARB(uSamp, vUV, vec2(1.0), vec2(3.0), ivec2(-2, -3), _165); + ResType _111 = ResType(_164, _165); + texel = _111._m1; + ret = sparseTexelsResidentARB(_111._m0); + int _166; + vec4 _167; + _166 = sparseTextureClampARB(uSamp, vUV, 4.0, _167); + ResType _118 = ResType(_166, _167); + texel = _118._m1; + ret = sparseTexelsResidentARB(_118._m0); + int _168; + vec4 _169; + _168 = sparseImageLoadARB(uImage, ivec2(vUV), _169); + ResType _128 = ResType(_168, _169); + texel = _128._m1; + ret = sparseTexelsResidentARB(_128._m0); + int _170; + vec4 _171; + _170 = sparseImageLoadARB(uImageMS, ivec2(vUV), 1, _171); + ResType _138 = ResType(_170, _171); + texel = _138._m1; + ret = sparseTexelsResidentARB(_138._m0); +} + diff --git a/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag new file mode 100644 
index 00000000000..00a146c4d36 --- /dev/null +++ b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag @@ -0,0 +1,18 @@ +#version 310 es +#extension GL_EXT_shader_framebuffer_fetch : require +precision mediump float; +precision highp int; + +mediump vec4 uSubpass0; +mediump vec4 uSubpass1; + +layout(location = 0) inout vec3 FragColor; +layout(location = 1) inout vec4 FragColor2; + +void main() +{ + uSubpass0.xyz = FragColor; + uSubpass1 = FragColor2; + FragColor = uSubpass0.xyz + uSubpass1.xyz; +} + diff --git a/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag new file mode 100644 index 00000000000..8600549859e --- /dev/null +++ b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag @@ -0,0 +1,18 @@ +#version 310 es +#extension GL_EXT_shader_framebuffer_fetch_non_coherent : require +precision mediump float; +precision highp int; + +mediump vec4 uSubpass0; +mediump vec4 uSubpass1; + +layout(location = 0, noncoherent) inout vec3 FragColor; +layout(location = 1, noncoherent) inout vec4 FragColor2; + +void main() +{ + uSubpass0.xyz = FragColor; + uSubpass1 = FragColor2; + FragColor = uSubpass0.xyz + uSubpass1.xyz; +} + diff --git a/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag new file mode 100644 index 00000000000..d1b72651215 --- /dev/null +++ b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag @@ -0,0 +1,16 @@ +#version 100 +#extension GL_EXT_shader_framebuffer_fetch : require +#extension GL_EXT_draw_buffers : require +precision mediump float; +precision highp int; + +mediump vec4 uSubpass0; +mediump vec4 uSubpass1; + +void main() +{ + uSubpass0 = gl_LastFragData[0]; + 
uSubpass1 = gl_LastFragData[1]; + gl_FragData[0] = uSubpass0.xyz + uSubpass1.xyz; +} + diff --git a/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag new file mode 100644 index 00000000000..c0a40571b05 --- /dev/null +++ b/reference/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag @@ -0,0 +1,16 @@ +#version 100 +#extension GL_EXT_shader_framebuffer_fetch_non_coherent : require +#extension GL_EXT_draw_buffers : require +precision mediump float; +precision highp int; + +mediump vec4 uSubpass0; +mediump vec4 uSubpass1; + +void main() +{ + uSubpass0 = gl_LastFragData[0]; + uSubpass1 = gl_LastFragData[1]; + gl_FragData[0] = uSubpass0.xyz + uSubpass1.xyz; +} + diff --git a/reference/shaders-no-opt/frag/texture-gather-offsets.frag b/reference/shaders-no-opt/frag/texture-gather-offsets.frag new file mode 100644 index 00000000000..36409dd3c34 --- /dev/null +++ b/reference/shaders-no-opt/frag/texture-gather-offsets.frag @@ -0,0 +1,12 @@ +#version 460 + +layout(binding = 0) uniform sampler2D Image0; + +layout(location = 0) out vec4 outColor; +layout(location = 0) in vec2 inUv; + +void main() +{ + outColor = textureGatherOffsets(Image0, inUv, ivec2[](ivec2(0), ivec2(1, 0), ivec2(1), ivec2(0, 1))); +} + diff --git a/reference/shaders-no-opt/frag/texture-gather-uint-component.asm.frag b/reference/shaders-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..66dcb369cc7 --- /dev/null +++ b/reference/shaders-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0) uniform sampler2D uSamp; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; + +void main() +{ + FragColor = textureGather(uSamp, vUV, int(1u)); +} + diff --git 
a/reference/shaders-no-opt/frag/texture1d-emulation.es.frag b/reference/shaders-no-opt/frag/texture1d-emulation.es.frag new file mode 100644 index 00000000000..71efb7bcc65 --- /dev/null +++ b/reference/shaders-no-opt/frag/texture1d-emulation.es.frag @@ -0,0 +1,30 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 0) uniform highp sampler2D uSamp; +layout(binding = 1) uniform highp sampler2DShadow uSampShadow; +layout(binding = 2) uniform highp sampler2DArray uSampArray; +layout(binding = 3) uniform highp sampler2DArrayShadow uSampArrayShadow; +layout(binding = 4, r32f) uniform highp image2D uImage; + +layout(location = 0) out highp vec4 FragColor; +layout(location = 0) in highp vec4 vUV; + +void main() +{ + FragColor = texture(uSamp, vec2(vUV.x, 0.0)); + FragColor += textureProj(uSamp, vec3(vUV.xy.x, 0.0, vUV.xy.y)); + FragColor += texelFetch(uSamp, ivec2(int(vUV.x), 0), 0); + FragColor += vec4(texture(uSampShadow, vec3(vUV.xyz.x, 0.0, vUV.xyz.z))); + highp vec4 _54 = vUV; + highp vec4 _57 = _54; + _57.y = _54.w; + FragColor += vec4(textureProj(uSampShadow, vec4(_57.x, 0.0, _54.z, _57.y))); + FragColor = texture(uSampArray, vec3(vUV.xy.x, 0.0, vUV.xy.y)); + FragColor += texelFetch(uSampArray, ivec3(ivec2(vUV.xy).x, 0, ivec2(vUV.xy).y), 0); + FragColor += vec4(texture(uSampArrayShadow, vec4(vUV.xyz.xy.x, 0.0, vUV.xyz.xy.y, vUV.xyz.z))); + FragColor += imageLoad(uImage, ivec2(int(vUV.x), 0)); + imageStore(uImage, ivec2(int(vUV.x), 0), FragColor); +} + diff --git a/reference/shaders-no-opt/frag/texture1d-emulation.legacy.frag b/reference/shaders-no-opt/frag/texture1d-emulation.legacy.frag new file mode 100644 index 00000000000..e6a14ed30cb --- /dev/null +++ b/reference/shaders-no-opt/frag/texture1d-emulation.legacy.frag @@ -0,0 +1,21 @@ +#version 100 +#extension GL_EXT_shadow_samplers : require +precision mediump float; +precision highp int; + +uniform highp sampler2D uSamp; +uniform highp sampler2DShadow uSampShadow; + +varying 
highp vec4 vUV; + +void main() +{ + gl_FragData[0] = texture2D(uSamp, vec2(vUV.x, 0.0)); + gl_FragData[0] += texture2DProj(uSamp, vec3(vUV.xy.x, 0.0, vUV.xy.y)); + gl_FragData[0] += vec4(shadow2DEXT(uSampShadow, vec3(vUV.xyz.x, 0.0, vUV.xyz.z))); + highp vec4 _44 = vUV; + highp vec4 _47 = _44; + _47.y = _44.w; + gl_FragData[0] += vec4(shadow2DProjEXT(uSampShadow, vec4(_47.x, 0.0, _44.z, _47.y))); +} + diff --git a/reference/shaders-no-opt/frag/variables.zero-initialize.frag b/reference/shaders-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..c9027235082 --- /dev/null +++ b/reference/shaders-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,28 @@ +#version 450 + +struct Foo +{ + int a; +}; + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; +int uninit_int = 0; +ivec4 uninit_vector = ivec4(0); +mat4 uninit_matrix = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); +Foo uninit_foo = Foo(0); + +void main() +{ + int uninit_function_int = 0; + if (vColor.x > 10.0) + { + uninit_function_int = 10; + } + else + { + uninit_function_int = 20; + } + FragColor = vColor; +} + diff --git a/reference/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag b/reference/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag new file mode 100644 index 00000000000..f46bc2fd884 --- /dev/null +++ b/reference/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag @@ -0,0 +1,24 @@ +#version 100 +precision mediump float; +precision highp int; + +vec2 _19; + +void main() +{ + highp vec2 _30; + for (int spvDummy15 = 0; spvDummy15 < 1; spvDummy15++) + { + if (gl_FragCoord.x != gl_FragCoord.x) + { + _30 = _19; + break; + } + highp vec2 _29; + _29.y = _19.y; + _30 = _29; + break; + } + gl_FragData[0] = vec4(_30, 1.0, 1.0); +} + diff --git a/reference/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task.vk 
b/reference/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task.vk new file mode 100644 index 00000000000..98704e22dec --- /dev/null +++ b/reference/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task.vk @@ -0,0 +1,42 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + if (vs[5] > 20.0) + { + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); + } + else + { + EmitMeshTasksEXT(uint(int(vs[6])), 10u, 50u); + } +} + diff --git a/reference/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task.vk b/reference/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task.vk new file mode 100644 index 00000000000..1d491e7014b --- /dev/null +++ b/reference/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + 
if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); +} + diff --git a/reference/shaders-no-opt/vert/io-blocks.force-flattened-io.vert b/reference/shaders-no-opt/vert/io-blocks.force-flattened-io.vert new file mode 100644 index 00000000000..604de8a2cc3 --- /dev/null +++ b/reference/shaders-no-opt/vert/io-blocks.force-flattened-io.vert @@ -0,0 +1,25 @@ +#version 450 + +struct Foo +{ + vec4 bar[2]; + vec4 baz[2]; +}; + +out vec4 _14_foo_bar[2]; +out vec4 _14_foo_baz[2]; +out vec4 _14_foo2_bar[2]; +out vec4 _14_foo2_baz[2]; +out vec4 foo3_bar[2]; +out vec4 foo3_baz[2]; + +void main() +{ + _14_foo_bar[0] = vec4(1.0); + _14_foo_baz[1] = vec4(2.0); + _14_foo2_bar[0] = vec4(3.0); + _14_foo2_baz[1] = vec4(4.0); + foo3_bar[0] = vec4(5.0); + foo3_baz[1] = vec4(6.0); +} + diff --git a/reference/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag.vk b/reference/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag.vk new file mode 100644 index 00000000000..e5c67115ca8 --- /dev/null +++ b/reference/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag.vk @@ -0,0 +1,10 @@ +#version 450 +#extension GL_EXT_fragment_shading_rate : require + +layout(location = 0) out uint FragColor; + +void main() +{ + FragColor = uint(gl_ShadingRateEXT); +} + diff --git a/reference/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag.vk b/reference/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag.vk new file mode 100644 index 00000000000..380b7465914 --- /dev/null +++ b/reference/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag.vk @@ -0,0 +1,16 @@ +#version 450 + +layout(set = 0, binding = 0, std140) uniform UBO +{ + layout(offset = 16) mat4 m; + layout(offset = 0) vec4 v; +} _13; + +layout(location = 0) out vec4 FragColor; 
+layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = (_13.m * vColor) + _13.v; +} + diff --git a/reference/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag.vk b/reference/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag.vk new file mode 100644 index 00000000000..73b0f9b097b --- /dev/null +++ b/reference/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag.vk @@ -0,0 +1,16 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + bool _12 = gl_HelperInvocation; + float _15 = float(_12); + FragColor = _15; + demote; + bool _16 = gl_HelperInvocation; + float _17 = float(_16); + FragColor = _17; +} + diff --git a/reference/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert.vk b/reference/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert.vk new file mode 100644 index 00000000000..4736723322a --- /dev/null +++ b/reference/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert.vk @@ -0,0 +1,9 @@ +#version 450 +#extension GL_EXT_fragment_shading_rate : require + +void main() +{ + gl_PrimitiveShadingRateEXT = 3; + gl_Position = vec4(1.0); +} + diff --git a/reference/shaders-reflection/asm/aliased-entry-point-names.asm.multi.json b/reference/shaders-reflection/asm/aliased-entry-point-names.asm.multi.json index a56a06f35c0..666167af4a7 100644 --- a/reference/shaders-reflection/asm/aliased-entry-point-names.asm.multi.json +++ b/reference/shaders-reflection/asm/aliased-entry-point-names.asm.multi.json @@ -34,6 +34,9 @@ "type" : "float", "array" : [ 1 + ], + "array_size_is_literal" : [ + true ] }, { @@ -41,6 +44,9 @@ "type" : "float", "array" : [ 1 + ], + "array_size_is_literal" : [ + true ] } ] diff --git a/reference/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp.json 
b/reference/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp.json new file mode 100644 index 00000000000..b9224eccdbf --- /dev/null +++ b/reference/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp.json @@ -0,0 +1,71 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ], + "types" : { + "_3" : { + "name" : "Params", + "members" : [ + { + "name" : "x", + "type" : "float", + "offset" : 0 + }, + { + "name" : "y", + "type" : "_6", + "offset" : 16, + "physical_pointer" : true + } + ] + }, + "_4" : { + "name" : "IntBuf", + "members" : [ + { + "name" : "v", + "type" : "int", + "offset" : 0 + } + ] + }, + "_11" : { + "name" : "IntBuf", + "type" : "_4", + "physical_pointer" : true + }, + "_6" : { + "name" : "IntBuf", + "array" : [ + 3 + ], + "array_size_is_literal" : [ + true + ], + "type" : "_11", + "array_stride" : 16 + } + }, + "ubos" : [ + { + "type" : "_3", + "name" : "Params", + "block_size" : 24, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/asm/op-source-glsl-ssbo-1.asm.comp.json b/reference/shaders-reflection/asm/op-source-glsl-ssbo-1.asm.comp.json index 6cd7f95d76e..3b0c9868e56 100644 --- a/reference/shaders-reflection/asm/op-source-glsl-ssbo-1.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-glsl-ssbo-1.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-glsl-ssbo-2.asm.comp.json 
b/reference/shaders-reflection/asm/op-source-glsl-ssbo-2.asm.comp.json index c2fa56405e3..80cf8622272 100644 --- a/reference/shaders-reflection/asm/op-source-glsl-ssbo-2.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-glsl-ssbo-2.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] }, @@ -28,7 +42,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-hlsl-uav-1.asm.comp.json b/reference/shaders-reflection/asm/op-source-hlsl-uav-1.asm.comp.json index 12b0677f6dd..b34f85bb5a8 100644 --- a/reference/shaders-reflection/asm/op-source-hlsl-uav-1.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-hlsl-uav-1.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -10,12 +20,16 @@ "name" : "UAV0", "members" : [ { - "name" : "_data", + "name" : "@data", "type" : "vec4", "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-hlsl-uav-2.asm.comp.json b/reference/shaders-reflection/asm/op-source-hlsl-uav-2.asm.comp.json index 8da2c74eb59..052e3ba814e 100644 --- a/reference/shaders-reflection/asm/op-source-hlsl-uav-2.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-hlsl-uav-2.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 
1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -10,12 +20,16 @@ "name" : "UAV0", "members" : [ { - "name" : "_data", + "name" : "@data", "type" : "vec4", "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-none-ssbo-1.asm.comp.json b/reference/shaders-reflection/asm/op-source-none-ssbo-1.asm.comp.json index 6cd7f95d76e..3b0c9868e56 100644 --- a/reference/shaders-reflection/asm/op-source-none-ssbo-1.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-none-ssbo-1.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-none-ssbo-2.asm.comp.json b/reference/shaders-reflection/asm/op-source-none-ssbo-2.asm.comp.json index c2fa56405e3..80cf8622272 100644 --- a/reference/shaders-reflection/asm/op-source-none-ssbo-2.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-none-ssbo-2.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] }, @@ -28,7 +42,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-none-uav-1.asm.comp.json 
b/reference/shaders-reflection/asm/op-source-none-uav-1.asm.comp.json index 12b0677f6dd..b34f85bb5a8 100644 --- a/reference/shaders-reflection/asm/op-source-none-uav-1.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-none-uav-1.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -10,12 +20,16 @@ "name" : "UAV0", "members" : [ { - "name" : "_data", + "name" : "@data", "type" : "vec4", "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/asm/op-source-none-uav-2.asm.comp.json b/reference/shaders-reflection/asm/op-source-none-uav-2.asm.comp.json index 8da2c74eb59..052e3ba814e 100644 --- a/reference/shaders-reflection/asm/op-source-none-uav-2.asm.comp.json +++ b/reference/shaders-reflection/asm/op-source-none-uav-2.asm.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -10,12 +20,16 @@ "name" : "UAV0", "members" : [ { - "name" : "_data", + "name" : "@data", "type" : "vec4", "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/comp/array-of-physical-pointer.comp.json b/reference/shaders-reflection/comp/array-of-physical-pointer.comp.json new file mode 100644 index 00000000000..a5da58c1a7d --- /dev/null +++ b/reference/shaders-reflection/comp/array-of-physical-pointer.comp.json @@ -0,0 +1,66 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ], + 
"types" : { + "_11" : { + "name" : "Params", + "members" : [ + { + "name" : "x", + "type" : "float", + "offset" : 0 + }, + { + "name" : "y", + "type" : "_7", + "array" : [ + 3 + ], + "array_size_is_literal" : [ + true + ], + "offset" : 16, + "array_stride" : 16 + } + ] + }, + "_13" : { + "name" : "IntBuf", + "members" : [ + { + "name" : "v", + "type" : "int", + "offset" : 0 + } + ] + }, + "_7" : { + "name" : "IntBuf", + "type" : "_13", + "physical_pointer" : true + } + }, + "ubos" : [ + { + "type" : "_11", + "name" : "Params", + "block_size" : 64, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/comp/function-pointer.invalid.asm.comp.json b/reference/shaders-reflection/comp/function-pointer.invalid.asm.comp.json new file mode 100644 index 00000000000..bed59455f01 --- /dev/null +++ b/reference/shaders-reflection/comp/function-pointer.invalid.asm.comp.json @@ -0,0 +1,18 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/comp/out-of-order-block-offsets.comp.json b/reference/shaders-reflection/comp/out-of-order-block-offsets.comp.json new file mode 100644 index 00000000000..b697b453b1a --- /dev/null +++ b/reference/shaders-reflection/comp/out-of-order-block-offsets.comp.json @@ -0,0 +1,44 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ], + "types" : { + "_7" : { + "name" : "SSBO", + "members" : [ + { + "name" : "foo", + "type" : "uint", + "offset" : 8 + }, + { + "name" : "bar", + "type" : "uint", + "offset" : 4 + } + ] + } + }, + "ssbos" : [ + { + "type" : "_7", + "name" : "SSBO", + "block_size" : 12, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end 
of file diff --git a/reference/shaders-reflection/comp/physical-pointer.comp.json b/reference/shaders-reflection/comp/physical-pointer.comp.json new file mode 100644 index 00000000000..a397d0febc2 --- /dev/null +++ b/reference/shaders-reflection/comp/physical-pointer.comp.json @@ -0,0 +1,55 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] + } + ], + "types" : { + "_8" : { + "name" : "Params", + "members" : [ + { + "name" : "x", + "type" : "float", + "offset" : 0 + }, + { + "name" : "y", + "type" : "_10", + "offset" : 8, + "physical_pointer" : true + } + ] + }, + "_10" : { + "name" : "IntBuf", + "members" : [ + { + "name" : "v", + "type" : "int", + "offset" : 0 + } + ] + } + }, + "ubos" : [ + { + "type" : "_8", + "name" : "Params", + "block_size" : 16, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/comp/struct-layout.comp.json b/reference/shaders-reflection/comp/struct-layout.comp.json index 3004454b806..e9bf7eea903 100644 --- a/reference/shaders-reflection/comp/struct-layout.comp.json +++ b/reference/shaders-reflection/comp/struct-layout.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -12,7 +22,8 @@ { "name" : "m", "type" : "mat4", - "offset" : 0 + "offset" : 0, + "matrix_stride" : 16 } ] }, @@ -25,7 +36,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 64 } ] }, @@ -38,7 +53,11 @@ "array" : [ 0 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 64 } ] } diff --git a/reference/shaders-reflection/comp/struct-packing.comp.json 
b/reference/shaders-reflection/comp/struct-packing.comp.json index 22a41584d96..12285ae24ef 100644 --- a/reference/shaders-reflection/comp/struct-packing.comp.json +++ b/reference/shaders-reflection/comp/struct-packing.comp.json @@ -2,7 +2,17 @@ "entryPoints" : [ { "name" : "main", - "mode" : "comp" + "mode" : "comp", + "workgroup_size" : [ + 1, + 1, + 1 + ], + "workgroup_size_is_spec_constant_id" : [ + false, + false, + false + ] } ], "types" : { @@ -15,7 +25,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 8 }, { "name" : "b", @@ -48,7 +62,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 }, { "name" : "b", @@ -91,7 +109,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 }, { "name" : "m1s", @@ -99,7 +121,11 @@ "array" : [ 1 ], - "offset" : 16 + "array_size_is_literal" : [ + true + ], + "offset" : 16, + "array_stride" : 16 }, { "name" : "m2s", @@ -107,7 +133,11 @@ "array" : [ 1 ], - "offset" : 32 + "array_size_is_literal" : [ + true + ], + "offset" : 32, + "array_stride" : 32 }, { "name" : "m0", @@ -140,7 +170,11 @@ "array" : [ 8 ], - "offset" : 152 + "array_size_is_literal" : [ + true + ], + "offset" : 152, + "array_stride" : 8 } ] }, @@ -158,7 +192,11 @@ "array" : [ 2 ], - "offset" : 224 + "array_size_is_literal" : [ + true + ], + "offset" : 224, + "array_stride" : 224 }, { "name" : "content2", @@ -168,12 +206,14 @@ { "name" : "m0", "type" : "mat2", - "offset" : 896 + "offset" : 896, + "matrix_stride" : 8 }, { "name" : "m1", "type" : "mat2", - "offset" : 912 + "offset" : 912, + "matrix_stride" : 8 }, { "name" : "m2", @@ -181,43 +221,62 @@ "array" : [ 4 ], - "offset" : 928 + "array_size_is_literal" : [ + true + ], + "offset" : 928, + "array_stride" : 32, + "matrix_stride" : 16 }, { "name" : "m3", "type" : "mat3x2", - "offset" : 1056 + "offset" : 1056, + 
"matrix_stride" : 8 }, { "name" : "m4", "type" : "mat2", - "row_major" : true, - "offset" : 1080 + "offset" : 1080, + "matrix_stride" : 8, + "row_major" : true }, { "name" : "m5", "type" : "mat2", - "row_major" : true, "array" : [ 9 ], - "offset" : 1096 + "array_size_is_literal" : [ + true + ], + "offset" : 1096, + "array_stride" : 16, + "matrix_stride" : 8, + "row_major" : true }, { "name" : "m6", "type" : "mat2x3", - "row_major" : true, "array" : [ 2, 4 ], - "offset" : 1240 + "array_size_is_literal" : [ + true, + true + ], + "offset" : 1240, + "array_stride" : 48, + "matrix_stride" : 8, + "row_major" : true }, { "name" : "m7", "type" : "mat3x2", - "row_major" : true, - "offset" : 1440 + "offset" : 1440, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "array", @@ -225,7 +284,11 @@ "array" : [ 0 ], - "offset" : 1472 + "array_size_is_literal" : [ + true + ], + "offset" : 1472, + "array_stride" : 4 } ] }, @@ -238,7 +301,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 }, { "name" : "b", @@ -271,7 +338,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 }, { "name" : "b", @@ -314,7 +385,11 @@ "array" : [ 1 ], - "offset" : 0 + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 32 }, { "name" : "m1s", @@ -322,7 +397,11 @@ "array" : [ 1 ], - "offset" : 32 + "array_size_is_literal" : [ + true + ], + "offset" : 32, + "array_stride" : 16 }, { "name" : "m2s", @@ -330,7 +409,11 @@ "array" : [ 1 ], - "offset" : 48 + "array_size_is_literal" : [ + true + ], + "offset" : 48, + "array_stride" : 32 }, { "name" : "m0", @@ -363,7 +446,11 @@ "array" : [ 8 ], - "offset" : 192 + "array_size_is_literal" : [ + true + ], + "offset" : 192, + "array_stride" : 16 } ] }, @@ -381,7 +468,11 @@ "array" : [ 2 ], - "offset" : 320 + "array_size_is_literal" : [ + true + ], + "offset" : 320, + "array_stride" : 320 }, { "name" : 
"content2", @@ -391,12 +482,14 @@ { "name" : "m0", "type" : "mat2", - "offset" : 1280 + "offset" : 1280, + "matrix_stride" : 16 }, { "name" : "m1", "type" : "mat2", - "offset" : 1312 + "offset" : 1312, + "matrix_stride" : 16 }, { "name" : "m2", @@ -404,43 +497,62 @@ "array" : [ 4 ], - "offset" : 1344 + "array_size_is_literal" : [ + true + ], + "offset" : 1344, + "array_stride" : 32, + "matrix_stride" : 16 }, { "name" : "m3", "type" : "mat3x2", - "offset" : 1472 + "offset" : 1472, + "matrix_stride" : 16 }, { "name" : "m4", "type" : "mat2", - "row_major" : true, - "offset" : 1520 + "offset" : 1520, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "m5", "type" : "mat2", - "row_major" : true, "array" : [ 9 ], - "offset" : 1552 + "array_size_is_literal" : [ + true + ], + "offset" : 1552, + "array_stride" : 32, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "m6", "type" : "mat2x3", - "row_major" : true, "array" : [ 2, 4 ], - "offset" : 1840 + "array_size_is_literal" : [ + true, + true + ], + "offset" : 1840, + "array_stride" : 96, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "m7", "type" : "mat3x2", - "row_major" : true, - "offset" : 2224 + "offset" : 2224, + "matrix_stride" : 16, + "row_major" : true }, { "name" : "array", @@ -448,7 +560,11 @@ "array" : [ 0 ], - "offset" : 2256 + "array_size_is_literal" : [ + true + ], + "offset" : 2256, + "array_stride" : 16 } ] } diff --git a/reference/shaders-reflection/comp/workgroup-size-spec-constant.comp.json b/reference/shaders-reflection/comp/workgroup-size-spec-constant.comp.json new file mode 100644 index 00000000000..c67d7230034 --- /dev/null +++ b/reference/shaders-reflection/comp/workgroup-size-spec-constant.comp.json @@ -0,0 +1,62 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "comp", + "workgroup_size" : [ + 10, + 40, + 60 + ], + "workgroup_size_is_spec_constant_id" : [ + true, + true, + true + ] + } + ], + "types" : { + "_8" : { + "name" : "SSBO", + "members" : [ + { + 
"name" : "v", + "type" : "vec4", + "offset" : 0 + } + ] + } + }, + "ssbos" : [ + { + "type" : "_8", + "name" : "SSBO", + "block_size" : 16, + "set" : 0, + "binding" : 0 + } + ], + "specialization_constants" : [ + { + "name" : "", + "id" : 10, + "type" : "uint", + "variable_id" : 18, + "default_value" : 1 + }, + { + "name" : "", + "id" : 40, + "type" : "uint", + "variable_id" : 19, + "default_value" : 1 + }, + { + "name" : "", + "id" : 60, + "type" : "uint", + "variable_id" : 20, + "default_value" : 1 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/frag/image-load-store-uint-coord.asm.frag.json b/reference/shaders-reflection/frag/image-load-store-uint-coord.asm.frag.json index 527ea2bfeee..c239527c842 100644 --- a/reference/shaders-reflection/frag/image-load-store-uint-coord.asm.frag.json +++ b/reference/shaders-reflection/frag/image-load-store-uint-coord.asm.frag.json @@ -8,7 +8,7 @@ "outputs" : [ { "type" : "vec4", - "name" : "_entryPointOutput", + "name" : "@entryPointOutput", "location" : 0 } ], diff --git a/reference/shaders-reflection/frag/separate-sampler-texture-array.vk.frag.json b/reference/shaders-reflection/frag/separate-sampler-texture-array.vk.frag.json index 9216d93e5d7..e5f2f756249 100644 --- a/reference/shaders-reflection/frag/separate-sampler-texture-array.vk.frag.json +++ b/reference/shaders-reflection/frag/separate-sampler-texture-array.vk.frag.json @@ -31,6 +31,9 @@ "array" : [ 4 ], + "array_size_is_literal" : [ + true + ], "set" : 0, "binding" : 1 }, @@ -40,6 +43,9 @@ "array" : [ 4 ], + "array_size_is_literal" : [ + true + ], "set" : 0, "binding" : 4 }, @@ -49,6 +55,9 @@ "array" : [ 4 ], + "array_size_is_literal" : [ + true + ], "set" : 0, "binding" : 3 }, @@ -58,6 +67,9 @@ "array" : [ 4 ], + "array_size_is_literal" : [ + true + ], "set" : 0, "binding" : 2 } diff --git a/reference/shaders-reflection/frag/spec-constant.vk.frag.json b/reference/shaders-reflection/frag/spec-constant.vk.frag.json index 
0add2986660..dd876dde96d 100644 --- a/reference/shaders-reflection/frag/spec-constant.vk.frag.json +++ b/reference/shaders-reflection/frag/spec-constant.vk.frag.json @@ -14,6 +14,9 @@ "type" : "float", "array" : [ 135 + ], + "array_size_is_literal" : [ + false ] } ] @@ -28,43 +31,59 @@ ], "specialization_constants" : [ { + "name" : "a", "id" : 1, "type" : "float", + "variable_id" : 9, "default_value" : 1.5 }, { + "name" : "b", "id" : 2, "type" : "float", + "variable_id" : 11, "default_value" : 2.5 }, { + "name" : "c", "id" : 3, "type" : "int", + "variable_id" : 16, "default_value" : 3 }, { + "name" : "d", "id" : 4, "type" : "int", + "variable_id" : 25, "default_value" : 4 }, { + "name" : "e", "id" : 5, "type" : "uint", + "variable_id" : 34, "default_value" : 5 }, { + "name" : "f", "id" : 6, "type" : "uint", + "variable_id" : 35, "default_value" : 6 }, { + "name" : "g", "id" : 7, "type" : "bool", + "variable_id" : 56, "default_value" : false }, { + "name" : "h", "id" : 8, "type" : "bool", + "variable_id" : 57, "default_value" : true } ] diff --git a/reference/shaders-reflection/vert/array-size-reflection.vert.json b/reference/shaders-reflection/vert/array-size-reflection.vert.json new file mode 100644 index 00000000000..87b6cb060f4 --- /dev/null +++ b/reference/shaders-reflection/vert/array-size-reflection.vert.json @@ -0,0 +1,78 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "vert" + } + ], + "types" : { + "_11" : { + "name" : "gl_PerVertex", + "members" : [ + { + "name" : "gl_Position", + "type" : "vec4" + }, + { + "name" : "gl_PointSize", + "type" : "float" + }, + { + "name" : "gl_ClipDistance", + "type" : "float", + "array" : [ + 1 + ], + "array_size_is_literal" : [ + true + ] + }, + { + "name" : "gl_CullDistance", + "type" : "float", + "array" : [ + 1 + ], + "array_size_is_literal" : [ + true + ] + } + ] + }, + "_18" : { + "name" : "u_", + "members" : [ + { + "name" : "u_0", + "type" : "vec4", + "array" : [ + 16 + ], + "array_size_is_literal" : [ + 
false + ], + "offset" : 0, + "array_stride" : 16 + } + ] + } + }, + "ubos" : [ + { + "type" : "_18", + "name" : "u_", + "block_size" : 16, + "set" : 1, + "binding" : 0 + } + ], + "specialization_constants" : [ + { + "name" : "ARR_SIZE", + "id" : 0, + "type" : "int", + "variable_id" : 16, + "default_value" : 1 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/vert/read-from-row-major-array.vert.json b/reference/shaders-reflection/vert/read-from-row-major-array.vert.json index d92fb67fb58..cebd66bd9aa 100644 --- a/reference/shaders-reflection/vert/read-from-row-major-array.vert.json +++ b/reference/shaders-reflection/vert/read-from-row-major-array.vert.json @@ -25,12 +25,18 @@ { "name" : "var", "type" : "mat2x3", - "row_major" : true, "array" : [ 4, 3 ], - "offset" : 0 + "array_size_is_literal" : [ + true, + true + ], + "offset" : 0, + "array_stride" : 192, + "matrix_stride" : 16, + "row_major" : true } ] } diff --git a/reference/shaders-reflection/vert/stride-reflection.vert.json b/reference/shaders-reflection/vert/stride-reflection.vert.json new file mode 100644 index 00000000000..1dd8f189575 --- /dev/null +++ b/reference/shaders-reflection/vert/stride-reflection.vert.json @@ -0,0 +1,96 @@ +{ + "entryPoints" : [ + { + "name" : "main", + "mode" : "vert" + } + ], + "types" : { + "_11" : { + "name" : "gl_PerVertex", + "members" : [ + { + "name" : "gl_Position", + "type" : "vec4" + }, + { + "name" : "gl_PointSize", + "type" : "float" + }, + { + "name" : "gl_ClipDistance", + "type" : "float", + "array" : [ + 1 + ], + "array_size_is_literal" : [ + true + ] + }, + { + "name" : "gl_CullDistance", + "type" : "float", + "array" : [ + 1 + ], + "array_size_is_literal" : [ + true + ] + } + ] + }, + "_21" : { + "name" : "U", + "members" : [ + { + "name" : "v", + "type" : "vec4", + "array" : [ + 4 + ], + "array_size_is_literal" : [ + true + ], + "offset" : 0, + "array_stride" : 16 + }, + { + "name" : "c", + "type" : "mat4", + "array" : [ + 4 + ], + 
"array_size_is_literal" : [ + true + ], + "offset" : 64, + "array_stride" : 64, + "matrix_stride" : 16 + }, + { + "name" : "r", + "type" : "mat4", + "array" : [ + 4 + ], + "array_size_is_literal" : [ + true + ], + "offset" : 320, + "array_stride" : 64, + "matrix_stride" : 16, + "row_major" : true + } + ] + } + }, + "ubos" : [ + { + "type" : "_21", + "name" : "U", + "block_size" : 576, + "set" : 0, + "binding" : 0 + } + ] +} \ No newline at end of file diff --git a/reference/shaders-reflection/vert/texture_buffer.vert.json b/reference/shaders-reflection/vert/texture_buffer.vert.json index 3c69e24cbc0..a9368639488 100644 --- a/reference/shaders-reflection/vert/texture_buffer.vert.json +++ b/reference/shaders-reflection/vert/texture_buffer.vert.json @@ -32,6 +32,7 @@ { "type" : "imageBuffer", "name" : "uSampo", + "readonly" : true, "set" : 0, "binding" : 5, "format" : "rgba32f" diff --git a/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag new file mode 100644 index 00000000000..429bbf738ea --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag @@ -0,0 +1,361 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 
View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 
View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + 
packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_MobileDirectionalLight +{ + float4 MobileDirectionalLight_DirectionalLightColor; + float4 MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition; + float4 MobileDirectionalLight_DirectionalLightShadowSize; + float4 
MobileDirectionalLight_DirectionalLightDistanceFadeMAD; + float4 MobileDirectionalLight_DirectionalLightShadowDistances; + float4x4 MobileDirectionalLight_DirectionalLightScreenToShadow[4]; +}; + +struct type_Globals +{ + int NumDynamicPointLights; + float4 LightPositionAndInvRadius[4]; + float4 LightColorAndFalloffExponent[4]; + float4 MobileReflectionParams; +}; + +constant float3 _136 = {}; +constant float4 _137 = {}; +constant float _138 = {}; +constant float3 _139 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0)]]; + float4 in_var_TEXCOORD7 [[user(locn1)]]; + float4 in_var_TEXCOORD8 [[user(locn2)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_MobileDirectionalLight& MobileDirectionalLight [[buffer(1)]], constant type_Globals& _Globals [[buffer(2)]], texture2d MobileDirectionalLight_DirectionalLightShadowTexture [[texture(0)]], texture2d Material_Texture2D_0 [[texture(1)]], texture2d Material_Texture2D_1 [[texture(2)]], texturecube ReflectionCubemap [[texture(3)]], sampler MobileDirectionalLight_DirectionalLightShadowSampler [[sampler(0)]], sampler Material_Texture2D_0Sampler [[sampler(1)]], sampler Material_Texture2D_1Sampler [[sampler(2)]], sampler ReflectionCubemapSampler [[sampler(3)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _177 = float4((((gl_FragCoord.xy - View.View_ViewRectMin.xy) * View.View_ViewSizeAndInvSize.zw) - float2(0.5)) * float2(2.0, -2.0), _138, 1.0) * float4(gl_FragCoord.w); + float3 _179 = in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation); + float3 _181 = fast::normalize(-in.in_var_TEXCOORD8.xyz); + float4 _187 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (in.in_var_TEXCOORD0 * float2(10.0))); + float2 _190 = (_187.xy * float2(2.0)) - float2(1.0); + float3 _206 = fast::normalize(float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), 
float3(0.0, 0.0, 1.0)) * (((float4(_190, sqrt(fast::clamp(1.0 - dot(_190, _190), 0.0, 1.0)), 1.0).xyz * float3(0.300000011920928955078125, 0.300000011920928955078125, 1.0)) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)); + float _208 = dot(_206, _181); + float4 _217 = Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(20.0))); + float _219 = mix(0.4000000059604644775390625, 1.0, _217.x); + float4 _223 = Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(5.0))); + float _224 = _177.w; + float _228 = fast::min(fast::max((_224 - 24.0) * 0.000666666659526526927947998046875, 0.0), 1.0); + float _229 = _223.y; + float4 _233 = Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(0.5))); + float _235 = _233.y; + float _253 = fast::clamp((fast::min(fast::max(mix(0.0, 0.5, _235) + mix(mix(0.699999988079071044921875, 1.0, _229), 1.0, _228), 0.0), 1.0) * View.View_RoughnessOverrideParameter.y) + View.View_RoughnessOverrideParameter.x, 0.119999997317790985107421875, 1.0); + float2 _257 = (float2(_253) * float2(-1.0, -0.0274999998509883880615234375)) + float2(1.0, 0.0425000004470348358154296875); + float _258 = _257.x; + float3 _270 = (fast::clamp(float3(mix(_219, 1.0 - _219, mix(_229, 1.0, _228)) * (mix(0.2949999868869781494140625, 0.660000026226043701171875, mix(_235 + mix(_229, 0.0, _228), 0.5, 0.5)) * 0.5)), float3(0.0), float3(1.0)) * float3(View.View_DiffuseOverrideParameter.w)) + View.View_DiffuseOverrideParameter.xyz; + float3 _275 = float3(((fast::min(_258 * _258, exp2((-9.27999973297119140625) * fast::max(_208, 0.0))) * _258) + _257.y) * View.View_SpecularOverrideParameter.w) + View.View_SpecularOverrideParameter.xyz; + float _276 = _275.x; + float4 _303; + int _286 = 0; + for (;;) + { + if (_286 < 2) + { + if (_224 < MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowDistances[uint(_286)]) + { + _303 = 
MobileDirectionalLight.MobileDirectionalLight_DirectionalLightScreenToShadow[_286] * float4(_177.xy, _224, 1.0); + break; + } + _286++; + continue; + } + else + { + _303 = float4(0.0); + break; + } + } + float _423; + if (_303.z > 0.0) + { + float2 _311 = _303.xy * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.xy; + float2 _312 = fract(_311); + float2 _313 = floor(_311); + float3 _320; + _320.x = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(-0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _320.y = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(0.5, -0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _320.z = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(1.5, -0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + float3 _335 = float3(MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.w); + float3 _337 = float3((fast::min(_303.z, 0.999989986419677734375) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.w) - 1.0); + float3 _339 = fast::clamp((_320 * _335) - _337, float3(0.0), float3(1.0)); + float3 _345; + _345.x = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(-0.5, 0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _345.y = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(0.5)) * 
MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _345.z = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(1.5, 0.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + float3 _360 = fast::clamp((_345 * _335) - _337, float3(0.0), float3(1.0)); + float3 _366; + _366.x = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(-0.5, 1.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _366.y = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(0.5, 1.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + _366.z = MobileDirectionalLight_DirectionalLightShadowTexture.sample(MobileDirectionalLight_DirectionalLightShadowSampler, ((_313 + float2(1.5)) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightShadowSize.zw), level(0.0)).x; + float3 _381 = fast::clamp((_366 * _335) - _337, float3(0.0), float3(1.0)); + float _383 = _312.x; + float _384 = 1.0 - _383; + float3 _399; + _399.x = ((_339.x * _384) + _339.y) + (_339.z * _383); + _399.y = ((_360.x * _384) + _360.y) + (_360.z * _383); + _399.z = ((_381.x * _384) + _381.y) + (_381.z * _383); + float _408 = _312.y; + float _420 = fast::clamp((_224 * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDistanceFadeMAD.x) + MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDistanceFadeMAD.y, 0.0, 1.0); + _423 = mix(fast::clamp(0.25 * dot(_399, float3(1.0 - _408, 1.0, _408)), 0.0, 1.0), 1.0, _420 * _420); + } + else + { + _423 = 1.0; + } + float3 _429 = fast::normalize(_181 + MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.xyz); + float 
_439 = (_253 * 0.25) + 0.25; + float3 _440 = cross(_206, _429); + float _442 = _253 * _253; + float _443 = fast::max(0.0, dot(_206, _429)) * _442; + float _446 = _442 / (dot(_440, _440) + (_443 * _443)); + bool _458 = float(_Globals.MobileReflectionParams.w > 0.0) != 0.0; + float4 _468 = ReflectionCubemap.sample(ReflectionCubemapSampler, ((-_181) + ((_206 * float3(_208)) * float3(2.0))), level(((_458 ? _Globals.MobileReflectionParams.w : View.View_ReflectionCubemapMaxMip) - 1.0) - (1.0 - (1.2000000476837158203125 * log2(_253))))); + float3 _481; + if (_458) + { + _481 = _468.xyz * View.View_SkyLightColor.xyz; + } + else + { + float3 _476 = _468.xyz * float3(_468.w * 16.0); + _481 = _476 * _476; + } + float3 _484 = float3(_276); + float3 _488; + _488 = ((float3(_423 * fast::max(0.0, dot(_206, MobileDirectionalLight.MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition.xyz))) * MobileDirectionalLight.MobileDirectionalLight_DirectionalLightColor.xyz) * (_270 + float3(_276 * (_439 * fast::min(_446 * _446, 65504.0))))) + ((_481 * float3(fast::clamp(1.0, 0.0, 1.0))) * _484); + float3 _507; + float _509; + float _511; + float _537; + int _491 = 0; + for (;;) + { + if (_491 < _Globals.NumDynamicPointLights) + { + float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - _179; + float _502 = dot(_501, _501); + float3 _505 = _501 * float3(rsqrt(_502)); + _507 = fast::normalize(_181 + _505); + _509 = fast::max(0.0, dot(_206, _505)); + _511 = fast::max(0.0, dot(_206, _507)); + if (_Globals.LightColorAndFalloffExponent[_491].w == 0.0) + { + float _531 = _502 * (_Globals.LightPositionAndInvRadius[_491].w * _Globals.LightPositionAndInvRadius[_491].w); + float _534 = fast::clamp(1.0 - (_531 * _531), 0.0, 1.0); + _537 = (1.0 / (_502 + 1.0)) * (_534 * _534); + } + else + { + float3 _521 = _501 * float3(_Globals.LightPositionAndInvRadius[_491].w); + _537 = pow(1.0 - fast::clamp(dot(_521, _521), 0.0, 1.0), _Globals.LightColorAndFalloffExponent[_491].w); + } + float3 
_544 = cross(_206, _507); + float _546 = _511 * _442; + float _549 = _442 / (dot(_544, _544) + (_546 * _546)); + _488 += fast::min(float3(65000.0), ((float3(_537 * _509) * _Globals.LightColorAndFalloffExponent[_491].xyz) * float3(0.3183098733425140380859375)) * (_270 + float3(_276 * (_439 * fast::min(_549 * _549, 65504.0))))); + _491++; + continue; + } + else + { + break; + } + } + float3 _567 = (mix(_488 + fast::max(float3(0.0), float3(0.0)), _270 + _484, float3(View.View_UnlitViewmodeMask)) * float3(in.in_var_TEXCOORD7.w)) + in.in_var_TEXCOORD7.xyz; + float4 _568 = float4(_567.x, _567.y, _567.z, _137.w); + _568.w = fast::min(in.in_var_TEXCOORD8.w, 65500.0); + out.out_var_SV_Target0 = _568; + return out; +} + diff --git a/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag new file mode 100644 index 00000000000..bb6058c387e --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag @@ -0,0 +1,353 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_PrimitiveDither +{ + float PrimitiveDither_LODFactor; +}; + +struct type_PrimitiveFade +{ + float2 PrimitiveFade_FadeTimeScaleBias; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[9]; + float4 Material_ScalarExpressions[3]; +}; + +constant float _98 = {}; +constant float _103 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; + float gl_FragDepth [[depth(less)]]; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD6 [[user(locn0)]]; + float4 in_var_TEXCOORD7 [[user(locn1)]]; + float4 in_var_TEXCOORD10_centroid [[user(locn2)]]; + 
float4 in_var_TEXCOORD11_centroid [[user(locn3)]]; + float4 in_var_TEXCOORD0_0 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_PrimitiveDither& PrimitiveDither [[buffer(1)]], constant type_PrimitiveFade& PrimitiveFade [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture2d Material_Texture2D_0 [[texture(0)]], texture2d Material_Texture2D_3 [[texture(1)]], sampler Material_Texture2D_0Sampler [[sampler(0)]], sampler Material_Texture2D_3Sampler [[sampler(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + spvUnsafeArray in_var_TEXCOORD0 = {}; + in_var_TEXCOORD0[0] = in.in_var_TEXCOORD0_0; + float2 _135 = gl_FragCoord.xy - View.View_ViewRectMin.xy; + float4 _140 = float4(_103, _103, gl_FragCoord.z, 1.0) * float4(gl_FragCoord.w); + float4 _144 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); + float3 _148 = _144.xyz / float3(_144.w); + float3 _149 = _148 - float3(View.View_PreViewTranslation); + float3 _151 = fast::normalize(-_148); + float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz); + float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); + float _171 = floor(_170); + float _172 = 1.0 / _170; + float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172); + float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _176 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float _180_copy; + float2 _183; + _183 = float2(0.0); + float _188; + float _211; + float2 _212; + float _180 = 1.0; + int _185 = 0; + float _187 = 1.0; + float _189 = 1.0; 
+ for (;;) + { + if (float(_185) < (_171 + 2.0)) + { + _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y; + if (_180 < _188) + { + float _201 = _188 - _180; + float _203 = _201 / ((_189 - _187) + _201); + _211 = (_189 * _203) + (_180 * (1.0 - _203)); + _212 = _183 - (float2(_203) * _174); + break; + } + _180_copy = _180; + _180 -= _172; + _183 += _174; + _185++; + _187 = _188; + _189 = _180_copy; + continue; + } + else + { + _211 = _98; + _212 = _183; + break; + } + } + float4 _218 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _212.xy), bias(View.View_MaterialTextureMipBias)); + float2 _229 = _135 + float2(View.View_TemporalAAParams.x); + float _237 = float((uint(_229.x) + (2u * uint(_229.y))) % 5u); + float2 _238 = _135 * float2(0.015625); + float4 _242 = Material_Texture2D_3.sample(Material_Texture2D_3Sampler, _238, bias(View.View_MaterialTextureMipBias)); + float4 _254 = Material_Texture2D_3.sample(Material_Texture2D_3Sampler, _238, bias(View.View_MaterialTextureMipBias)); + float3 _272 = float3(_212, (1.0 - _211) * Material.Material_ScalarExpressions[0].x); + float2 _275 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _276 = abs(_275); + float3 _279 = dfdx(_149); + float2 _283 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _284 = abs(_283); + float3 _287 = dfdy(_149); + if (PrimitiveDither.PrimitiveDither_LODFactor != 0.0) + { + if (abs(PrimitiveDither.PrimitiveDither_LODFactor) > 0.001000000047497451305389404296875) + { + float _317 = fract(cos(dot(floor(gl_FragCoord.xy), float2(347.834503173828125, 3343.28369140625))) * 1000.0); + if ((float((PrimitiveDither.PrimitiveDither_LODFactor < 0.0) ? 
((PrimitiveDither.PrimitiveDither_LODFactor + 1.0) > _317) : (PrimitiveDither.PrimitiveDither_LODFactor < _317)) - 0.001000000047497451305389404296875) < 0.0) + { + discard_fragment(); + } + } + } + if ((((_218.z + ((fast::min(fast::max(1.0 - (_218.x * Material.Material_ScalarExpressions[2].y), 0.0), 1.0) + ((_237 + (_242.x * Material.Material_ScalarExpressions[2].z)) * 0.16666667163372039794921875)) + (-0.5))) * ((fast::clamp((View.View_RealTime * PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.x) + PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.y, 0.0, 1.0) + ((_237 + _254.x) * 0.16666667163372039794921875)) + (-0.5))) - 0.33329999446868896484375) < 0.0) + { + discard_fragment(); + } + float2 _351 = ((((in.in_var_TEXCOORD6.xy / float2(in.in_var_TEXCOORD6.w)) - View.View_TemporalAAJitter.xy) - ((in.in_var_TEXCOORD7.xy / float2(in.in_var_TEXCOORD7.w)) - View.View_TemporalAAJitter.zw)) * float2(0.2495000064373016357421875)) + float2(0.49999237060546875); + out.gl_FragDepth = fast::min(_140.z / (_140.w + (sqrt(dot(_272, _272)) / (fast::max(sqrt(dot(_276, _276)) / sqrt(dot(_279, _279)), sqrt(dot(_284, _284)) / sqrt(dot(_287, _287))) / abs(dot(float3x3(View.View_ViewToTranslatedWorld[0].xyz, View.View_ViewToTranslatedWorld[1].xyz, View.View_ViewToTranslatedWorld[2].xyz) * float3(0.0, 0.0, 1.0), _151))))), gl_FragCoord.z); + out.out_var_SV_Target0 = float4(_351.x, _351.y, float2(0.0).x, float2(0.0).y); + return out; +} + diff --git a/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag new file mode 100644 index 00000000000..bb6058c387e --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag @@ -0,0 +1,353 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_PrimitiveDither +{ + float PrimitiveDither_LODFactor; +}; + +struct type_PrimitiveFade +{ + float2 PrimitiveFade_FadeTimeScaleBias; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[9]; + float4 Material_ScalarExpressions[3]; +}; + +constant float _98 = {}; +constant float _103 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; + float gl_FragDepth [[depth(less)]]; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD6 [[user(locn0)]]; + float4 in_var_TEXCOORD7 [[user(locn1)]]; + float4 in_var_TEXCOORD10_centroid [[user(locn2)]]; + 
float4 in_var_TEXCOORD11_centroid [[user(locn3)]]; + float4 in_var_TEXCOORD0_0 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_PrimitiveDither& PrimitiveDither [[buffer(1)]], constant type_PrimitiveFade& PrimitiveFade [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture2d Material_Texture2D_0 [[texture(0)]], texture2d Material_Texture2D_3 [[texture(1)]], sampler Material_Texture2D_0Sampler [[sampler(0)]], sampler Material_Texture2D_3Sampler [[sampler(1)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + spvUnsafeArray in_var_TEXCOORD0 = {}; + in_var_TEXCOORD0[0] = in.in_var_TEXCOORD0_0; + float2 _135 = gl_FragCoord.xy - View.View_ViewRectMin.xy; + float4 _140 = float4(_103, _103, gl_FragCoord.z, 1.0) * float4(gl_FragCoord.w); + float4 _144 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); + float3 _148 = _144.xyz / float3(_144.w); + float3 _149 = _148 - float3(View.View_PreViewTranslation); + float3 _151 = fast::normalize(-_148); + float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz); + float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); + float _171 = floor(_170); + float _172 = 1.0 / _170; + float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172); + float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _176 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float _180_copy; + float2 _183; + _183 = float2(0.0); + float _188; + float _211; + float2 _212; + float _180 = 1.0; + int _185 = 0; + float _187 = 1.0; + float _189 = 1.0; 
+ for (;;) + { + if (float(_185) < (_171 + 2.0)) + { + _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y; + if (_180 < _188) + { + float _201 = _188 - _180; + float _203 = _201 / ((_189 - _187) + _201); + _211 = (_189 * _203) + (_180 * (1.0 - _203)); + _212 = _183 - (float2(_203) * _174); + break; + } + _180_copy = _180; + _180 -= _172; + _183 += _174; + _185++; + _187 = _188; + _189 = _180_copy; + continue; + } + else + { + _211 = _98; + _212 = _183; + break; + } + } + float4 _218 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _212.xy), bias(View.View_MaterialTextureMipBias)); + float2 _229 = _135 + float2(View.View_TemporalAAParams.x); + float _237 = float((uint(_229.x) + (2u * uint(_229.y))) % 5u); + float2 _238 = _135 * float2(0.015625); + float4 _242 = Material_Texture2D_3.sample(Material_Texture2D_3Sampler, _238, bias(View.View_MaterialTextureMipBias)); + float4 _254 = Material_Texture2D_3.sample(Material_Texture2D_3Sampler, _238, bias(View.View_MaterialTextureMipBias)); + float3 _272 = float3(_212, (1.0 - _211) * Material.Material_ScalarExpressions[0].x); + float2 _275 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _276 = abs(_275); + float3 _279 = dfdx(_149); + float2 _283 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _284 = abs(_283); + float3 _287 = dfdy(_149); + if (PrimitiveDither.PrimitiveDither_LODFactor != 0.0) + { + if (abs(PrimitiveDither.PrimitiveDither_LODFactor) > 0.001000000047497451305389404296875) + { + float _317 = fract(cos(dot(floor(gl_FragCoord.xy), float2(347.834503173828125, 3343.28369140625))) * 1000.0); + if ((float((PrimitiveDither.PrimitiveDither_LODFactor < 0.0) ? 
((PrimitiveDither.PrimitiveDither_LODFactor + 1.0) > _317) : (PrimitiveDither.PrimitiveDither_LODFactor < _317)) - 0.001000000047497451305389404296875) < 0.0) + { + discard_fragment(); + } + } + } + if ((((_218.z + ((fast::min(fast::max(1.0 - (_218.x * Material.Material_ScalarExpressions[2].y), 0.0), 1.0) + ((_237 + (_242.x * Material.Material_ScalarExpressions[2].z)) * 0.16666667163372039794921875)) + (-0.5))) * ((fast::clamp((View.View_RealTime * PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.x) + PrimitiveFade.PrimitiveFade_FadeTimeScaleBias.y, 0.0, 1.0) + ((_237 + _254.x) * 0.16666667163372039794921875)) + (-0.5))) - 0.33329999446868896484375) < 0.0) + { + discard_fragment(); + } + float2 _351 = ((((in.in_var_TEXCOORD6.xy / float2(in.in_var_TEXCOORD6.w)) - View.View_TemporalAAJitter.xy) - ((in.in_var_TEXCOORD7.xy / float2(in.in_var_TEXCOORD7.w)) - View.View_TemporalAAJitter.zw)) * float2(0.2495000064373016357421875)) + float2(0.49999237060546875); + out.gl_FragDepth = fast::min(_140.z / (_140.w + (sqrt(dot(_272, _272)) / (fast::max(sqrt(dot(_276, _276)) / sqrt(dot(_279, _279)), sqrt(dot(_284, _284)) / sqrt(dot(_287, _287))) / abs(dot(float3x3(View.View_ViewToTranslatedWorld[0].xyz, View.View_ViewToTranslatedWorld[1].xyz, View.View_ViewToTranslatedWorld[2].xyz) * float3(0.0, 0.0, 1.0), _151))))), gl_FragCoord.z); + out.out_var_SV_Target0 = float4(_351.x, _351.y, float2(0.0).x, float2(0.0).y); + return out; +} + diff --git a/reference/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese b/reference/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese new file mode 100644 index 00000000000..346d7e3fc95 --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese @@ -0,0 +1,318 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; + float PrePadding_View_3048; + float PrePadding_View_3052; + float4x4 View_WorldToVirtualTexture; + float4 View_VirtualTextureParams; + float4 View_XRPassthroughCameraUVs[2]; +}; + +constant float4 _68 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn1)]]; + float4 out_var_TEXCOORD0_0 [[user(locn2)]]; + float4 out_var_COLOR1 [[user(locn3)]]; + float4 out_var_COLOR2 [[user(locn4)]]; + float4 out_var_TEXCOORD6 [[user(locn5)]]; + float3 out_var_TEXCOORD7 [[user(locn6)]]; + float4 gl_Position [[position]]; 
+}; + +struct main0_in +{ + float4 in_var_COLOR1 [[attribute(0)]]; + float4 in_var_COLOR2 [[attribute(1)]]; + float4 in_var_TEXCOORD0_0 [[attribute(5)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(6)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(7)]]; + float3 in_var_TEXCOORD7 [[attribute(8)]]; + float4 in_var_VS_To_DS_Position [[attribute(9)]]; +}; + +struct main0_patchIn +{ + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray out_var_TEXCOORD0 = {}; + spvUnsafeArray _77 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _78 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _79 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_TEXCOORD0_0 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray _80 = spvUnsafeArray({ patchIn.gl_in[0].in_var_COLOR1, patchIn.gl_in[1].in_var_COLOR1, patchIn.gl_in[2].in_var_COLOR1 }); + spvUnsafeArray _81 = spvUnsafeArray({ patchIn.gl_in[0].in_var_COLOR2, patchIn.gl_in[1].in_var_COLOR2, patchIn.gl_in[2].in_var_COLOR2 }); + spvUnsafeArray _97 = spvUnsafeArray({ patchIn.gl_in[0].in_var_VS_To_DS_Position, patchIn.gl_in[1].in_var_VS_To_DS_Position, patchIn.gl_in[2].in_var_VS_To_DS_Position }); + spvUnsafeArray _98 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD7, patchIn.gl_in[1].in_var_TEXCOORD7, patchIn.gl_in[2].in_var_TEXCOORD7 }); + float4 _111 = float4(gl_TessCoord.x); + float4 _113 = float4(gl_TessCoord.y); + float4 _116 = float4(gl_TessCoord.z); + float4 _118 = ((_97[0] * _111) + (_97[1] * 
_113)) + (_97[2] * _116); + spvUnsafeArray _72; + _72 = _79[0]; + spvUnsafeArray _71; + _71 = _79[1]; + float3 _120 = float3(gl_TessCoord.x); + float3 _123 = float3(gl_TessCoord.y); + spvUnsafeArray _73; + for (int _133 = 0; _133 < 1; ) + { + _73[_133] = (_72[_133] * _111) + (_71[_133] * _113); + _133++; + continue; + } + spvUnsafeArray _75; + _75 = _73; + spvUnsafeArray _74; + _74 = _79[2]; + float3 _155 = float3(gl_TessCoord.z); + float3 _157 = ((_77[0].xyz * _120) + (_77[1].xyz * _123)).xyz + (_77[2].xyz * _155); + spvUnsafeArray _76; + for (int _164 = 0; _164 < 1; ) + { + _76[_164] = _75[_164] + (_74[_164] * _116); + _164++; + continue; + } + float4 _181 = float4(_118.x, _118.y, _118.z, _118.w); + out.out_var_TEXCOORD10_centroid = float4(_157.x, _157.y, _157.z, _68.w); + out.out_var_TEXCOORD11_centroid = ((_78[0] * _111) + (_78[1] * _113)) + (_78[2] * _116); + out_var_TEXCOORD0 = _76; + out.out_var_COLOR1 = ((_80[0] * _111) + (_80[1] * _113)) + (_80[2] * _116); + out.out_var_COLOR2 = ((_81[0] * _111) + (_81[1] * _113)) + (_81[2] * _116); + out.out_var_TEXCOORD6 = _181; + out.out_var_TEXCOORD7 = ((_98[0] * _120) + (_98[1] * _123)) + (_98[2] * _155); + out.gl_Position = View.View_TranslatedWorldToClip * _181; + out.out_var_TEXCOORD0_0 = out_var_TEXCOORD0[0]; + return out; +} + diff --git a/reference/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert b/reference/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert new file mode 100644 index 00000000000..b1298b7e683 --- /dev/null +++ b/reference/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert @@ -0,0 +1,122 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 ViewportSize; + float ScatteringScaling; + float CocRadiusToCircumscribedRadius; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct main0_out +{ + float2 out_var_TEXCOORD0 [[user(locn0)]]; + float4 out_var_TEXCOORD1 [[user(locn1)]]; + float4 out_var_TEXCOORD2 [[user(locn2)]]; + float4 out_var_TEXCOORD3 [[user(locn3)]]; + float4 out_var_TEXCOORD4 [[user(locn4)]]; + float4 out_var_TEXCOORD5 [[user(locn5)]]; + float4 out_var_TEXCOORD6 [[user(locn6)]]; + float4 gl_Position [[position]]; +}; + +vertex main0_out main0(constant type_Globals& _Globals [[buffer(0)]], const device type_StructuredBuffer_v4float& ScatterDrawList [[buffer(1)]], uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + uint _66 = gl_VertexIndex / 4u; + uint _68 = gl_VertexIndex - (_66 * 4u); + uint _70 = (16u * gl_InstanceIndex) + _66; + float _72; + _72 = 0.0; + spvUnsafeArray _61; + spvUnsafeArray _62; + spvUnsafeArray _63; + float _73; + uint _75 = 0u; + for (;;) + { + if (_75 < 4u) + { + uint _82 = ((5u * _70) + _75) + 1u; + _61[_75] = float4(ScatterDrawList._m0[_82].xyz, 0.0); + _62[_75] = ScatterDrawList._m0[_82].w; + if (_75 == 0u) + { + _73 = _62[_75]; + } + else + { + _73 = fast::max(_72, _62[_75]); + } + _63[_75].x = (-0.5) / 
_62[_75]; + _63[_75].y = (0.5 * _62[_75]) + 0.5; + _72 = _73; + _75++; + continue; + } + else + { + break; + } + } + float2 _144 = float2(_Globals.ScatteringScaling) * ScatterDrawList._m0[5u * _70].xy; + float2 _173 = (((float2((_72 * _Globals.CocRadiusToCircumscribedRadius) + 1.0) * ((float2(float(_68 % 2u), float(_68 / 2u)) * float2(2.0)) - float2(1.0))) + _144) + float2(0.5)) * _Globals.ViewportSize.zw; + out.out_var_TEXCOORD0 = _144; + out.out_var_TEXCOORD1 = float4(_61[0].xyz, _62[0]); + out.out_var_TEXCOORD2 = float4(_61[1].xyz, _62[1]); + out.out_var_TEXCOORD3 = float4(_61[2].xyz, _62[2]); + out.out_var_TEXCOORD4 = float4(_61[3].xyz, _62[3]); + out.out_var_TEXCOORD5 = float4(_63[0].x, _63[0].y, _63[1].x, _63[1].y); + out.out_var_TEXCOORD6 = float4(_63[2].x, _63[2].y, _63[3].x, _63[3].y); + out.gl_Position = float4((_173.x * 2.0) - 1.0, 1.0 - (_173.y * 2.0), 0.0, 1.0); + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/depth-compare.asm.frag b/reference/shaders-ue4/asm/frag/depth-compare.asm.frag new file mode 100644 index 00000000000..0a6c98418e3 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -0,0 +1,315 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 
View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float 
View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float 
View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4x4 ShadowViewProjectionMatrices[6]; + float InvShadowmapResolution; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float2 ProjectionDepthBiasParameters; + float4 PointLightDepthBiasAndProjParameters; +}; + +constant float4 _107 = {}; + +struct main0_out 
+{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d SceneTexturesStruct_SceneDepthTexture [[texture(0)]], texture2d SceneTexturesStruct_GBufferATexture [[texture(1)]], texture2d SceneTexturesStruct_GBufferBTexture [[texture(2)]], texture2d SceneTexturesStruct_GBufferDTexture [[texture(3)]], depthcube ShadowDepthCubeTexture [[texture(4)]], texture2d SSProfilesTexture [[texture(5)]], sampler SceneTexturesStruct_SceneDepthTextureSampler [[sampler(0)]], sampler SceneTexturesStruct_GBufferATextureSampler [[sampler(1)]], sampler SceneTexturesStruct_GBufferBTextureSampler [[sampler(2)]], sampler SceneTexturesStruct_GBufferDTextureSampler [[sampler(3)]], sampler ShadowDepthTextureSampler [[sampler(4)]], sampler ShadowDepthCubeTextureSampler [[sampler(5)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float2 _114 = gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw; + float4 _118 = SceneTexturesStruct_SceneDepthTexture.sample(SceneTexturesStruct_SceneDepthTextureSampler, _114, level(0.0)); + float _119 = _118.x; + float _133 = ((_119 * View.View_InvDeviceZToWorldZTransform.x) + View.View_InvDeviceZToWorldZTransform.y) + (1.0 / ((_119 * View.View_InvDeviceZToWorldZTransform.z) - View.View_InvDeviceZToWorldZTransform.w)); + float4 _147 = View.View_ScreenToWorld * float4(((_114 - View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_133), _133, 1.0); + float3 _148 = _147.xyz; + float3 _152 = _Globals.LightPositionAndInvRadius.xyz - _148; + float _158 = length(_152); + bool _160 = (_158 * _Globals.LightPositionAndInvRadius.w) < 1.0; + float _207; + if (_160) + { + float3 _165 = abs(_152); + float _166 = _165.x; + float _167 = _165.y; + float _168 = _165.z; + float _170 = fast::max(_166, fast::max(_167, _168)); + int _189; + if (_170 == _166) + { + _189 = (_166 == _152.x) ? 
0 : 1; + } + else + { + int _185; + if (_170 == _167) + { + _185 = (_167 == _152.y) ? 2 : 3; + } + else + { + _185 = (_168 == _152.z) ? 4 : 5; + } + _189 = _185; + } + float4 _196 = _Globals.ShadowViewProjectionMatrices[_189] * float4(_147.xyz, 1.0); + float _198 = _196.w; + _207 = ShadowDepthCubeTexture.sample_compare(ShadowDepthCubeTextureSampler, (_152 / float3(_158)), (_196.z / _198) + ((-_Globals.PointLightDepthBiasAndProjParameters.x) / _198), level(0.0)); + } + else + { + _207 = 1.0; + } + float _213 = fast::clamp(((_207 - 0.5) * _Globals.ShadowSharpen) + 0.5, 0.0, 1.0); + float _218 = sqrt(mix(1.0, _213 * _213, _Globals.ShadowFadeFraction)); + float4 _219; + _219.z = _218; + float4 _220 = float4(float3(1.0).x, float3(1.0).y, _219.z, float3(1.0).z); + float3 _236 = fast::normalize((SceneTexturesStruct_GBufferATexture.sample(SceneTexturesStruct_GBufferATextureSampler, _114, level(0.0)).xyz * float3(2.0)) - float3(1.0)); + uint _240 = uint(round(SceneTexturesStruct_GBufferBTexture.sample(SceneTexturesStruct_GBufferBTextureSampler, _114, level(0.0)).w * 255.0)); + bool _248 = (_240 & 15u) == 5u; + float _448; + if (_248) + { + float4 _260 = SSProfilesTexture.read(uint2(int3(1, int(uint((select(float4(0.0), SceneTexturesStruct_GBufferDTexture.sample(SceneTexturesStruct_GBufferDTextureSampler, _114, level(0.0)), bool4(!(((_240 & 4294967280u) & 16u) != 0u))).x * 255.0) + 0.5)), 0).xy), 0); + float _263 = _260.y * 0.5; + float3 _266 = _148 - (_236 * float3(_263)); + float _274 = pow(fast::clamp(dot(-(_152 * float3(rsqrt(dot(_152, _152)))), _236), 0.0, 1.0), 1.0); + float _445; + if (_160) + { + float3 _278 = _152 / float3(_158); + float3 _280 = fast::normalize(cross(_278, float3(0.0, 0.0, 1.0))); + float3 _284 = float3(_Globals.InvShadowmapResolution); + float3 _285 = _280 * _284; + float3 _286 = cross(_280, _278) * _284; + float3 _287 = abs(_278); + float _288 = _287.x; + float _289 = _287.y; + float _290 = _287.z; + float _292 = fast::max(_288, fast::max(_289, 
_290)); + int _311; + if (_292 == _288) + { + _311 = (_288 == _278.x) ? 0 : 1; + } + else + { + int _307; + if (_292 == _289) + { + _307 = (_289 == _278.y) ? 2 : 3; + } + else + { + _307 = (_290 == _278.z) ? 4 : 5; + } + _311 = _307; + } + float4 _318 = _Globals.ShadowViewProjectionMatrices[_311] * float4(_266, 1.0); + float _323 = _260.x * (10.0 / _Globals.LightPositionAndInvRadius.w); + float _329 = (1.0 / (((_318.z / _318.w) * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w; + float _342 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, (_278 + (_286 * float3(2.5))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + float _364 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(2.3776409626007080078125))) + (_286 * float3(0.77254199981689453125))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + float _387 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(1.46946299076080322265625))) + (_286 * float3(-2.0225429534912109375))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + float _410 = (_329 - ((1.0 / ((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(-1.46946299076080322265625))) + (_286 * float3(-2.02254199981689453125))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + float _433 = (_329 - ((1.0 / 
((float4(ShadowDepthCubeTexture.sample(ShadowDepthTextureSampler, ((_278 + (_285 * float3(-2.3776409626007080078125))) + (_286 * float3(0.772543013095855712890625))), level(0.0))).x * _Globals.PointLightDepthBiasAndProjParameters.z) - _Globals.PointLightDepthBiasAndProjParameters.w)) * _Globals.LightPositionAndInvRadius.w)) * _323; + _445 = (((((fast::clamp(abs((_342 > 0.0) ? (_342 + _263) : fast::max(0.0, (_342 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25) + (fast::clamp(abs((_364 > 0.0) ? (_364 + _263) : fast::max(0.0, (_364 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_387 > 0.0) ? (_387 + _263) : fast::max(0.0, (_387 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_410 > 0.0) ? (_410 + _263) : fast::max(0.0, (_410 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) + (fast::clamp(abs((_433 > 0.0) ? (_433 + _263) : fast::max(0.0, (_433 * _274) + _263)), 0.1500000059604644775390625, 5.0) + 0.25)) * 0.20000000298023223876953125; + } + else + { + _445 = 1.0; + } + _448 = 1.0 - (_445 * 0.20000000298023223876953125); + } + else + { + _448 = 1.0; + } + _220.w = _248 ? sqrt(_448) : _218; + out.out_var_SV_Target0 = _220; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag new file mode 100644 index 00000000000..88618a85129 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -0,0 +1,1346 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 MappingPolynomial; + float3 InverseGamma; + float4 ColorMatrixR_ColorCurveCd1; + float4 ColorMatrixG_ColorCurveCd3Cm3; + float4 ColorMatrixB_ColorCurveCm2; + float4 ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3; + float4 ColorCurve_Ch1_Ch2; + float4 ColorShadow_Luma; + float4 ColorShadow_Tint1; + float4 ColorShadow_Tint2; + float FilmSlope; + float FilmToe; + float FilmShoulder; + float FilmBlackClip; + float FilmWhiteClip; + packed_float3 ColorScale; + float4 OverlayColor; + float WhiteTemp; + float WhiteTint; + float4 ColorSaturation; + float4 ColorContrast; + float4 ColorGamma; + float4 ColorGain; + float4 ColorOffset; + float4 ColorSaturationShadows; + float4 ColorContrastShadows; + float4 ColorGammaShadows; + float4 ColorGainShadows; + float4 ColorOffsetShadows; + float4 ColorSaturationMidtones; + float4 ColorContrastMidtones; + float4 ColorGammaMidtones; + float4 ColorGainMidtones; + float4 ColorOffsetMidtones; + float4 ColorSaturationHighlights; + float4 ColorContrastHighlights; + float4 ColorGammaHighlights; + float4 ColorGainHighlights; + float4 ColorOffsetHighlights; + float ColorCorrectionShadowsMax; + float ColorCorrectionHighlightsMin; + uint OutputDevice; + uint OutputGamut; + float BlueCorrection; + float ExpandGamut; +}; + +constant float3 _391 = {}; + +constant 
spvUnsafeArray _475 = spvUnsafeArray({ -4.0, -4.0, -3.1573765277862548828125, -0.485249996185302734375, 1.84773242473602294921875, 1.84773242473602294921875 }); +constant spvUnsafeArray _476 = spvUnsafeArray({ -0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875, 4.0, 4.0, 4.0 }); +constant spvUnsafeArray _479 = spvUnsafeArray({ -4.97062206268310546875, -3.0293781757354736328125, -2.1261999607086181640625, -1.5104999542236328125, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _480 = spvUnsafeArray({ 0.80891323089599609375, 1.19108676910400390625, 1.5683000087738037109375, 1.94830000400543212890625, 2.308300018310546875, 2.63840007781982421875, 2.85949993133544921875, 2.9872608184814453125, 3.0127391815185546875, 3.0127391815185546875 }); +constant spvUnsafeArray _482 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _483 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0), center_no_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globals [[buffer(0)]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + float3x3 _546 = float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), 
float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * float3x3(float3(1.01303005218505859375, 0.0061053098179399967193603515625, -0.014971000142395496368408203125), float3(0.0076982299797236919403076171875, 0.99816501140594482421875, -0.005032029934227466583251953125), float3(-0.0028413101099431514739990234375, 0.0046851597726345062255859375, 0.92450702190399169921875)); + float3x3 _547 = _546 * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _548 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(0.98722398281097412109375, -0.0061132698319852352142333984375, 0.01595330052077770233154296875), float3(-0.007598360069096088409423828125, 1.00186002254486083984375, 0.0053300200961530208587646484375), float3(0.003072570078074932098388671875, -0.0050959498621523380279541015625, 1.0816800594329833984375)); + float3x3 _549 = _548 * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)); + float3x3 _550 = float3x3(float3(0.952552378177642822265625, 0.0, 9.25), float3(0.3439664542675018310546875, 0.728166103363037109375, -0.07213254272937774658203125), float3(0.0, 0.0, 
1.00882518291473388671875)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _551 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625)); + float3x3 _576; + for (;;) + { + if (_Globals.OutputGamut == 1u) + { + _576 = _548 * float3x3(float3(2.493396282196044921875, -0.931345880031585693359375, -0.4026944935321807861328125), float3(-0.829486787319183349609375, 1.76265966892242431640625, 0.02362460084259510040283203125), float3(0.0358506999909877777099609375, -0.076182700693607330322265625, 0.957014024257659912109375)); + break; + } + else + { + if (_Globals.OutputGamut == 2u) + { + _576 = _548 * float3x3(float3(1.71660840511322021484375, -0.3556621074676513671875, -0.253360092639923095703125), float3(-0.666682898998260498046875, 1.61647760868072509765625, 0.01576850004494190216064453125), float3(0.017642199993133544921875, -0.04277630150318145751953125, 0.94222867488861083984375)); + break; + } + else + { + if (_Globals.OutputGamut == 3u) + { + _576 = float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625)); + 
break; + } + else + { + if (_Globals.OutputGamut == 4u) + { + _576 = float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)); + break; + } + else + { + _576 = _549; + break; + } + } + } + } + } + float3 _577 = float4((in.in_var_TEXCOORD0 - float2(0.015625)) * float2(1.03225803375244140625), float(gl_Layer) * 0.0322580635547637939453125, 0.0).xyz; + float3 _599; + if (_Globals.OutputDevice >= 3u) + { + float3 _591 = pow(_577, float3(0.0126833133399486541748046875)); + _599 = pow(fast::max(float3(0.0), _591 - float3(0.8359375)) / (float3(18.8515625) - (float3(18.6875) * _591)), float3(6.277394771575927734375)) * float3(10000.0); + } + else + { + _599 = (exp2((_577 - float3(0.434017598628997802734375)) * float3(14.0)) * float3(0.180000007152557373046875)) - (exp2(float3(-6.0762462615966796875)) * float3(0.180000007152557373046875)); + } + float _602 = _Globals.WhiteTemp * 1.00055634975433349609375; + float _616 = (_602 <= 7000.0) ? (0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _602)) / _602)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _602)) / _602)); + float _633 = ((0.860117733478546142578125 + (0.00015411825734190642833709716796875 * _Globals.WhiteTemp)) + ((1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 + (0.0008424202096648514270782470703125 * _Globals.WhiteTemp)) + ((7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _644 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _649 = ((2.0 * _633) - (8.0 * _644)) + 4.0; + float2 _653 = float2((3.0 * _633) / _649, (2.0 * 
_644) / _649); + float2 _660 = fast::normalize(float2(_633, _644)); + float _665 = _633 + (((-_660.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _669 = _644 + ((_660.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _674 = ((2.0 * _665) - (8.0 * _669)) + 4.0; + float2 _680 = select(float2(_616, ((((-3.0) * _616) * _616) + (2.86999988555908203125 * _616)) - 0.2750000059604644775390625), _653, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _665) / _674, (2.0 * _669) / _674) - _653); + float _683 = fast::max(_680.y, 1.0000000133514319600180897396058e-10); + float3 _685; + _685.x = _680.x / _683; + _685.y = 1.0; + _685.z = ((1.0 - _680.x) - _680.y) / _683; + float _691 = fast::max(0.328999996185302734375, 1.0000000133514319600180897396058e-10); + float3 _693; + _693.x = 0.3127000033855438232421875 / _691; + _693.y = 1.0; + _693.z = 0.3582999706268310546875 / _691; + float3 _697 = _685 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _698 = _693 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _717 = (_599 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), 
float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(_698.x / _697.x, 0.0, 0.0), float3(0.0, _698.y / _697.y, 0.0), float3(0.0, 0.0, _698.z / _697.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _547; + float3 _745; + if (_Globals.ColorShadow_Tint2.w != 0.0) + { + float _724 = dot(_717, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float3 _727 = (_717 / float3(_724)) - float3(1.0); + _745 = mix(_717, _717 * (_549 * (float3x3(float3(0.544169127941131591796875, 0.23959259688854217529296875, 0.16669429838657379150390625), float3(0.23946559429168701171875, 0.702153027057647705078125, 0.058381401002407073974609375), float3(-0.0023439000360667705535888671875, 0.0361833982169628143310546875, 1.05521833896636962890625)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)))), float3((1.0 - exp2((-4.0) * dot(_727, _727))) * (1.0 - exp2((((-4.0) * _Globals.ExpandGamut) * _724) * _724)))); + } + else + { + _745 = _717; + } + float _746 = dot(_745, float3(0.272228717803955078125, 
0.674081742763519287109375, 0.053689517080783843994140625)); + float4 _751 = _Globals.ColorSaturationShadows * _Globals.ColorSaturation; + float4 _756 = _Globals.ColorContrastShadows * _Globals.ColorContrast; + float4 _761 = _Globals.ColorGammaShadows * _Globals.ColorGamma; + float4 _766 = _Globals.ColorGainShadows * _Globals.ColorGain; + float4 _771 = _Globals.ColorOffsetShadows + _Globals.ColorOffset; + float3 _772 = float3(_746); + float _804 = smoothstep(0.0, _Globals.ColorCorrectionShadowsMax, _746); + float4 _808 = _Globals.ColorSaturationHighlights * _Globals.ColorSaturation; + float4 _811 = _Globals.ColorContrastHighlights * _Globals.ColorContrast; + float4 _814 = _Globals.ColorGammaHighlights * _Globals.ColorGamma; + float4 _817 = _Globals.ColorGainHighlights * _Globals.ColorGain; + float4 _820 = _Globals.ColorOffsetHighlights + _Globals.ColorOffset; + float _852 = smoothstep(_Globals.ColorCorrectionHighlightsMin, 1.0, _746); + float4 _855 = _Globals.ColorSaturationMidtones * _Globals.ColorSaturation; + float4 _858 = _Globals.ColorContrastMidtones * _Globals.ColorContrast; + float4 _861 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; + float4 _864 = _Globals.ColorGainMidtones * _Globals.ColorGain; + float4 _867 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; + float3 _905 = ((((pow(pow(fast::max(float3(0.0), mix(_772, _745, _751.xyz * float3(_751.w))) * float3(5.5555553436279296875), _756.xyz * float3(_756.w)) * float3(0.180000007152557373046875), float3(1.0) / (_761.xyz * float3(_761.w))) * (_766.xyz * float3(_766.w))) + (_771.xyz + float3(_771.w))) * float3(1.0 - _804)) + (((pow(pow(fast::max(float3(0.0), mix(_772, _745, _855.xyz * float3(_855.w))) * float3(5.5555553436279296875), _858.xyz * float3(_858.w)) * float3(0.180000007152557373046875), float3(1.0) / (_861.xyz * float3(_861.w))) * (_864.xyz * float3(_864.w))) + (_867.xyz + float3(_867.w))) * float3(_804 - _852))) + (((pow(pow(fast::max(float3(0.0), mix(_772, _745, _808.xyz * 
float3(_808.w))) * float3(5.5555553436279296875), _811.xyz * float3(_811.w)) * float3(0.180000007152557373046875), float3(1.0) / (_814.xyz * float3(_814.w))) * (_817.xyz * float3(_817.w))) + (_820.xyz + float3(_820.w))) * float3(_852)); + float3 _906 = _905 * _549; + float3 _914 = float3(_Globals.BlueCorrection); + float3 _916 = mix(_905, _905 * ((_551 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 0.162961304187774658203125), float3(0.0005471261101774871349334716796875, -0.00088337459601461887359619140625, 1.00033628940582275390625))) * _550), _914) * _551; + float _917 = _916.x; + float _918 = _916.y; + float _920 = _916.z; + float _923 = fast::max(fast::max(_917, _918), _920); + float _928 = (fast::max(_923, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_917, _918), _920), 1.0000000133514319600180897396058e-10)) / fast::max(_923, 0.00999999977648258209228515625); + float _941 = ((_920 + _918) + _917) + (1.75 * sqrt(((_920 * (_920 - _918)) + (_918 * (_918 - _917))) + (_917 * (_917 - _920)))); + float _942 = _941 * 0.3333333432674407958984375; + float _943 = _928 - 0.4000000059604644775390625; + float _948 = fast::max(1.0 - abs(_943 * 2.5), 0.0); + float _956 = (1.0 + (float(int(sign(_943 * 5.0))) * (1.0 - (_948 * _948)))) * 0.02500000037252902984619140625; + float _969; + if (_942 <= 0.053333334624767303466796875) + { + _969 = _956; + } + else + { + float _968; + if (_942 >= 0.1599999964237213134765625) + { + _968 = 0.0; + } + else + { + _968 = _956 * ((0.23999999463558197021484375 / _941) - 0.5); + } + _969 = _968; + } + float3 _972 = _916 * float3(1.0 + _969); + float _973 = _972.x; + float _974 = _972.y; + float _976 = _972.z; + float _990; + if ((_973 == _974) && (_974 == _976)) + { + _990 = 0.0; + } + else + { + _990 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_974 - _976), ((2.0 * _973) - _974) - 
_976); + } + float _995; + if (_990 < 0.0) + { + _995 = _990 + 360.0; + } + else + { + _995 = _990; + } + float _996 = fast::clamp(_995, 0.0, 360.0); + float _1001; + if (_996 > 180.0) + { + _1001 = _996 - 360.0; + } + else + { + _1001 = _996; + } + float _1005 = smoothstep(0.0, 1.0, 1.0 - abs(_1001 * 0.01481481455266475677490234375)); + _972.x = _973 + ((((_1005 * _1005) * _928) * (0.02999999932944774627685546875 - _973)) * 0.180000007152557373046875); + float3 _1014 = fast::max(float3(0.0), _972 * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); + float _1023 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; + float _1026 = 1.0 + _Globals.FilmWhiteClip; + float _1029 = _1026 - _Globals.FilmShoulder; + float _1056; + if (_Globals.FilmToe > 0.800000011920928955078125) + { + _1056 = ((0.819999992847442626953125 - _Globals.FilmToe) / _Globals.FilmSlope) + (log(0.180000007152557373046875) / log(10.0)); + } + else + { + float _1035 = (0.180000007152557373046875 + _Globals.FilmBlackClip) / _1023; + _1056 = (log(0.180000007152557373046875) / log(10.0)) - ((0.5 * log(_1035 / (2.0 - _1035))) * (_1023 / _Globals.FilmSlope)); + } + float _1061 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1056; + float _1063 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1061; + float _1065 = log(10.0); + float3 _1067 = log(mix(float3(dot(_1014, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1014, float3(0.959999978542327880859375))) / float3(_1065); + float3 _1071 = float3(_Globals.FilmSlope) * (_1067 + float3(_1061)); + float3 _1079 = float3(_1056); + float3 _1080 = _1067 - _1079; + float3 _1092 = float3(_1063); + float3 _1106 = fast::clamp(_1080 / float3(_1063 - _1056), float3(0.0), 
float3(1.0)); + float3 _1110 = select(_1106, float3(1.0) - _1106, bool3(_1063 < _1056)); + float3 _1115 = mix(select(_1071, float3(-_Globals.FilmBlackClip) + (float3(2.0 * _1023) / (float3(1.0) + exp(float3(((-2.0) * _Globals.FilmSlope) / _1023) * _1080))), _1067 < _1079), select(_1071, float3(_1026) - (float3(2.0 * _1029) / (float3(1.0) + exp(float3((2.0 * _Globals.FilmSlope) / _1029) * (_1067 - _1092)))), _1067 > _1092), ((float3(3.0) - (float3(2.0) * _1110)) * _1110) * _1110); + float3 _1119 = fast::max(float3(0.0), mix(float3(dot(_1115, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1115, float3(0.930000007152557373046875))); + float3 _1189; + if (_Globals.ColorShadow_Tint2.w == 0.0) + { + float3 _1131; + _1131.x = dot(_906, _Globals.ColorMatrixR_ColorCurveCd1.xyz); + _1131.y = dot(_906, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz); + _1131.z = dot(_906, _Globals.ColorMatrixB_ColorCurveCm2.xyz); + float3 _1157 = fast::max(float3(0.0), _1131 * (_Globals.ColorShadow_Tint1.xyz + (_Globals.ColorShadow_Tint2.xyz * float3(1.0 / (dot(_906, _Globals.ColorShadow_Luma.xyz) + 1.0))))); + float3 _1162 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1157); + float3 _1164 = fast::max(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); + _1189 = ((((_1164 * _Globals.ColorCurve_Ch1_Ch2.xxx) + _Globals.ColorCurve_Ch1_Ch2.yyy) * (float3(1.0) / (_1164 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www))) + ((fast::clamp(_1157, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz) * _Globals.ColorMatrixB_ColorCurveCm2.www) + (((_1162 * _Globals.ColorMatrixR_ColorCurveCd1.www) * (float3(1.0) / (_1162 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy))) + _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + } + else + { + _1189 = fast::max(float3(0.0), mix(_1119, _1119 * ((_551 * float3x3(float3(1.06317996978759765625, 
0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _550), _914) * _549); + } + float3 _1218 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1189 * _1189)) + (float3(_Globals.MappingPolynomial.y) * _1189)) + float3(_Globals.MappingPolynomial.z)) * float3(_Globals.ColorScale), _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float3 _3001; + if (_Globals.OutputDevice == 0u) + { + float _2961 = _1218.x; + float _2973; + for (;;) + { + if (_2961 < 0.00313066993840038776397705078125) + { + _2973 = _2961 * 12.9200000762939453125; + break; + } + _2973 = (pow(_2961, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _2974 = _1218.y; + float _2986; + for (;;) + { + if (_2974 < 0.00313066993840038776397705078125) + { + _2986 = _2974 * 12.9200000762939453125; + break; + } + _2986 = (pow(_2974, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _2987 = _1218.z; + float _2999; + for (;;) + { + if (_2987 < 0.00313066993840038776397705078125) + { + _2999 = _2987 * 12.9200000762939453125; + break; + } + _2999 = (pow(_2987, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + _3001 = float3(_2973, _2986, _2999); + } + else + { + float3 _2960; + if (_Globals.OutputDevice == 1u) + { + float3 _2953 = fast::max(float3(6.1035199905745685100555419921875e-05), (_1218 * _547) * _576); + _2960 = fast::min(_2953 * float3(4.5), (pow(fast::max(_2953, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)) * float3(1.09899997711181640625)) - float3(0.098999999463558197021484375)); + } + else + { + float3 
_2950; + if ((_Globals.OutputDevice == 3u) || (_Globals.OutputDevice == 5u)) + { + float3 _2100 = (_906 * float3(1.5)) * (_546 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _2101 = _2100.x; + float _2102 = _2100.y; + float _2104 = _2100.z; + float _2107 = fast::max(fast::max(_2101, _2102), _2104); + float _2112 = (fast::max(_2107, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2101, _2102), _2104), 1.0000000133514319600180897396058e-10)) / fast::max(_2107, 0.00999999977648258209228515625); + float _2125 = ((_2104 + _2102) + _2101) + (1.75 * sqrt(((_2104 * (_2104 - _2102)) + (_2102 * (_2102 - _2101))) + (_2101 * (_2101 - _2104)))); + float _2126 = _2125 * 0.3333333432674407958984375; + float _2127 = _2112 - 0.4000000059604644775390625; + float _2132 = fast::max(1.0 - abs(_2127 * 2.5), 0.0); + float _2140 = (1.0 + (float(int(sign(_2127 * 5.0))) * (1.0 - (_2132 * _2132)))) * 0.02500000037252902984619140625; + float _2153; + if (_2126 <= 0.053333334624767303466796875) + { + _2153 = _2140; + } + else + { + float _2152; + if (_2126 >= 0.1599999964237213134765625) + { + _2152 = 0.0; + } + else + { + _2152 = _2140 * ((0.23999999463558197021484375 / _2125) - 0.5); + } + _2153 = _2152; + } + float3 _2156 = _2100 * float3(1.0 + _2153); + float _2157 = _2156.x; + float _2158 = _2156.y; + float _2160 = _2156.z; + float _2174; + if ((_2157 == _2158) && (_2158 == _2160)) + { + _2174 = 0.0; + } + else + { + _2174 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_2158 - _2160), ((2.0 * _2157) - _2158) - _2160); + } + float _2179; + if (_2174 < 0.0) + { + _2179 = _2174 + 360.0; + } + else + { + _2179 = _2174; + } + float _2180 = fast::clamp(_2179, 0.0, 360.0); + float _2185; + if (_2180 > 180.0) + { + _2185 = _2180 - 360.0; + } + else + { + _2185 = _2180; + } + float _2235; + 
if ((_2185 > (-67.5)) && (_2185 < 67.5)) + { + float _2192 = (_2185 - (-67.5)) * 0.0296296291053295135498046875; + int _2193 = int(_2192); + float _2195 = _2192 - float(_2193); + float _2196 = _2195 * _2195; + float _2197 = _2196 * _2195; + float _2234; + if (_2193 == 3) + { + _2234 = (((_2197 * (-0.16666667163372039794921875)) + (_2196 * 0.5)) + (_2195 * (-0.5))) + 0.16666667163372039794921875; + } + else + { + float _2227; + if (_2193 == 2) + { + _2227 = ((_2197 * 0.5) + (_2196 * (-1.0))) + 0.666666686534881591796875; + } + else + { + float _2222; + if (_2193 == 1) + { + _2222 = (((_2197 * (-0.5)) + (_2196 * 0.5)) + (_2195 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2215; + if (_2193 == 0) + { + _2215 = _2197 * 0.16666667163372039794921875; + } + else + { + _2215 = 0.0; + } + _2222 = _2215; + } + _2227 = _2222; + } + _2234 = _2227; + } + _2235 = _2234; + } + else + { + _2235 = 0.0; + } + _2156.x = _2157 + ((((_2235 * 1.5) * _2112) * (0.02999999932944774627685546875 - _2157)) * 0.180000007152557373046875); + float3 _2245 = fast::clamp(fast::clamp(_2156, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _2248 = mix(float3(dot(_2245, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2245, float3(0.959999978542327880859375)); + float _2249 = _2248.x; + float _2253 = 0.17999999225139617919921875 * exp2(18.0); + float _2255 = exp2(-14.0); + float _2258 = log((_2249 <= 0.0) ? 
_2255 : _2249) / _1065; + float _2260 = log(0.17999999225139617919921875 * exp2(-15.0)) / _1065; + float _2327; + if (_2258 <= _2260) + { + _2327 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2267 = log(0.180000007152557373046875) / _1065; + float _2324; + if ((_2258 > _2260) && (_2258 < _2267)) + { + float _2307 = (3.0 * (_2258 - _2260)) / (_2267 - _2260); + int _2308 = int(_2307); + float _2310 = _2307 - float(_2308); + _2324 = dot(float3(_2310 * _2310, _2310, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2308], _475[_2308 + 1], _475[_2308 + 2])); + } + else + { + float _2275 = log(_2253) / _1065; + float _2303; + if ((_2258 >= _2267) && (_2258 < _2275)) + { + float _2286 = (3.0 * (_2258 - _2267)) / (_2275 - _2267); + int _2287 = int(_2286); + float _2289 = _2286 - float(_2287); + _2303 = dot(float3(_2289 * _2289, _2289, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2287], _476[_2287 + 1], _476[_2287 + 2])); + } + else + { + _2303 = log(10000.0) / _1065; + } + _2324 = _2303; + } + _2327 = _2324; + } + float3 _2329; + _2329.x = pow(10.0, _2327); + float _2330 = _2248.y; + float _2334 = log((_2330 <= 0.0) ? 
_2255 : _2330) / _1065; + float _2401; + if (_2334 <= _2260) + { + _2401 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2341 = log(0.180000007152557373046875) / _1065; + float _2398; + if ((_2334 > _2260) && (_2334 < _2341)) + { + float _2381 = (3.0 * (_2334 - _2260)) / (_2341 - _2260); + int _2382 = int(_2381); + float _2384 = _2381 - float(_2382); + _2398 = dot(float3(_2384 * _2384, _2384, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2382], _475[_2382 + 1], _475[_2382 + 2])); + } + else + { + float _2349 = log(_2253) / _1065; + float _2377; + if ((_2334 >= _2341) && (_2334 < _2349)) + { + float _2360 = (3.0 * (_2334 - _2341)) / (_2349 - _2341); + int _2361 = int(_2360); + float _2363 = _2360 - float(_2361); + _2377 = dot(float3(_2363 * _2363, _2363, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2361], _476[_2361 + 1], _476[_2361 + 2])); + } + else + { + _2377 = log(10000.0) / _1065; + } + _2398 = _2377; + } + _2401 = _2398; + } + _2329.y = pow(10.0, _2401); + float _2404 = _2248.z; + float _2408 = log((_2404 <= 0.0) ? 
_2255 : _2404) / _1065; + float _2475; + if (_2408 <= _2260) + { + _2475 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2415 = log(0.180000007152557373046875) / _1065; + float _2472; + if ((_2408 > _2260) && (_2408 < _2415)) + { + float _2455 = (3.0 * (_2408 - _2260)) / (_2415 - _2260); + int _2456 = int(_2455); + float _2458 = _2455 - float(_2456); + _2472 = dot(float3(_2458 * _2458, _2458, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2456], _475[_2456 + 1], _475[_2456 + 2])); + } + else + { + float _2423 = log(_2253) / _1065; + float _2451; + if ((_2408 >= _2415) && (_2408 < _2423)) + { + float _2434 = (3.0 * (_2408 - _2415)) / (_2423 - _2415); + int _2435 = int(_2434); + float _2437 = _2434 - float(_2435); + _2451 = dot(float3(_2437 * _2437, _2437, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2435], _476[_2435 + 1], _476[_2435 + 2])); + } + else + { + _2451 = log(10000.0) / _1065; + } + _2472 = _2451; + } + _2475 = _2472; + } + _2329.z = pow(10.0, _2475); + float3 _2479 = (_2329 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _2481 = 0.17999999225139617919921875 * pow(2.0, -12.0); + float _2485 = log((_2481 <= 0.0) ? 
_2255 : _2481) / _1065; + float _2552; + if (_2485 <= _2260) + { + _2552 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2492 = log(0.180000007152557373046875) / _1065; + float _2549; + if ((_2485 > _2260) && (_2485 < _2492)) + { + float _2532 = (3.0 * (_2485 - _2260)) / (_2492 - _2260); + int _2533 = int(_2532); + float _2535 = _2532 - float(_2533); + _2549 = dot(float3(_2535 * _2535, _2535, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2533], _475[_2533 + 1], _475[_2533 + 2])); + } + else + { + float _2500 = log(_2253) / _1065; + float _2528; + if ((_2485 >= _2492) && (_2485 < _2500)) + { + float _2511 = (3.0 * (_2485 - _2492)) / (_2500 - _2492); + int _2512 = int(_2511); + float _2514 = _2511 - float(_2512); + _2528 = dot(float3(_2514 * _2514, _2514, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2512], _476[_2512 + 1], _476[_2512 + 2])); + } + else + { + _2528 = log(10000.0) / _1065; + } + _2549 = _2528; + } + _2552 = _2549; + } + float _2555 = log(0.180000007152557373046875) / _1065; + float _2611; + if (_2555 <= _2260) + { + _2611 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2608; + if ((_2555 > _2260) && (_2555 < _2555)) + { + _2608 = (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[3], _475[4], _475[5])).z; + } + else + { + float _2568 = log(_2253) / _1065; + float _2596; + if ((_2555 >= _2555) && (_2555 < _2568)) + { + float _2579 = (3.0 * (_2555 - _2555)) / (_2568 - _2555); + int _2580 = int(_2579); + float _2582 = _2579 - float(_2580); + _2596 = dot(float3(_2582 * _2582, _2582, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2580], _476[_2580 + 1], _476[_2580 + 2])); + } + else + { + _2596 = log(10000.0) / _1065; + } + _2608 = _2596; + } + _2611 = _2608; + } + float _2612 = pow(10.0, 
_2611); + float _2614 = 0.17999999225139617919921875 * pow(2.0, 10.0); + float _2618 = log((_2614 <= 0.0) ? _2255 : _2614) / _1065; + float _2683; + if (_2618 <= _2260) + { + _2683 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _2680; + if ((_2618 > _2260) && (_2618 < _2555)) + { + float _2663 = (3.0 * (_2618 - _2260)) / (_2555 - _2260); + int _2664 = int(_2663); + float _2666 = _2663 - float(_2664); + _2680 = dot(float3(_2666 * _2666, _2666, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_2664], _475[_2664 + 1], _475[_2664 + 2])); + } + else + { + float _2631 = log(_2253) / _1065; + float _2659; + if ((_2618 >= _2555) && (_2618 < _2631)) + { + float _2642 = (3.0 * (_2618 - _2555)) / (_2631 - _2555); + int _2643 = int(_2642); + float _2645 = _2642 - float(_2643); + _2659 = dot(float3(_2645 * _2645, _2645, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_2643], _476[_2643 + 1], _476[_2643 + 2])); + } + else + { + _2659 = log(10000.0) / _1065; + } + _2680 = _2659; + } + _2683 = _2680; + } + float _2684 = pow(10.0, _2683); + float _2685 = _2479.x; + float _2689 = log((_2685 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2685) / _1065; + float _2690 = log(pow(10.0, _2552)); + float _2691 = _2690 / _1065; + float _2768; + if (_2689 <= _2691) + { + _2768 = (_2689 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1065) - ((3.0 * _2690) / _1065)); + } + else + { + float _2698 = log(_2612) / _1065; + float _2760; + if ((_2689 > _2691) && (_2689 < _2698)) + { + float _2743 = (7.0 * (_2689 - _2691)) / (_2698 - _2691); + int _2744 = int(_2743); + float _2746 = _2743 - float(_2744); + _2760 = dot(float3(_2746 * _2746, _2746, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2744], _479[_2744 + 1], _479[_2744 + 2])); + } + else + { + float _2705 = log(_2684); + float _2706 = _2705 / _1065; + float _2739; + if ((_2689 >= _2698) && (_2689 < _2706)) + { + float _2722 = (7.0 * (_2689 - _2698)) / (_2706 - _2698); + int _2723 = int(_2722); + float _2725 = _2722 - float(_2723); + _2739 = dot(float3(_2725 * _2725, _2725, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2723], _480[_2723 + 1], _480[_2723 + 2])); + } + else + { + _2739 = (_2689 * 0.0599999986588954925537109375) + ((log(1000.0) / _1065) - ((0.0599999986588954925537109375 * _2705) / _1065)); + } + _2760 = _2739; + } + _2768 = _2760; + } + float3 _2770; + _2770.x = pow(10.0, _2768); + float _2771 = _2479.y; + float _2775 = log((_2771 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2771) / _1065; + float _2852; + if (_2775 <= _2691) + { + _2852 = (_2775 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1065) - ((3.0 * _2690) / _1065)); + } + else + { + float _2782 = log(_2612) / _1065; + float _2844; + if ((_2775 > _2691) && (_2775 < _2782)) + { + float _2827 = (7.0 * (_2775 - _2691)) / (_2782 - _2691); + int _2828 = int(_2827); + float _2830 = _2827 - float(_2828); + _2844 = dot(float3(_2830 * _2830, _2830, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2828], _479[_2828 + 1], _479[_2828 + 2])); + } + else + { + float _2789 = log(_2684); + float _2790 = _2789 / _1065; + float _2823; + if ((_2775 >= _2782) && (_2775 < _2790)) + { + float _2806 = (7.0 * (_2775 - _2782)) / (_2790 - _2782); + int _2807 = int(_2806); + float _2809 = _2806 - float(_2807); + _2823 = dot(float3(_2809 * _2809, _2809, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2807], _480[_2807 + 1], _480[_2807 + 2])); + } + else + { + _2823 = (_2775 * 0.0599999986588954925537109375) + ((log(1000.0) / _1065) - ((0.0599999986588954925537109375 * _2789) / _1065)); + } + _2844 = _2823; + } + _2852 = _2844; + } + _2770.y = pow(10.0, _2852); + float _2855 = _2479.z; + float _2859 = log((_2855 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2855) / _1065; + float _2936; + if (_2859 <= _2691) + { + _2936 = (_2859 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1065) - ((3.0 * _2690) / _1065)); + } + else + { + float _2866 = log(_2612) / _1065; + float _2928; + if ((_2859 > _2691) && (_2859 < _2866)) + { + float _2911 = (7.0 * (_2859 - _2691)) / (_2866 - _2691); + int _2912 = int(_2911); + float _2914 = _2911 - float(_2912); + _2928 = dot(float3(_2914 * _2914, _2914, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_479[_2912], _479[_2912 + 1], _479[_2912 + 2])); + } + else + { + float _2873 = log(_2684); + float _2874 = _2873 / _1065; + float _2907; + if ((_2859 >= _2866) && (_2859 < _2874)) + { + float _2890 = (7.0 * (_2859 - _2866)) / (_2874 - _2866); + int _2891 = int(_2890); + float _2893 = _2890 - float(_2891); + _2907 = dot(float3(_2893 * _2893, _2893, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_480[_2891], _480[_2891 + 1], _480[_2891 + 2])); + } + else + { + _2907 = (_2859 * 0.0599999986588954925537109375) + ((log(1000.0) / _1065) - ((0.0599999986588954925537109375 * _2873) / _1065)); + } + _2928 = _2907; + } + _2936 = _2928; + } + _2770.z = pow(10.0, _2936); + float3 _2942 = pow(((_2770 - float3(3.5073844628641381859779357910156e-05)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2950 = pow((float3(0.8359375) + (float3(18.8515625) * _2942)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _2942))), float3(78.84375)); + } + else + { + float3 _2097; + if ((_Globals.OutputDevice == 4u) || (_Globals.OutputDevice == 6u)) + { + float3 _1263 = (_906 * float3(1.5)) * (_546 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _1264 = 
_1263.x; + float _1265 = _1263.y; + float _1267 = _1263.z; + float _1270 = fast::max(fast::max(_1264, _1265), _1267); + float _1275 = (fast::max(_1270, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1264, _1265), _1267), 1.0000000133514319600180897396058e-10)) / fast::max(_1270, 0.00999999977648258209228515625); + float _1288 = ((_1267 + _1265) + _1264) + (1.75 * sqrt(((_1267 * (_1267 - _1265)) + (_1265 * (_1265 - _1264))) + (_1264 * (_1264 - _1267)))); + float _1289 = _1288 * 0.3333333432674407958984375; + float _1290 = _1275 - 0.4000000059604644775390625; + float _1295 = fast::max(1.0 - abs(_1290 * 2.5), 0.0); + float _1303 = (1.0 + (float(int(sign(_1290 * 5.0))) * (1.0 - (_1295 * _1295)))) * 0.02500000037252902984619140625; + float _1316; + if (_1289 <= 0.053333334624767303466796875) + { + _1316 = _1303; + } + else + { + float _1315; + if (_1289 >= 0.1599999964237213134765625) + { + _1315 = 0.0; + } + else + { + _1315 = _1303 * ((0.23999999463558197021484375 / _1288) - 0.5); + } + _1316 = _1315; + } + float3 _1319 = _1263 * float3(1.0 + _1316); + float _1320 = _1319.x; + float _1321 = _1319.y; + float _1323 = _1319.z; + float _1337; + if ((_1320 == _1321) && (_1321 == _1323)) + { + _1337 = 0.0; + } + else + { + _1337 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_1321 - _1323), ((2.0 * _1320) - _1321) - _1323); + } + float _1342; + if (_1337 < 0.0) + { + _1342 = _1337 + 360.0; + } + else + { + _1342 = _1337; + } + float _1343 = fast::clamp(_1342, 0.0, 360.0); + float _1348; + if (_1343 > 180.0) + { + _1348 = _1343 - 360.0; + } + else + { + _1348 = _1343; + } + float _1398; + if ((_1348 > (-67.5)) && (_1348 < 67.5)) + { + float _1355 = (_1348 - (-67.5)) * 0.0296296291053295135498046875; + int _1356 = int(_1355); + float _1358 = _1355 - float(_1356); + float _1359 = _1358 * _1358; + float _1360 = _1359 * _1358; + float _1397; + if (_1356 == 3) + { + _1397 = (((_1360 * (-0.16666667163372039794921875)) + (_1359 * 0.5)) + (_1358 * (-0.5))) 
+ 0.16666667163372039794921875; + } + else + { + float _1390; + if (_1356 == 2) + { + _1390 = ((_1360 * 0.5) + (_1359 * (-1.0))) + 0.666666686534881591796875; + } + else + { + float _1385; + if (_1356 == 1) + { + _1385 = (((_1360 * (-0.5)) + (_1359 * 0.5)) + (_1358 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1378; + if (_1356 == 0) + { + _1378 = _1360 * 0.16666667163372039794921875; + } + else + { + _1378 = 0.0; + } + _1385 = _1378; + } + _1390 = _1385; + } + _1397 = _1390; + } + _1398 = _1397; + } + else + { + _1398 = 0.0; + } + _1319.x = _1320 + ((((_1398 * 1.5) * _1275) * (0.02999999932944774627685546875 - _1320)) * 0.180000007152557373046875); + float3 _1408 = fast::clamp(fast::clamp(_1319, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _1411 = mix(float3(dot(_1408, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1408, float3(0.959999978542327880859375)); + float _1412 = _1411.x; + float _1416 = 0.17999999225139617919921875 * exp2(18.0); + float _1418 = exp2(-14.0); + float _1421 = log((_1412 <= 0.0) ? 
_1418 : _1412) / _1065; + float _1423 = log(0.17999999225139617919921875 * exp2(-15.0)) / _1065; + float _1490; + if (_1421 <= _1423) + { + _1490 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1430 = log(0.180000007152557373046875) / _1065; + float _1487; + if ((_1421 > _1423) && (_1421 < _1430)) + { + float _1470 = (3.0 * (_1421 - _1423)) / (_1430 - _1423); + int _1471 = int(_1470); + float _1473 = _1470 - float(_1471); + _1487 = dot(float3(_1473 * _1473, _1473, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1471], _475[_1471 + 1], _475[_1471 + 2])); + } + else + { + float _1438 = log(_1416) / _1065; + float _1466; + if ((_1421 >= _1430) && (_1421 < _1438)) + { + float _1449 = (3.0 * (_1421 - _1430)) / (_1438 - _1430); + int _1450 = int(_1449); + float _1452 = _1449 - float(_1450); + _1466 = dot(float3(_1452 * _1452, _1452, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1450], _476[_1450 + 1], _476[_1450 + 2])); + } + else + { + _1466 = log(10000.0) / _1065; + } + _1487 = _1466; + } + _1490 = _1487; + } + float3 _1492; + _1492.x = pow(10.0, _1490); + float _1493 = _1411.y; + float _1497 = log((_1493 <= 0.0) ? 
_1418 : _1493) / _1065; + float _1564; + if (_1497 <= _1423) + { + _1564 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1504 = log(0.180000007152557373046875) / _1065; + float _1561; + if ((_1497 > _1423) && (_1497 < _1504)) + { + float _1544 = (3.0 * (_1497 - _1423)) / (_1504 - _1423); + int _1545 = int(_1544); + float _1547 = _1544 - float(_1545); + _1561 = dot(float3(_1547 * _1547, _1547, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1545], _475[_1545 + 1], _475[_1545 + 2])); + } + else + { + float _1512 = log(_1416) / _1065; + float _1540; + if ((_1497 >= _1504) && (_1497 < _1512)) + { + float _1523 = (3.0 * (_1497 - _1504)) / (_1512 - _1504); + int _1524 = int(_1523); + float _1526 = _1523 - float(_1524); + _1540 = dot(float3(_1526 * _1526, _1526, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1524], _476[_1524 + 1], _476[_1524 + 2])); + } + else + { + _1540 = log(10000.0) / _1065; + } + _1561 = _1540; + } + _1564 = _1561; + } + _1492.y = pow(10.0, _1564); + float _1567 = _1411.z; + float _1571 = log((_1567 <= 0.0) ? 
_1418 : _1567) / _1065; + float _1638; + if (_1571 <= _1423) + { + _1638 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1578 = log(0.180000007152557373046875) / _1065; + float _1635; + if ((_1571 > _1423) && (_1571 < _1578)) + { + float _1618 = (3.0 * (_1571 - _1423)) / (_1578 - _1423); + int _1619 = int(_1618); + float _1621 = _1618 - float(_1619); + _1635 = dot(float3(_1621 * _1621, _1621, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1619], _475[_1619 + 1], _475[_1619 + 2])); + } + else + { + float _1586 = log(_1416) / _1065; + float _1614; + if ((_1571 >= _1578) && (_1571 < _1586)) + { + float _1597 = (3.0 * (_1571 - _1578)) / (_1586 - _1578); + int _1598 = int(_1597); + float _1600 = _1597 - float(_1598); + _1614 = dot(float3(_1600 * _1600, _1600, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1598], _476[_1598 + 1], _476[_1598 + 2])); + } + else + { + _1614 = log(10000.0) / _1065; + } + _1635 = _1614; + } + _1638 = _1635; + } + _1492.z = pow(10.0, _1638); + float3 _1642 = (_1492 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _1644 = 0.17999999225139617919921875 * pow(2.0, -12.0); + float _1648 = log((_1644 <= 0.0) ? 
_1418 : _1644) / _1065; + float _1715; + if (_1648 <= _1423) + { + _1715 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1655 = log(0.180000007152557373046875) / _1065; + float _1712; + if ((_1648 > _1423) && (_1648 < _1655)) + { + float _1695 = (3.0 * (_1648 - _1423)) / (_1655 - _1423); + int _1696 = int(_1695); + float _1698 = _1695 - float(_1696); + _1712 = dot(float3(_1698 * _1698, _1698, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1696], _475[_1696 + 1], _475[_1696 + 2])); + } + else + { + float _1663 = log(_1416) / _1065; + float _1691; + if ((_1648 >= _1655) && (_1648 < _1663)) + { + float _1674 = (3.0 * (_1648 - _1655)) / (_1663 - _1655); + int _1675 = int(_1674); + float _1677 = _1674 - float(_1675); + _1691 = dot(float3(_1677 * _1677, _1677, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1675], _476[_1675 + 1], _476[_1675 + 2])); + } + else + { + _1691 = log(10000.0) / _1065; + } + _1712 = _1691; + } + _1715 = _1712; + } + float _1718 = log(0.180000007152557373046875) / _1065; + float _1774; + if (_1718 <= _1423) + { + _1774 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1771; + if ((_1718 > _1423) && (_1718 < _1718)) + { + _1771 = (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[3], _475[4], _475[5])).z; + } + else + { + float _1731 = log(_1416) / _1065; + float _1759; + if ((_1718 >= _1718) && (_1718 < _1731)) + { + float _1742 = (3.0 * (_1718 - _1718)) / (_1731 - _1718); + int _1743 = int(_1742); + float _1745 = _1742 - float(_1743); + _1759 = dot(float3(_1745 * _1745, _1745, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1743], _476[_1743 + 1], _476[_1743 + 2])); + } + else + { + _1759 = log(10000.0) / _1065; + } + _1771 = _1759; + } + _1774 = _1771; + } + float _1775 = pow(10.0, 
_1774); + float _1777 = 0.17999999225139617919921875 * pow(2.0, 11.0); + float _1781 = log((_1777 <= 0.0) ? _1418 : _1777) / _1065; + float _1846; + if (_1781 <= _1423) + { + _1846 = log(9.9999997473787516355514526367188e-05) / _1065; + } + else + { + float _1843; + if ((_1781 > _1423) && (_1781 < _1718)) + { + float _1826 = (3.0 * (_1781 - _1423)) / (_1718 - _1423); + int _1827 = int(_1826); + float _1829 = _1826 - float(_1827); + _1843 = dot(float3(_1829 * _1829, _1829, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_475[_1827], _475[_1827 + 1], _475[_1827 + 2])); + } + else + { + float _1794 = log(_1416) / _1065; + float _1822; + if ((_1781 >= _1718) && (_1781 < _1794)) + { + float _1805 = (3.0 * (_1781 - _1718)) / (_1794 - _1718); + int _1806 = int(_1805); + float _1808 = _1805 - float(_1806); + _1822 = dot(float3(_1808 * _1808, _1808, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_476[_1806], _476[_1806 + 1], _476[_1806 + 2])); + } + else + { + _1822 = log(10000.0) / _1065; + } + _1843 = _1822; + } + _1846 = _1843; + } + float _1847 = pow(10.0, _1846); + float _1848 = _1642.x; + float _1852 = log((_1848 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1848) / _1065; + float _1854 = log(pow(10.0, _1715)) / _1065; + float _1926; + if (_1852 <= _1854) + { + _1926 = log(0.004999999888241291046142578125) / _1065; + } + else + { + float _1861 = log(_1775) / _1065; + float _1923; + if ((_1852 > _1854) && (_1852 < _1861)) + { + float _1906 = (7.0 * (_1852 - _1854)) / (_1861 - _1854); + int _1907 = int(_1906); + float _1909 = _1906 - float(_1907); + _1923 = dot(float3(_1909 * _1909, _1909, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_1907], _482[_1907 + 1], _482[_1907 + 2])); + } + else + { + float _1868 = log(_1847); + float _1869 = _1868 / _1065; + float _1902; + if ((_1852 >= _1861) && (_1852 < _1869)) + { + float _1885 = (7.0 * (_1852 - _1861)) / (_1869 - _1861); + int _1886 = int(_1885); + float _1888 = _1885 - float(_1886); + _1902 = dot(float3(_1888 * _1888, _1888, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_1886], _483[_1886 + 1], _483[_1886 + 2])); + } + else + { + _1902 = (_1852 * 0.119999997317790985107421875) + ((log(2000.0) / _1065) - ((0.119999997317790985107421875 * _1868) / _1065)); + } + _1923 = _1902; + } + _1926 = _1923; + } + float3 _1928; + _1928.x = pow(10.0, _1926); + float _1929 = _1642.y; + float _1933 = log((_1929 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1929) / _1065; + float _2005; + if (_1933 <= _1854) + { + _2005 = log(0.004999999888241291046142578125) / _1065; + } + else + { + float _1940 = log(_1775) / _1065; + float _2002; + if ((_1933 > _1854) && (_1933 < _1940)) + { + float _1985 = (7.0 * (_1933 - _1854)) / (_1940 - _1854); + int _1986 = int(_1985); + float _1988 = _1985 - float(_1986); + _2002 = dot(float3(_1988 * _1988, _1988, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_1986], _482[_1986 + 1], _482[_1986 + 2])); + } + else + { + float _1947 = log(_1847); + float _1948 = _1947 / _1065; + float _1981; + if ((_1933 >= _1940) && (_1933 < _1948)) + { + float _1964 = (7.0 * (_1933 - _1940)) / (_1948 - _1940); + int _1965 = int(_1964); + float _1967 = _1964 - float(_1965); + _1981 = dot(float3(_1967 * _1967, _1967, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_1965], _483[_1965 + 1], _483[_1965 + 2])); + } + else + { + _1981 = (_1933 * 0.119999997317790985107421875) + ((log(2000.0) / _1065) - ((0.119999997317790985107421875 * _1947) / _1065)); + } + _2002 = _1981; + } + _2005 = _2002; + } + _1928.y = pow(10.0, _2005); + float _2008 = _1642.z; + float _2012 = log((_2008 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2008) / _1065; + float _2084; + if (_2012 <= _1854) + { + _2084 = log(0.004999999888241291046142578125) / _1065; + } + else + { + float _2019 = log(_1775) / _1065; + float _2081; + if ((_2012 > _1854) && (_2012 < _2019)) + { + float _2064 = (7.0 * (_2012 - _1854)) / (_2019 - _1854); + int _2065 = int(_2064); + float _2067 = _2064 - float(_2065); + _2081 = dot(float3(_2067 * _2067, _2067, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_482[_2065], _482[_2065 + 1], _482[_2065 + 2])); + } + else + { + float _2026 = log(_1847); + float _2027 = _2026 / _1065; + float _2060; + if ((_2012 >= _2019) && (_2012 < _2027)) + { + float _2043 = (7.0 * (_2012 - _2019)) / (_2027 - _2019); + int _2044 = int(_2043); + float _2046 = _2043 - float(_2044); + _2060 = dot(float3(_2046 * _2046, _2046, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_483[_2044], _483[_2044 + 1], _483[_2044 + 2])); + } + else + { + _2060 = (_2012 * 0.119999997317790985107421875) + ((log(2000.0) / _1065) - ((0.119999997317790985107421875 * _2026) / _1065)); + } + _2081 = _2060; + } + _2084 = _2081; + } + _1928.z = pow(10.0, _2084); + float3 _2089 = pow((_1928 * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2097 = pow((float3(0.8359375) + (float3(18.8515625) * _2089)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _2089))), float3(78.84375)); + } + else + { + float3 _1260; + if (_Globals.OutputDevice == 7u) + { + float3 _1252 = pow(((_906 * _547) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _1260 = pow((float3(0.8359375) + (float3(18.8515625) * _1252)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _1252))), float3(78.84375)); + } + else + { + _1260 = pow((_1218 * _547) * _576, float3(_Globals.InverseGamma.z)); + } + _2097 = _1260; + } + _2950 = _2097; + } + _2960 = _2950; + } + _3001 = 
_2960; + } + float3 _3002 = _3001 * float3(0.95238101482391357421875); + float4 _3003 = float4(_3002.x, _3002.y, _3002.z, float4(0.0).w); + _3003.w = 0.0; + out.out_var_SV_Target0 = _3003; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag new file mode 100644 index 00000000000..f405fbb68ef --- /dev/null +++ b/reference/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -0,0 +1,1392 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_Globals +{ + float4 MappingPolynomial; + float3 InverseGamma; + float4 ColorMatrixR_ColorCurveCd1; + float4 ColorMatrixG_ColorCurveCd3Cm3; + float4 ColorMatrixB_ColorCurveCm2; + float4 ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3; + float4 ColorCurve_Ch1_Ch2; + float4 ColorShadow_Luma; + float4 ColorShadow_Tint1; + float4 ColorShadow_Tint2; + float FilmSlope; + float FilmToe; + float FilmShoulder; + float FilmBlackClip; + float FilmWhiteClip; + float4 LUTWeights[5]; + float3 ColorScale; + float4 OverlayColor; + float WhiteTemp; + float WhiteTint; + float4 ColorSaturation; + 
float4 ColorContrast; + float4 ColorGamma; + float4 ColorGain; + float4 ColorOffset; + float4 ColorSaturationShadows; + float4 ColorContrastShadows; + float4 ColorGammaShadows; + float4 ColorGainShadows; + float4 ColorOffsetShadows; + float4 ColorSaturationMidtones; + float4 ColorContrastMidtones; + float4 ColorGammaMidtones; + float4 ColorGainMidtones; + float4 ColorOffsetMidtones; + float4 ColorSaturationHighlights; + float4 ColorContrastHighlights; + float4 ColorGammaHighlights; + float4 ColorGainHighlights; + float4 ColorOffsetHighlights; + float ColorCorrectionShadowsMax; + float ColorCorrectionHighlightsMin; + uint OutputDevice; + uint OutputGamut; + float BlueCorrection; + float ExpandGamut; +}; + +constant float3 _523 = {}; +constant float3 _525 = {}; + +constant spvUnsafeArray _499 = spvUnsafeArray({ -4.0, -4.0, -3.1573765277862548828125, -0.485249996185302734375, 1.84773242473602294921875, 1.84773242473602294921875 }); +constant spvUnsafeArray _500 = spvUnsafeArray({ -0.718548238277435302734375, 2.0810306072235107421875, 3.66812419891357421875, 4.0, 4.0, 4.0 }); +constant spvUnsafeArray _503 = spvUnsafeArray({ -4.97062206268310546875, -3.0293781757354736328125, -2.1261999607086181640625, -1.5104999542236328125, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 1.2911865711212158203125 }); +constant spvUnsafeArray _504 = spvUnsafeArray({ 0.80891323089599609375, 1.19108676910400390625, 1.5683000087738037109375, 1.94830000400543212890625, 2.308300018310546875, 2.63840007781982421875, 2.85949993133544921875, 2.9872608184814453125, 3.0127391815185546875, 3.0127391815185546875 }); +constant spvUnsafeArray _506 = spvUnsafeArray({ -2.3010299205780029296875, -2.3010299205780029296875, -1.9312000274658203125, -1.5204999446868896484375, -1.0578000545501708984375, -0.4668000042438507080078125, 0.11937999725341796875, 0.7088134288787841796875, 1.2911865711212158203125, 
1.2911865711212158203125 }); +constant spvUnsafeArray _507 = spvUnsafeArray({ 0.801995217800140380859375, 1.19800484180450439453125, 1.5943000316619873046875, 1.99730002880096435546875, 2.3782999515533447265625, 2.7683999538421630859375, 3.0515000820159912109375, 3.2746293544769287109375, 3.32743072509765625, 3.32743072509765625 }); + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + float2 in_var_TEXCOORD0 [[user(locn0), center_no_perspective]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globals [[buffer(0)]], texture2d Texture1 [[texture(0)]], sampler Texture1Sampler [[sampler(0)]], uint gl_Layer [[render_target_array_index]]) +{ + main0_out out = {}; + float3x3 _572 = float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * float3x3(float3(1.01303005218505859375, 0.0061053098179399967193603515625, -0.014971000142395496368408203125), float3(0.0076982299797236919403076171875, 0.99816501140594482421875, -0.005032029934227466583251953125), float3(-0.0028413101099431514739990234375, 0.0046851597726345062255859375, 0.92450702190399169921875)); + float3x3 _573 = _572 * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _574 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 
1.01033914089202880859375)) * float3x3(float3(0.98722398281097412109375, -0.0061132698319852352142333984375, 0.01595330052077770233154296875), float3(-0.007598360069096088409423828125, 1.00186002254486083984375, 0.0053300200961530208587646484375), float3(0.003072570078074932098388671875, -0.0050959498621523380279541015625, 1.0816800594329833984375)); + float3x3 _575 = _574 * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)); + float3x3 _576 = float3x3(float3(0.952552378177642822265625, 0.0, 9.25), float3(0.3439664542675018310546875, 0.728166103363037109375, -0.07213254272937774658203125), float3(0.0, 0.0, 1.00882518291473388671875)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)); + float3x3 _577 = float3x3(float3(0.662454187870025634765625, 0.1340042054653167724609375, 0.1561876833438873291015625), float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625), float3(-0.0055746496655046939849853515625, 0.0040607335977256298065185546875, 1.01033914089202880859375)) * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625)); + float3x3 _602; + for (;;) + { + if (_Globals.OutputGamut == 1u) + { + _602 = _574 * float3x3(float3(2.493396282196044921875, -0.931345880031585693359375, -0.4026944935321807861328125), float3(-0.829486787319183349609375, 1.76265966892242431640625, 0.02362460084259510040283203125), 
float3(0.0358506999909877777099609375, -0.076182700693607330322265625, 0.957014024257659912109375)); + break; + } + else + { + if (_Globals.OutputGamut == 2u) + { + _602 = _574 * float3x3(float3(1.71660840511322021484375, -0.3556621074676513671875, -0.253360092639923095703125), float3(-0.666682898998260498046875, 1.61647760868072509765625, 0.01576850004494190216064453125), float3(0.017642199993133544921875, -0.04277630150318145751953125, 0.94222867488861083984375)); + break; + } + else + { + if (_Globals.OutputGamut == 3u) + { + _602 = float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625)); + break; + } + else + { + if (_Globals.OutputGamut == 4u) + { + _602 = float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)); + break; + } + else + { + _602 = _575; + break; + } + } + } + } + } + float3 _603 = float4((in.in_var_TEXCOORD0 - float2(0.015625)) * float2(1.03225803375244140625), float(gl_Layer) * 0.0322580635547637939453125, 0.0).xyz; + float3 _625; + if (_Globals.OutputDevice >= 3u) + { + float3 _617 = pow(_603, float3(0.0126833133399486541748046875)); + _625 = pow(fast::max(float3(0.0), _617 - float3(0.8359375)) / (float3(18.8515625) - (float3(18.6875) * _617)), float3(6.277394771575927734375)) * float3(10000.0); + } + else + { + _625 = (exp2((_603 - float3(0.434017598628997802734375)) * float3(14.0)) * float3(0.180000007152557373046875)) - (exp2(float3(-6.0762462615966796875)) * float3(0.180000007152557373046875)); + } + float _628 = _Globals.WhiteTemp * 1.00055634975433349609375; + float _642 = (_628 <= 7000.0) ? 
(0.24406300485134124755859375 + ((99.1100006103515625 + ((2967800.0 - (4604438528.0 / _Globals.WhiteTemp)) / _628)) / _628)) : (0.23703999817371368408203125 + ((247.4799957275390625 + ((1901800.0 - (2005284352.0 / _Globals.WhiteTemp)) / _628)) / _628)); + float _659 = ((0.860117733478546142578125 + (0.00015411825734190642833709716796875 * _Globals.WhiteTemp)) + ((1.2864121856637211749330163002014e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 + (0.0008424202096648514270782470703125 * _Globals.WhiteTemp)) + ((7.0814513719597016461193561553955e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _670 = ((0.317398726940155029296875 + (4.25 * _Globals.WhiteTemp)) + ((4.2048167614439080352894961833954e-08 * _Globals.WhiteTemp) * _Globals.WhiteTemp)) / ((1.0 - (2.8974181986995972692966461181641e-05 * _Globals.WhiteTemp)) + ((1.6145605741257895715534687042236e-07 * _Globals.WhiteTemp) * _Globals.WhiteTemp)); + float _675 = ((2.0 * _659) - (8.0 * _670)) + 4.0; + float2 _679 = float2((3.0 * _659) / _675, (2.0 * _670) / _675); + float2 _686 = fast::normalize(float2(_659, _670)); + float _691 = _659 + (((-_686.y) * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _695 = _670 + ((_686.x * _Globals.WhiteTint) * 0.0500000007450580596923828125); + float _700 = ((2.0 * _691) - (8.0 * _695)) + 4.0; + float2 _706 = select(float2(_642, ((((-3.0) * _642) * _642) + (2.86999988555908203125 * _642)) - 0.2750000059604644775390625), _679, bool2(_Globals.WhiteTemp < 4000.0)) + (float2((3.0 * _691) / _700, (2.0 * _695) / _700) - _679); + float _709 = fast::max(_706.y, 1.0000000133514319600180897396058e-10); + float3 _711; + _711.x = _706.x / _709; + _711.y = 1.0; + _711.z = ((1.0 - _706.x) - _706.y) / _709; + float _717 = fast::max(0.328999996185302734375, 1.0000000133514319600180897396058e-10); + float3 _719; + _719.x = 0.3127000033855438232421875 / _717; + _719.y = 1.0; + _719.z = 0.3582999706268310546875 / _717; + float3 _723 = _711 * 
float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _724 = _719 * float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)); + float3 _743 = (_625 * ((float3x3(float3(0.41245639324188232421875, 0.3575761020183563232421875, 0.180437505245208740234375), float3(0.21267290413379669189453125, 0.715152204036712646484375, 0.072175003588199615478515625), float3(0.01933390088379383087158203125, 0.119191996753215789794921875, 0.950304090976715087890625)) * ((float3x3(float3(0.89509999752044677734375, 0.2664000093936920166015625, -0.16140000522136688232421875), float3(-0.750199973583221435546875, 1.71350002288818359375, 0.0366999991238117218017578125), float3(0.0388999991118907928466796875, -0.06849999725818634033203125, 1.02960002422332763671875)) * float3x3(float3(_724.x / _723.x, 0.0, 0.0), float3(0.0, _724.y / _723.y, 0.0), float3(0.0, 0.0, _724.z / _723.z))) * float3x3(float3(0.986992895603179931640625, -0.14705429971218109130859375, 0.15996269881725311279296875), float3(0.4323053061962127685546875, 0.518360316753387451171875, 0.049291200935840606689453125), float3(-0.00852870009839534759521484375, 0.0400427989661693572998046875, 0.968486726284027099609375)))) * float3x3(float3(3.2409698963165283203125, -1.53738319873809814453125, -0.4986107647418975830078125), float3(-0.96924364566802978515625, 1.875967502593994140625, 0.0415550582110881805419921875), float3(0.055630080401897430419921875, -0.2039769589900970458984375, 1.05697154998779296875)))) * _573; + float3 _771; + if 
(_Globals.ColorShadow_Tint2.w != 0.0) + { + float _750 = dot(_743, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float3 _753 = (_743 / float3(_750)) - float3(1.0); + _771 = mix(_743, _743 * (_575 * (float3x3(float3(0.544169127941131591796875, 0.23959259688854217529296875, 0.16669429838657379150390625), float3(0.23946559429168701171875, 0.702153027057647705078125, 0.058381401002407073974609375), float3(-0.0023439000360667705535888671875, 0.0361833982169628143310546875, 1.05521833896636962890625)) * float3x3(float3(1.6410233974456787109375, -0.324803292751312255859375, -0.23642469942569732666015625), float3(-0.663662850856781005859375, 1.6153316497802734375, 0.016756348311901092529296875), float3(0.01172189414501190185546875, -0.008284442126750946044921875, 0.98839485645294189453125)))), float3((1.0 - exp2((-4.0) * dot(_753, _753))) * (1.0 - exp2((((-4.0) * _Globals.ExpandGamut) * _750) * _750)))); + } + else + { + _771 = _743; + } + float _772 = dot(_771, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625)); + float4 _777 = _Globals.ColorSaturationShadows * _Globals.ColorSaturation; + float4 _782 = _Globals.ColorContrastShadows * _Globals.ColorContrast; + float4 _787 = _Globals.ColorGammaShadows * _Globals.ColorGamma; + float4 _792 = _Globals.ColorGainShadows * _Globals.ColorGain; + float4 _797 = _Globals.ColorOffsetShadows + _Globals.ColorOffset; + float3 _798 = float3(_772); + float _830 = smoothstep(0.0, _Globals.ColorCorrectionShadowsMax, _772); + float4 _834 = _Globals.ColorSaturationHighlights * _Globals.ColorSaturation; + float4 _837 = _Globals.ColorContrastHighlights * _Globals.ColorContrast; + float4 _840 = _Globals.ColorGammaHighlights * _Globals.ColorGamma; + float4 _843 = _Globals.ColorGainHighlights * _Globals.ColorGain; + float4 _846 = _Globals.ColorOffsetHighlights + _Globals.ColorOffset; + float _878 = smoothstep(_Globals.ColorCorrectionHighlightsMin, 1.0, _772); 
+ float4 _881 = _Globals.ColorSaturationMidtones * _Globals.ColorSaturation; + float4 _884 = _Globals.ColorContrastMidtones * _Globals.ColorContrast; + float4 _887 = _Globals.ColorGammaMidtones * _Globals.ColorGamma; + float4 _890 = _Globals.ColorGainMidtones * _Globals.ColorGain; + float4 _893 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset; + float3 _931 = ((((pow(pow(fast::max(float3(0.0), mix(_798, _771, _777.xyz * float3(_777.w))) * float3(5.5555553436279296875), _782.xyz * float3(_782.w)) * float3(0.180000007152557373046875), float3(1.0) / (_787.xyz * float3(_787.w))) * (_792.xyz * float3(_792.w))) + (_797.xyz + float3(_797.w))) * float3(1.0 - _830)) + (((pow(pow(fast::max(float3(0.0), mix(_798, _771, _881.xyz * float3(_881.w))) * float3(5.5555553436279296875), _884.xyz * float3(_884.w)) * float3(0.180000007152557373046875), float3(1.0) / (_887.xyz * float3(_887.w))) * (_890.xyz * float3(_890.w))) + (_893.xyz + float3(_893.w))) * float3(_830 - _878))) + (((pow(pow(fast::max(float3(0.0), mix(_798, _771, _834.xyz * float3(_834.w))) * float3(5.5555553436279296875), _837.xyz * float3(_837.w)) * float3(0.180000007152557373046875), float3(1.0) / (_840.xyz * float3(_840.w))) * (_843.xyz * float3(_843.w))) + (_846.xyz + float3(_846.w))) * float3(_878)); + float3 _932 = _931 * _575; + float3 _940 = float3(_Globals.BlueCorrection); + float3 _942 = mix(_931, _931 * ((_577 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 0.162961304187774658203125), float3(0.0005471261101774871349334716796875, -0.00088337459601461887359619140625, 1.00033628940582275390625))) * _576), _940) * _577; + float _943 = _942.x; + float _944 = _942.y; + float _946 = _942.z; + float _949 = fast::max(fast::max(_943, _944), _946); + float _954 = (fast::max(_949, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_943, _944), _946), 
1.0000000133514319600180897396058e-10)) / fast::max(_949, 0.00999999977648258209228515625); + float _967 = ((_946 + _944) + _943) + (1.75 * sqrt(((_946 * (_946 - _944)) + (_944 * (_944 - _943))) + (_943 * (_943 - _946)))); + float _968 = _967 * 0.3333333432674407958984375; + float _969 = _954 - 0.4000000059604644775390625; + float _974 = fast::max(1.0 - abs(_969 * 2.5), 0.0); + float _982 = (1.0 + (float(int(sign(_969 * 5.0))) * (1.0 - (_974 * _974)))) * 0.02500000037252902984619140625; + float _995; + if (_968 <= 0.053333334624767303466796875) + { + _995 = _982; + } + else + { + float _994; + if (_968 >= 0.1599999964237213134765625) + { + _994 = 0.0; + } + else + { + _994 = _982 * ((0.23999999463558197021484375 / _967) - 0.5); + } + _995 = _994; + } + float3 _998 = _942 * float3(1.0 + _995); + float _999 = _998.x; + float _1000 = _998.y; + float _1002 = _998.z; + float _1016; + if ((_999 == _1000) && (_1000 == _1002)) + { + _1016 = 0.0; + } + else + { + _1016 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_1000 - _1002), ((2.0 * _999) - _1000) - _1002); + } + float _1021; + if (_1016 < 0.0) + { + _1021 = _1016 + 360.0; + } + else + { + _1021 = _1016; + } + float _1022 = fast::clamp(_1021, 0.0, 360.0); + float _1027; + if (_1022 > 180.0) + { + _1027 = _1022 - 360.0; + } + else + { + _1027 = _1022; + } + float _1031 = smoothstep(0.0, 1.0, 1.0 - abs(_1027 * 0.01481481455266475677490234375)); + _998.x = _999 + ((((_1031 * _1031) * _954) * (0.02999999932944774627685546875 - _999)) * 0.180000007152557373046875); + float3 _1040 = fast::max(float3(0.0), _998 * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375))); + float _1049 = (1.0 + _Globals.FilmBlackClip) - _Globals.FilmToe; + float _1052 = 1.0 + _Globals.FilmWhiteClip; + 
float _1055 = _1052 - _Globals.FilmShoulder; + float _1082; + if (_Globals.FilmToe > 0.800000011920928955078125) + { + _1082 = ((0.819999992847442626953125 - _Globals.FilmToe) / _Globals.FilmSlope) + (log(0.180000007152557373046875) / log(10.0)); + } + else + { + float _1061 = (0.180000007152557373046875 + _Globals.FilmBlackClip) / _1049; + _1082 = (log(0.180000007152557373046875) / log(10.0)) - ((0.5 * log(_1061 / (2.0 - _1061))) * (_1049 / _Globals.FilmSlope)); + } + float _1087 = ((1.0 - _Globals.FilmToe) / _Globals.FilmSlope) - _1082; + float _1089 = (_Globals.FilmShoulder / _Globals.FilmSlope) - _1087; + float _1091 = log(10.0); + float3 _1093 = log(mix(float3(dot(_1040, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1040, float3(0.959999978542327880859375))) / float3(_1091); + float3 _1097 = float3(_Globals.FilmSlope) * (_1093 + float3(_1087)); + float3 _1105 = float3(_1082); + float3 _1106 = _1093 - _1105; + float3 _1118 = float3(_1089); + float3 _1132 = fast::clamp(_1106 / float3(_1089 - _1082), float3(0.0), float3(1.0)); + float3 _1136 = select(_1132, float3(1.0) - _1132, bool3(_1089 < _1082)); + float3 _1141 = mix(select(_1097, float3(-_Globals.FilmBlackClip) + (float3(2.0 * _1049) / (float3(1.0) + exp(float3(((-2.0) * _Globals.FilmSlope) / _1049) * _1106))), _1093 < _1105), select(_1097, float3(_1052) - (float3(2.0 * _1055) / (float3(1.0) + exp(float3((2.0 * _Globals.FilmSlope) / _1055) * (_1093 - _1118)))), _1093 > _1118), ((float3(3.0) - (float3(2.0) * _1136)) * _1136) * _1136); + float3 _1145 = fast::max(float3(0.0), mix(float3(dot(_1141, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1141, float3(0.930000007152557373046875))); + float3 _1215; + if (_Globals.ColorShadow_Tint2.w == 0.0) + { + float3 _1157; + _1157.x = dot(_932, _Globals.ColorMatrixR_ColorCurveCd1.xyz); + _1157.y = dot(_932, _Globals.ColorMatrixG_ColorCurveCd3Cm3.xyz); + _1157.z = dot(_932, 
_Globals.ColorMatrixB_ColorCurveCm2.xyz); + float3 _1183 = fast::max(float3(0.0), _1157 * (_Globals.ColorShadow_Tint1.xyz + (_Globals.ColorShadow_Tint2.xyz * float3(1.0 / (dot(_932, _Globals.ColorShadow_Luma.xyz) + 1.0))))); + float3 _1188 = fast::max(float3(0.0), _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx - _1183); + float3 _1190 = fast::max(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz); + _1215 = ((((_1190 * _Globals.ColorCurve_Ch1_Ch2.xxx) + _Globals.ColorCurve_Ch1_Ch2.yyy) * (float3(1.0) / (_1190 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.www))) + ((fast::clamp(_1183, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.xxx, _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.zzz) * _Globals.ColorMatrixB_ColorCurveCm2.www) + (((_1188 * _Globals.ColorMatrixR_ColorCurveCd1.www) * (float3(1.0) / (_1188 + _Globals.ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3.yyy))) + _Globals.ColorMatrixG_ColorCurveCd3Cm3.www))) - float3(0.00200000009499490261077880859375); + } + else + { + _1215 = fast::max(float3(0.0), mix(_1145, _1145 * ((_577 * float3x3(float3(1.06317996978759765625, 0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _576), _940) * _575); + } + float3 _1216 = fast::clamp(_1215, float3(0.0), float3(1.0)); + float _1217 = _1216.x; + float _1229; + for (;;) + { + if (_1217 < 0.00313066993840038776397705078125) + { + _1229 = _1217 * 12.9200000762939453125; + break; + } + _1229 = (pow(_1217, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _1230 = _1216.y; + float _1242; + for (;;) + { + if (_1230 < 0.00313066993840038776397705078125) + { + _1242 = _1230 * 12.9200000762939453125; + break; + } + _1242 = (pow(_1230, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + 
break; + } + float _1243 = _1216.z; + float _1255; + for (;;) + { + if (_1243 < 0.00313066993840038776397705078125) + { + _1255 = _1243 * 12.9200000762939453125; + break; + } + _1255 = (pow(_1243, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float3 _1256 = float3(_1229, _1242, _1255); + float3 _1258 = (_1256 * float3(0.9375)) + float3(0.03125); + float _1270 = (_1258.z * 16.0) - 0.5; + float _1271 = floor(_1270); + float _1275 = (_1258.x + _1271) * 0.0625; + float _1276 = _1258.y; + float4 _1279 = Texture1.sample(Texture1Sampler, float2(_1275, _1276)); + float4 _1283 = Texture1.sample(Texture1Sampler, float2(_1275 + 0.0625, _1276)); + float3 _1289 = fast::max(float3(6.1035199905745685100555419921875e-05), (float3(_Globals.LUTWeights[0].x) * _1256) + (float3(_Globals.LUTWeights[1].x) * mix(_1279, _1283, float4(_1270 - _1271)).xyz)); + float3 _1295 = select(_1289 * float3(0.077399380505084991455078125), pow((_1289 * float3(0.94786727428436279296875)) + float3(0.0521326996386051177978515625), float3(2.400000095367431640625)), _1289 > float3(0.040449999272823333740234375)); + float3 _1324 = pow(fast::max(float3(0.0), mix((((float3(_Globals.MappingPolynomial.x) * (_1295 * _1295)) + (float3(_Globals.MappingPolynomial.y) * _1295)) + float3(_Globals.MappingPolynomial.z)) * _Globals.ColorScale, _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y)); + float3 _3103; + if (_Globals.OutputDevice == 0u) + { + float _3075; + for (;;) + { + if (_1324.x < 0.00313066993840038776397705078125) + { + _3075 = _1324.x * 12.9200000762939453125; + break; + } + _3075 = (pow(_1324.x, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _3088; + for (;;) + { + if (_1324.y < 0.00313066993840038776397705078125) + { + _3088 = _1324.y * 12.9200000762939453125; + break; + } + _3088 = (pow(_1324.y, 0.4166666567325592041015625) * 
1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + float _3101; + for (;;) + { + if (_1324.z < 0.00313066993840038776397705078125) + { + _3101 = _1324.z * 12.9200000762939453125; + break; + } + _3101 = (pow(_1324.z, 0.4166666567325592041015625) * 1.05499994754791259765625) - 0.054999999701976776123046875; + break; + } + _3103 = float3(_3075, _3088, _3101); + } + else + { + float3 _3062; + if (_Globals.OutputDevice == 1u) + { + float3 _3055 = fast::max(float3(6.1035199905745685100555419921875e-05), (_1324 * _573) * _602); + _3062 = fast::min(_3055 * float3(4.5), (pow(fast::max(_3055, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)) * float3(1.09899997711181640625)) - float3(0.098999999463558197021484375)); + } + else + { + float3 _3052; + if ((_Globals.OutputDevice == 3u) || (_Globals.OutputDevice == 5u)) + { + float3 _2204 = (_932 * float3(1.5)) * (_572 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _2205 = _2204.x; + float _2206 = _2204.y; + float _2208 = _2204.z; + float _2211 = fast::max(fast::max(_2205, _2206), _2208); + float _2216 = (fast::max(_2211, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_2205, _2206), _2208), 1.0000000133514319600180897396058e-10)) / fast::max(_2211, 0.00999999977648258209228515625); + float _2229 = ((_2208 + _2206) + _2205) + (1.75 * sqrt(((_2208 * (_2208 - _2206)) + (_2206 * (_2206 - _2205))) + (_2205 * (_2205 - _2208)))); + float _2230 = _2229 * 0.3333333432674407958984375; + float _2231 = _2216 - 0.4000000059604644775390625; + float _2236 = fast::max(1.0 - abs(_2231 * 2.5), 0.0); + float _2244 = (1.0 + (float(int(sign(_2231 * 5.0))) * (1.0 - (_2236 * _2236)))) * 0.02500000037252902984619140625; + float _2257; + if (_2230 <= 0.053333334624767303466796875) + { + _2257 = 
_2244; + } + else + { + float _2256; + if (_2230 >= 0.1599999964237213134765625) + { + _2256 = 0.0; + } + else + { + _2256 = _2244 * ((0.23999999463558197021484375 / _2229) - 0.5); + } + _2257 = _2256; + } + float3 _2260 = _2204 * float3(1.0 + _2257); + float _2261 = _2260.x; + float _2262 = _2260.y; + float _2264 = _2260.z; + float _2278; + if ((_2261 == _2262) && (_2262 == _2264)) + { + _2278 = 0.0; + } + else + { + _2278 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_2262 - _2264), ((2.0 * _2261) - _2262) - _2264); + } + float _2283; + if (_2278 < 0.0) + { + _2283 = _2278 + 360.0; + } + else + { + _2283 = _2278; + } + float _2284 = fast::clamp(_2283, 0.0, 360.0); + float _2289; + if (_2284 > 180.0) + { + _2289 = _2284 - 360.0; + } + else + { + _2289 = _2284; + } + float _2339; + if ((_2289 > (-67.5)) && (_2289 < 67.5)) + { + float _2296 = (_2289 - (-67.5)) * 0.0296296291053295135498046875; + int _2297 = int(_2296); + float _2299 = _2296 - float(_2297); + float _2300 = _2299 * _2299; + float _2301 = _2300 * _2299; + float _2338; + if (_2297 == 3) + { + _2338 = (((_2301 * (-0.16666667163372039794921875)) + (_2300 * 0.5)) + (_2299 * (-0.5))) + 0.16666667163372039794921875; + } + else + { + float _2331; + if (_2297 == 2) + { + _2331 = ((_2301 * 0.5) + (_2300 * (-1.0))) + 0.666666686534881591796875; + } + else + { + float _2326; + if (_2297 == 1) + { + _2326 = (((_2301 * (-0.5)) + (_2300 * 0.5)) + (_2299 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _2319; + if (_2297 == 0) + { + _2319 = _2301 * 0.16666667163372039794921875; + } + else + { + _2319 = 0.0; + } + _2326 = _2319; + } + _2331 = _2326; + } + _2338 = _2331; + } + _2339 = _2338; + } + else + { + _2339 = 0.0; + } + _2260.x = _2261 + ((((_2339 * 1.5) * _2216) * (0.02999999932944774627685546875 - _2261)) * 0.180000007152557373046875); + float3 _2349 = fast::clamp(fast::clamp(_2260, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, 
-0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _2352 = mix(float3(dot(_2349, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _2349, float3(0.959999978542327880859375)); + float _2353 = _2352.x; + float _2357 = 0.17999999225139617919921875 * exp2(18.0); + float _2359 = exp2(-14.0); + float _2362 = log((_2353 <= 0.0) ? _2359 : _2353) / _1091; + float _2364 = log(0.17999999225139617919921875 * exp2(-15.0)) / _1091; + float _2431; + if (_2362 <= _2364) + { + _2431 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2371 = log(0.180000007152557373046875) / _1091; + float _2428; + if ((_2362 > _2364) && (_2362 < _2371)) + { + float _2411 = (3.0 * (_2362 - _2364)) / (_2371 - _2364); + int _2412 = int(_2411); + float _2414 = _2411 - float(_2412); + _2428 = dot(float3(_2414 * _2414, _2414, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2412], _499[_2412 + 1], _499[_2412 + 2])); + } + else + { + float _2379 = log(_2357) / _1091; + float _2407; + if ((_2362 >= _2371) && (_2362 < _2379)) + { + float _2390 = (3.0 * (_2362 - _2371)) / (_2379 - _2371); + int _2391 = int(_2390); + float _2393 = _2390 - float(_2391); + _2407 = dot(float3(_2393 * _2393, _2393, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2391], _500[_2391 + 1], _500[_2391 + 2])); + } + else + { + _2407 = log(10000.0) / _1091; + } + _2428 = _2407; + } + _2431 = _2428; + } + float3 _2433; + _2433.x = pow(10.0, _2431); + float _2434 = _2352.y; + float _2438 = log((_2434 <= 0.0) ? 
_2359 : _2434) / _1091; + float _2505; + if (_2438 <= _2364) + { + _2505 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2445 = log(0.180000007152557373046875) / _1091; + float _2502; + if ((_2438 > _2364) && (_2438 < _2445)) + { + float _2485 = (3.0 * (_2438 - _2364)) / (_2445 - _2364); + int _2486 = int(_2485); + float _2488 = _2485 - float(_2486); + _2502 = dot(float3(_2488 * _2488, _2488, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2486], _499[_2486 + 1], _499[_2486 + 2])); + } + else + { + float _2453 = log(_2357) / _1091; + float _2481; + if ((_2438 >= _2445) && (_2438 < _2453)) + { + float _2464 = (3.0 * (_2438 - _2445)) / (_2453 - _2445); + int _2465 = int(_2464); + float _2467 = _2464 - float(_2465); + _2481 = dot(float3(_2467 * _2467, _2467, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2465], _500[_2465 + 1], _500[_2465 + 2])); + } + else + { + _2481 = log(10000.0) / _1091; + } + _2502 = _2481; + } + _2505 = _2502; + } + _2433.y = pow(10.0, _2505); + float _2508 = _2352.z; + float _2512 = log((_2508 <= 0.0) ? 
_2359 : _2508) / _1091; + float _2579; + if (_2512 <= _2364) + { + _2579 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2519 = log(0.180000007152557373046875) / _1091; + float _2576; + if ((_2512 > _2364) && (_2512 < _2519)) + { + float _2559 = (3.0 * (_2512 - _2364)) / (_2519 - _2364); + int _2560 = int(_2559); + float _2562 = _2559 - float(_2560); + _2576 = dot(float3(_2562 * _2562, _2562, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2560], _499[_2560 + 1], _499[_2560 + 2])); + } + else + { + float _2527 = log(_2357) / _1091; + float _2555; + if ((_2512 >= _2519) && (_2512 < _2527)) + { + float _2538 = (3.0 * (_2512 - _2519)) / (_2527 - _2519); + int _2539 = int(_2538); + float _2541 = _2538 - float(_2539); + _2555 = dot(float3(_2541 * _2541, _2541, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2539], _500[_2539 + 1], _500[_2539 + 2])); + } + else + { + _2555 = log(10000.0) / _1091; + } + _2576 = _2555; + } + _2579 = _2576; + } + _2433.z = pow(10.0, _2579); + float3 _2583 = (_2433 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _2585 = 0.17999999225139617919921875 * pow(2.0, -12.0); + float _2589 = log((_2585 <= 0.0) ? 
_2359 : _2585) / _1091; + float _2656; + if (_2589 <= _2364) + { + _2656 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2596 = log(0.180000007152557373046875) / _1091; + float _2653; + if ((_2589 > _2364) && (_2589 < _2596)) + { + float _2636 = (3.0 * (_2589 - _2364)) / (_2596 - _2364); + int _2637 = int(_2636); + float _2639 = _2636 - float(_2637); + _2653 = dot(float3(_2639 * _2639, _2639, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2637], _499[_2637 + 1], _499[_2637 + 2])); + } + else + { + float _2604 = log(_2357) / _1091; + float _2632; + if ((_2589 >= _2596) && (_2589 < _2604)) + { + float _2615 = (3.0 * (_2589 - _2596)) / (_2604 - _2596); + int _2616 = int(_2615); + float _2618 = _2615 - float(_2616); + _2632 = dot(float3(_2618 * _2618, _2618, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2616], _500[_2616 + 1], _500[_2616 + 2])); + } + else + { + _2632 = log(10000.0) / _1091; + } + _2653 = _2632; + } + _2656 = _2653; + } + float _2659 = log(0.180000007152557373046875) / _1091; + float _2713; + if (_2659 <= _2364) + { + _2713 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2710; + if ((_2659 > _2364) && (_2659 < _2659)) + { + _2710 = (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[3], _499[4], _499[5])).z; + } + else + { + float _2672 = log(_2357) / _1091; + float _2700; + if ((_2659 >= _2659) && (_2659 < _2672)) + { + float _2683 = (3.0 * (_2659 - _2659)) / (_2672 - _2659); + int _2684 = int(_2683); + float _2686 = _2683 - float(_2684); + _2700 = dot(float3(_2686 * _2686, _2686, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2684], _500[_2684 + 1], _500[_2684 + 2])); + } + else + { + _2700 = log(10000.0) / _1091; + } + _2710 = _2700; + } + _2713 = _2710; + } + float _2714 = pow(10.0, 
_2713); + float _2716 = 0.17999999225139617919921875 * pow(2.0, 10.0); + float _2720 = log((_2716 <= 0.0) ? _2359 : _2716) / _1091; + float _2785; + if (_2720 <= _2364) + { + _2785 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _2782; + if ((_2720 > _2364) && (_2720 < _2659)) + { + float _2765 = (3.0 * (_2720 - _2364)) / (_2659 - _2364); + int _2766 = int(_2765); + float _2768 = _2765 - float(_2766); + _2782 = dot(float3(_2768 * _2768, _2768, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_2766], _499[_2766 + 1], _499[_2766 + 2])); + } + else + { + float _2733 = log(_2357) / _1091; + float _2761; + if ((_2720 >= _2659) && (_2720 < _2733)) + { + float _2744 = (3.0 * (_2720 - _2659)) / (_2733 - _2659); + int _2745 = int(_2744); + float _2747 = _2744 - float(_2745); + _2761 = dot(float3(_2747 * _2747, _2747, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_2745], _500[_2745 + 1], _500[_2745 + 2])); + } + else + { + _2761 = log(10000.0) / _1091; + } + _2782 = _2761; + } + _2785 = _2782; + } + float _2786 = pow(10.0, _2785); + float _2787 = _2583.x; + float _2791 = log((_2787 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2787) / _1091; + float _2792 = log(pow(10.0, _2656)); + float _2793 = _2792 / _1091; + float _2870; + if (_2791 <= _2793) + { + _2870 = (_2791 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1091) - ((3.0 * _2792) / _1091)); + } + else + { + float _2800 = log(_2714) / _1091; + float _2862; + if ((_2791 > _2793) && (_2791 < _2800)) + { + float _2845 = (7.0 * (_2791 - _2793)) / (_2800 - _2793); + int _2846 = int(_2845); + float _2848 = _2845 - float(_2846); + _2862 = dot(float3(_2848 * _2848, _2848, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_2846], _503[_2846 + 1], _503[_2846 + 2])); + } + else + { + float _2807 = log(_2786); + float _2808 = _2807 / _1091; + float _2841; + if ((_2791 >= _2800) && (_2791 < _2808)) + { + float _2824 = (7.0 * (_2791 - _2800)) / (_2808 - _2800); + int _2825 = int(_2824); + float _2827 = _2824 - float(_2825); + _2841 = dot(float3(_2827 * _2827, _2827, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2825], _504[_2825 + 1], _504[_2825 + 2])); + } + else + { + _2841 = (_2791 * 0.0599999986588954925537109375) + ((log(1000.0) / _1091) - ((0.0599999986588954925537109375 * _2807) / _1091)); + } + _2862 = _2841; + } + _2870 = _2862; + } + float3 _2872; + _2872.x = pow(10.0, _2870); + float _2873 = _2583.y; + float _2877 = log((_2873 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2873) / _1091; + float _2954; + if (_2877 <= _2793) + { + _2954 = (_2877 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1091) - ((3.0 * _2792) / _1091)); + } + else + { + float _2884 = log(_2714) / _1091; + float _2946; + if ((_2877 > _2793) && (_2877 < _2884)) + { + float _2929 = (7.0 * (_2877 - _2793)) / (_2884 - _2793); + int _2930 = int(_2929); + float _2932 = _2929 - float(_2930); + _2946 = dot(float3(_2932 * _2932, _2932, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_2930], _503[_2930 + 1], _503[_2930 + 2])); + } + else + { + float _2891 = log(_2786); + float _2892 = _2891 / _1091; + float _2925; + if ((_2877 >= _2884) && (_2877 < _2892)) + { + float _2908 = (7.0 * (_2877 - _2884)) / (_2892 - _2884); + int _2909 = int(_2908); + float _2911 = _2908 - float(_2909); + _2925 = dot(float3(_2911 * _2911, _2911, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2909], _504[_2909 + 1], _504[_2909 + 2])); + } + else + { + _2925 = (_2877 * 0.0599999986588954925537109375) + ((log(1000.0) / _1091) - ((0.0599999986588954925537109375 * _2891) / _1091)); + } + _2946 = _2925; + } + _2954 = _2946; + } + _2872.y = pow(10.0, _2954); + float _2957 = _2583.z; + float _2961 = log((_2957 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2957) / _1091; + float _3038; + if (_2961 <= _2793) + { + _3038 = (_2961 * 3.0) + ((log(9.9999997473787516355514526367188e-05) / _1091) - ((3.0 * _2792) / _1091)); + } + else + { + float _2968 = log(_2714) / _1091; + float _3030; + if ((_2961 > _2793) && (_2961 < _2968)) + { + float _3013 = (7.0 * (_2961 - _2793)) / (_2968 - _2793); + int _3014 = int(_3013); + float _3016 = _3013 - float(_3014); + _3030 = dot(float3(_3016 * _3016, _3016, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_503[_3014], _503[_3014 + 1], _503[_3014 + 2])); + } + else + { + float _2975 = log(_2786); + float _2976 = _2975 / _1091; + float _3009; + if ((_2961 >= _2968) && (_2961 < _2976)) + { + float _2992 = (7.0 * (_2961 - _2968)) / (_2976 - _2968); + int _2993 = int(_2992); + float _2995 = _2992 - float(_2993); + _3009 = dot(float3(_2995 * _2995, _2995, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_504[_2993], _504[_2993 + 1], _504[_2993 + 2])); + } + else + { + _3009 = (_2961 * 0.0599999986588954925537109375) + ((log(1000.0) / _1091) - ((0.0599999986588954925537109375 * _2975) / _1091)); + } + _3030 = _3009; + } + _3038 = _3030; + } + _2872.z = pow(10.0, _3038); + float3 _3044 = pow(((_2872 - float3(3.5073844628641381859779357910156e-05)) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _3052 = pow((float3(0.8359375) + (float3(18.8515625) * _3044)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _3044))), float3(78.84375)); + } + else + { + float3 _2201; + if ((_Globals.OutputDevice == 4u) || (_Globals.OutputDevice == 6u)) + { + float3 _1369 = (_932 * float3(1.5)) * (_572 * float3x3(float3(1.04981100559234619140625, 0.0, -9.74845024757087230682373046875e-05), float3(-0.49590301513671875, 1.37331306934356689453125, 0.09824003279209136962890625), float3(0.0, 0.0, 0.991252005100250244140625))); + float _1370 = 
_1369.x; + float _1371 = _1369.y; + float _1373 = _1369.z; + float _1376 = fast::max(fast::max(_1370, _1371), _1373); + float _1381 = (fast::max(_1376, 1.0000000133514319600180897396058e-10) - fast::max(fast::min(fast::min(_1370, _1371), _1373), 1.0000000133514319600180897396058e-10)) / fast::max(_1376, 0.00999999977648258209228515625); + float _1394 = ((_1373 + _1371) + _1370) + (1.75 * sqrt(((_1373 * (_1373 - _1371)) + (_1371 * (_1371 - _1370))) + (_1370 * (_1370 - _1373)))); + float _1395 = _1394 * 0.3333333432674407958984375; + float _1396 = _1381 - 0.4000000059604644775390625; + float _1401 = fast::max(1.0 - abs(_1396 * 2.5), 0.0); + float _1409 = (1.0 + (float(int(sign(_1396 * 5.0))) * (1.0 - (_1401 * _1401)))) * 0.02500000037252902984619140625; + float _1422; + if (_1395 <= 0.053333334624767303466796875) + { + _1422 = _1409; + } + else + { + float _1421; + if (_1395 >= 0.1599999964237213134765625) + { + _1421 = 0.0; + } + else + { + _1421 = _1409 * ((0.23999999463558197021484375 / _1394) - 0.5); + } + _1422 = _1421; + } + float3 _1425 = _1369 * float3(1.0 + _1422); + float _1426 = _1425.x; + float _1427 = _1425.y; + float _1429 = _1425.z; + float _1443; + if ((_1426 == _1427) && (_1427 == _1429)) + { + _1443 = 0.0; + } + else + { + _1443 = 57.2957763671875 * precise::atan2(sqrt(3.0) * (_1427 - _1429), ((2.0 * _1426) - _1427) - _1429); + } + float _1448; + if (_1443 < 0.0) + { + _1448 = _1443 + 360.0; + } + else + { + _1448 = _1443; + } + float _1449 = fast::clamp(_1448, 0.0, 360.0); + float _1454; + if (_1449 > 180.0) + { + _1454 = _1449 - 360.0; + } + else + { + _1454 = _1449; + } + float _1504; + if ((_1454 > (-67.5)) && (_1454 < 67.5)) + { + float _1461 = (_1454 - (-67.5)) * 0.0296296291053295135498046875; + int _1462 = int(_1461); + float _1464 = _1461 - float(_1462); + float _1465 = _1464 * _1464; + float _1466 = _1465 * _1464; + float _1503; + if (_1462 == 3) + { + _1503 = (((_1466 * (-0.16666667163372039794921875)) + (_1465 * 0.5)) + (_1464 * (-0.5))) 
+ 0.16666667163372039794921875; + } + else + { + float _1496; + if (_1462 == 2) + { + _1496 = ((_1466 * 0.5) + (_1465 * (-1.0))) + 0.666666686534881591796875; + } + else + { + float _1491; + if (_1462 == 1) + { + _1491 = (((_1466 * (-0.5)) + (_1465 * 0.5)) + (_1464 * 0.5)) + 0.16666667163372039794921875; + } + else + { + float _1484; + if (_1462 == 0) + { + _1484 = _1466 * 0.16666667163372039794921875; + } + else + { + _1484 = 0.0; + } + _1491 = _1484; + } + _1496 = _1491; + } + _1503 = _1496; + } + _1504 = _1503; + } + else + { + _1504 = 0.0; + } + _1425.x = _1426 + ((((_1504 * 1.5) * _1381) * (0.02999999932944774627685546875 - _1426)) * 0.180000007152557373046875); + float3 _1514 = fast::clamp(fast::clamp(_1425, float3(0.0), float3(65535.0)) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)), float3(0.0), float3(65535.0)); + float3 _1517 = mix(float3(dot(_1514, float3(0.272228717803955078125, 0.674081742763519287109375, 0.053689517080783843994140625))), _1514, float3(0.959999978542327880859375)); + float _1518 = _1517.x; + float _1522 = 0.17999999225139617919921875 * exp2(18.0); + float _1524 = exp2(-14.0); + float _1527 = log((_1518 <= 0.0) ? 
_1524 : _1518) / _1091; + float _1529 = log(0.17999999225139617919921875 * exp2(-15.0)) / _1091; + float _1596; + if (_1527 <= _1529) + { + _1596 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1536 = log(0.180000007152557373046875) / _1091; + float _1593; + if ((_1527 > _1529) && (_1527 < _1536)) + { + float _1576 = (3.0 * (_1527 - _1529)) / (_1536 - _1529); + int _1577 = int(_1576); + float _1579 = _1576 - float(_1577); + _1593 = dot(float3(_1579 * _1579, _1579, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1577], _499[_1577 + 1], _499[_1577 + 2])); + } + else + { + float _1544 = log(_1522) / _1091; + float _1572; + if ((_1527 >= _1536) && (_1527 < _1544)) + { + float _1555 = (3.0 * (_1527 - _1536)) / (_1544 - _1536); + int _1556 = int(_1555); + float _1558 = _1555 - float(_1556); + _1572 = dot(float3(_1558 * _1558, _1558, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1556], _500[_1556 + 1], _500[_1556 + 2])); + } + else + { + _1572 = log(10000.0) / _1091; + } + _1593 = _1572; + } + _1596 = _1593; + } + float3 _1598; + _1598.x = pow(10.0, _1596); + float _1599 = _1517.y; + float _1603 = log((_1599 <= 0.0) ? 
_1524 : _1599) / _1091; + float _1670; + if (_1603 <= _1529) + { + _1670 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1610 = log(0.180000007152557373046875) / _1091; + float _1667; + if ((_1603 > _1529) && (_1603 < _1610)) + { + float _1650 = (3.0 * (_1603 - _1529)) / (_1610 - _1529); + int _1651 = int(_1650); + float _1653 = _1650 - float(_1651); + _1667 = dot(float3(_1653 * _1653, _1653, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1651], _499[_1651 + 1], _499[_1651 + 2])); + } + else + { + float _1618 = log(_1522) / _1091; + float _1646; + if ((_1603 >= _1610) && (_1603 < _1618)) + { + float _1629 = (3.0 * (_1603 - _1610)) / (_1618 - _1610); + int _1630 = int(_1629); + float _1632 = _1629 - float(_1630); + _1646 = dot(float3(_1632 * _1632, _1632, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1630], _500[_1630 + 1], _500[_1630 + 2])); + } + else + { + _1646 = log(10000.0) / _1091; + } + _1667 = _1646; + } + _1670 = _1667; + } + _1598.y = pow(10.0, _1670); + float _1673 = _1517.z; + float _1677 = log((_1673 <= 0.0) ? 
_1524 : _1673) / _1091; + float _1744; + if (_1677 <= _1529) + { + _1744 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1684 = log(0.180000007152557373046875) / _1091; + float _1741; + if ((_1677 > _1529) && (_1677 < _1684)) + { + float _1724 = (3.0 * (_1677 - _1529)) / (_1684 - _1529); + int _1725 = int(_1724); + float _1727 = _1724 - float(_1725); + _1741 = dot(float3(_1727 * _1727, _1727, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1725], _499[_1725 + 1], _499[_1725 + 2])); + } + else + { + float _1692 = log(_1522) / _1091; + float _1720; + if ((_1677 >= _1684) && (_1677 < _1692)) + { + float _1703 = (3.0 * (_1677 - _1684)) / (_1692 - _1684); + int _1704 = int(_1703); + float _1706 = _1703 - float(_1704); + _1720 = dot(float3(_1706 * _1706, _1706, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1704], _500[_1704 + 1], _500[_1704 + 2])); + } + else + { + _1720 = log(10000.0) / _1091; + } + _1741 = _1720; + } + _1744 = _1741; + } + _1598.z = pow(10.0, _1744); + float3 _1748 = (_1598 * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375)); + float _1750 = 0.17999999225139617919921875 * pow(2.0, -12.0); + float _1754 = log((_1750 <= 0.0) ? 
_1524 : _1750) / _1091; + float _1821; + if (_1754 <= _1529) + { + _1821 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1761 = log(0.180000007152557373046875) / _1091; + float _1818; + if ((_1754 > _1529) && (_1754 < _1761)) + { + float _1801 = (3.0 * (_1754 - _1529)) / (_1761 - _1529); + int _1802 = int(_1801); + float _1804 = _1801 - float(_1802); + _1818 = dot(float3(_1804 * _1804, _1804, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1802], _499[_1802 + 1], _499[_1802 + 2])); + } + else + { + float _1769 = log(_1522) / _1091; + float _1797; + if ((_1754 >= _1761) && (_1754 < _1769)) + { + float _1780 = (3.0 * (_1754 - _1761)) / (_1769 - _1761); + int _1781 = int(_1780); + float _1783 = _1780 - float(_1781); + _1797 = dot(float3(_1783 * _1783, _1783, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1781], _500[_1781 + 1], _500[_1781 + 2])); + } + else + { + _1797 = log(10000.0) / _1091; + } + _1818 = _1797; + } + _1821 = _1818; + } + float _1824 = log(0.180000007152557373046875) / _1091; + float _1878; + if (_1824 <= _1529) + { + _1878 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1875; + if ((_1824 > _1529) && (_1824 < _1824)) + { + _1875 = (float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[3], _499[4], _499[5])).z; + } + else + { + float _1837 = log(_1522) / _1091; + float _1865; + if ((_1824 >= _1824) && (_1824 < _1837)) + { + float _1848 = (3.0 * (_1824 - _1824)) / (_1837 - _1824); + int _1849 = int(_1848); + float _1851 = _1848 - float(_1849); + _1865 = dot(float3(_1851 * _1851, _1851, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1849], _500[_1849 + 1], _500[_1849 + 2])); + } + else + { + _1865 = log(10000.0) / _1091; + } + _1875 = _1865; + } + _1878 = _1875; + } + float _1879 = pow(10.0, 
_1878); + float _1881 = 0.17999999225139617919921875 * pow(2.0, 11.0); + float _1885 = log((_1881 <= 0.0) ? _1524 : _1881) / _1091; + float _1950; + if (_1885 <= _1529) + { + _1950 = log(9.9999997473787516355514526367188e-05) / _1091; + } + else + { + float _1947; + if ((_1885 > _1529) && (_1885 < _1824)) + { + float _1930 = (3.0 * (_1885 - _1529)) / (_1824 - _1529); + int _1931 = int(_1930); + float _1933 = _1930 - float(_1931); + _1947 = dot(float3(_1933 * _1933, _1933, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_499[_1931], _499[_1931 + 1], _499[_1931 + 2])); + } + else + { + float _1898 = log(_1522) / _1091; + float _1926; + if ((_1885 >= _1824) && (_1885 < _1898)) + { + float _1909 = (3.0 * (_1885 - _1824)) / (_1898 - _1824); + int _1910 = int(_1909); + float _1912 = _1909 - float(_1910); + _1926 = dot(float3(_1912 * _1912, _1912, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_500[_1910], _500[_1910 + 1], _500[_1910 + 2])); + } + else + { + _1926 = log(10000.0) / _1091; + } + _1947 = _1926; + } + _1950 = _1947; + } + float _1951 = pow(10.0, _1950); + float _1952 = _1748.x; + float _1956 = log((_1952 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _1952) / _1091; + float _1958 = log(pow(10.0, _1821)) / _1091; + float _2030; + if (_1956 <= _1958) + { + _2030 = log(0.004999999888241291046142578125) / _1091; + } + else + { + float _1965 = log(_1879) / _1091; + float _2027; + if ((_1956 > _1958) && (_1956 < _1965)) + { + float _2010 = (7.0 * (_1956 - _1958)) / (_1965 - _1958); + int _2011 = int(_2010); + float _2013 = _2010 - float(_2011); + _2027 = dot(float3(_2013 * _2013, _2013, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2011], _506[_2011 + 1], _506[_2011 + 2])); + } + else + { + float _1972 = log(_1951); + float _1973 = _1972 / _1091; + float _2006; + if ((_1956 >= _1965) && (_1956 < _1973)) + { + float _1989 = (7.0 * (_1956 - _1965)) / (_1973 - _1965); + int _1990 = int(_1989); + float _1992 = _1989 - float(_1990); + _2006 = dot(float3(_1992 * _1992, _1992, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_1990], _507[_1990 + 1], _507[_1990 + 2])); + } + else + { + _2006 = (_1956 * 0.119999997317790985107421875) + ((log(2000.0) / _1091) - ((0.119999997317790985107421875 * _1972) / _1091)); + } + _2027 = _2006; + } + _2030 = _2027; + } + float3 _2032; + _2032.x = pow(10.0, _2030); + float _2033 = _1748.y; + float _2037 = log((_2033 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2033) / _1091; + float _2109; + if (_2037 <= _1958) + { + _2109 = log(0.004999999888241291046142578125) / _1091; + } + else + { + float _2044 = log(_1879) / _1091; + float _2106; + if ((_2037 > _1958) && (_2037 < _2044)) + { + float _2089 = (7.0 * (_2037 - _1958)) / (_2044 - _1958); + int _2090 = int(_2089); + float _2092 = _2089 - float(_2090); + _2106 = dot(float3(_2092 * _2092, _2092, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2090], _506[_2090 + 1], _506[_2090 + 2])); + } + else + { + float _2051 = log(_1951); + float _2052 = _2051 / _1091; + float _2085; + if ((_2037 >= _2044) && (_2037 < _2052)) + { + float _2068 = (7.0 * (_2037 - _2044)) / (_2052 - _2044); + int _2069 = int(_2068); + float _2071 = _2068 - float(_2069); + _2085 = dot(float3(_2071 * _2071, _2071, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_2069], _507[_2069 + 1], _507[_2069 + 2])); + } + else + { + _2085 = (_2037 * 0.119999997317790985107421875) + ((log(2000.0) / _1091) - ((0.119999997317790985107421875 * _2051) / _1091)); + } + _2106 = _2085; + } + _2109 = _2106; + } + _2032.y = pow(10.0, _2109); + float _2112 = _1748.z; + float _2116 = log((_2112 <= 0.0) ? 
9.9999997473787516355514526367188e-05 : _2112) / _1091; + float _2188; + if (_2116 <= _1958) + { + _2188 = log(0.004999999888241291046142578125) / _1091; + } + else + { + float _2123 = log(_1879) / _1091; + float _2185; + if ((_2116 > _1958) && (_2116 < _2123)) + { + float _2168 = (7.0 * (_2116 - _1958)) / (_2123 - _1958); + int _2169 = int(_2168); + float _2171 = _2168 - float(_2169); + _2185 = dot(float3(_2171 * _2171, _2171, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_506[_2169], _506[_2169 + 1], _506[_2169 + 2])); + } + else + { + float _2130 = log(_1951); + float _2131 = _2130 / _1091; + float _2164; + if ((_2116 >= _2123) && (_2116 < _2131)) + { + float _2147 = (7.0 * (_2116 - _2123)) / (_2131 - _2123); + int _2148 = int(_2147); + float _2150 = _2147 - float(_2148); + _2164 = dot(float3(_2150 * _2150, _2150, 1.0), float3x3(float3(0.5, -1.0, 0.5), float3(-1.0, 1.0, 0.5), float3(0.5, 0.0, 0.0)) * float3(_507[_2148], _507[_2148 + 1], _507[_2148 + 2])); + } + else + { + _2164 = (_2116 * 0.119999997317790985107421875) + ((log(2000.0) / _1091) - ((0.119999997317790985107421875 * _2130) / _1091)); + } + _2185 = _2164; + } + _2188 = _2185; + } + _2032.z = pow(10.0, _2188); + float3 _2193 = pow((_2032 * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _2201 = pow((float3(0.8359375) + (float3(18.8515625) * _2193)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _2193))), float3(78.84375)); + } + else + { + float3 _1366; + if (_Globals.OutputDevice == 7u) + { + float3 _1358 = pow(((_932 * _573) * _602) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125)); + _1366 = pow((float3(0.8359375) + (float3(18.8515625) * _1358)) * (float3(1.0) / (float3(1.0) + (float3(18.6875) * _1358))), float3(78.84375)); + } + else + { + _1366 = pow((_1324 * _573) * _602, float3(_Globals.InverseGamma.z)); + } + _2201 = _1366; + } + _3052 = _2201; + } + _3062 = _3052; + } + _3103 = 
_3062; + } + float3 _3104 = _3103 * float3(0.95238101482391357421875); + float4 _3105 = float4(_3104.x, _3104.y, _3104.z, float4(0.0).w); + _3105.w = 0.0; + out.out_var_SV_Target0 = _3105; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag new file mode 100644 index 00000000000..72a9c58a639 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -0,0 +1,503 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 
View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 
View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 
View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_TranslucentBasePass +{ + uint TranslucentBasePass_Shared_Forward_NumLocalLights; + uint TranslucentBasePass_Shared_Forward_NumReflectionCaptures; + uint TranslucentBasePass_Shared_Forward_HasDirectionalLight; + uint TranslucentBasePass_Shared_Forward_NumGridCells; + packed_int3 TranslucentBasePass_Shared_Forward_CulledGridSize; + uint TranslucentBasePass_Shared_Forward_MaxCulledLightsPerCell; + uint TranslucentBasePass_Shared_Forward_LightGridPixelSizeShift; + uint PrePadding_TranslucentBasePass_Shared_Forward_36; + uint PrePadding_TranslucentBasePass_Shared_Forward_40; + uint PrePadding_TranslucentBasePass_Shared_Forward_44; + packed_float3 TranslucentBasePass_Shared_Forward_LightGridZParams; + float PrePadding_TranslucentBasePass_Shared_Forward_60; + packed_float3 TranslucentBasePass_Shared_Forward_DirectionalLightDirection; + float PrePadding_TranslucentBasePass_Shared_Forward_76; + packed_float3 TranslucentBasePass_Shared_Forward_DirectionalLightColor; + 
float TranslucentBasePass_Shared_Forward_DirectionalLightVolumetricScatteringIntensity; + uint TranslucentBasePass_Shared_Forward_DirectionalLightShadowMapChannelMask; + uint PrePadding_TranslucentBasePass_Shared_Forward_100; + float2 TranslucentBasePass_Shared_Forward_DirectionalLightDistanceFadeMAD; + uint TranslucentBasePass_Shared_Forward_NumDirectionalLightCascades; + uint PrePadding_TranslucentBasePass_Shared_Forward_116; + uint PrePadding_TranslucentBasePass_Shared_Forward_120; + uint PrePadding_TranslucentBasePass_Shared_Forward_124; + float4 TranslucentBasePass_Shared_Forward_CascadeEndDepths; + float4x4 TranslucentBasePass_Shared_Forward_DirectionalLightWorldToShadowMatrix[4]; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapMinMax[4]; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapAtlasBufferSize; + float TranslucentBasePass_Shared_Forward_DirectionalLightDepthBias; + uint TranslucentBasePass_Shared_Forward_DirectionalLightUseStaticShadowing; + uint PrePadding_TranslucentBasePass_Shared_Forward_488; + uint PrePadding_TranslucentBasePass_Shared_Forward_492; + float4 TranslucentBasePass_Shared_Forward_DirectionalLightStaticShadowBufferSize; + float4x4 TranslucentBasePass_Shared_Forward_DirectionalLightWorldToStaticShadow; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_576; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_580; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_584; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_588; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_592; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_596; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_600; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_604; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_608; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_612; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_616; + float 
PrePadding_TranslucentBasePass_Shared_ForwardISR_620; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_624; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_628; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_632; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_636; + uint TranslucentBasePass_Shared_ForwardISR_NumLocalLights; + uint TranslucentBasePass_Shared_ForwardISR_NumReflectionCaptures; + uint TranslucentBasePass_Shared_ForwardISR_HasDirectionalLight; + uint TranslucentBasePass_Shared_ForwardISR_NumGridCells; + packed_int3 TranslucentBasePass_Shared_ForwardISR_CulledGridSize; + uint TranslucentBasePass_Shared_ForwardISR_MaxCulledLightsPerCell; + uint TranslucentBasePass_Shared_ForwardISR_LightGridPixelSizeShift; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_676; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_680; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_684; + packed_float3 TranslucentBasePass_Shared_ForwardISR_LightGridZParams; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_700; + packed_float3 TranslucentBasePass_Shared_ForwardISR_DirectionalLightDirection; + float PrePadding_TranslucentBasePass_Shared_ForwardISR_716; + packed_float3 TranslucentBasePass_Shared_ForwardISR_DirectionalLightColor; + float TranslucentBasePass_Shared_ForwardISR_DirectionalLightVolumetricScatteringIntensity; + uint TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowMapChannelMask; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_740; + float2 TranslucentBasePass_Shared_ForwardISR_DirectionalLightDistanceFadeMAD; + uint TranslucentBasePass_Shared_ForwardISR_NumDirectionalLightCascades; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_756; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_760; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_764; + float4 TranslucentBasePass_Shared_ForwardISR_CascadeEndDepths; + float4x4 
TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToShadowMatrix[4]; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapMinMax[4]; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapAtlasBufferSize; + float TranslucentBasePass_Shared_ForwardISR_DirectionalLightDepthBias; + uint TranslucentBasePass_Shared_ForwardISR_DirectionalLightUseStaticShadowing; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_1128; + uint PrePadding_TranslucentBasePass_Shared_ForwardISR_1132; + float4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightStaticShadowBufferSize; + float4x4 TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToStaticShadow; + float PrePadding_TranslucentBasePass_Shared_Reflection_1216; + float PrePadding_TranslucentBasePass_Shared_Reflection_1220; + float PrePadding_TranslucentBasePass_Shared_Reflection_1224; + float PrePadding_TranslucentBasePass_Shared_Reflection_1228; + float PrePadding_TranslucentBasePass_Shared_Reflection_1232; + float PrePadding_TranslucentBasePass_Shared_Reflection_1236; + float PrePadding_TranslucentBasePass_Shared_Reflection_1240; + float PrePadding_TranslucentBasePass_Shared_Reflection_1244; + float PrePadding_TranslucentBasePass_Shared_Reflection_1248; + float PrePadding_TranslucentBasePass_Shared_Reflection_1252; + float PrePadding_TranslucentBasePass_Shared_Reflection_1256; + float PrePadding_TranslucentBasePass_Shared_Reflection_1260; + float PrePadding_TranslucentBasePass_Shared_Reflection_1264; + float PrePadding_TranslucentBasePass_Shared_Reflection_1268; + float PrePadding_TranslucentBasePass_Shared_Reflection_1272; + float PrePadding_TranslucentBasePass_Shared_Reflection_1276; + float4 TranslucentBasePass_Shared_Reflection_SkyLightParameters; + float TranslucentBasePass_Shared_Reflection_SkyLightCubemapBrightness; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1300; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1304; + float 
PrePadding_TranslucentBasePass_Shared_PlanarReflection_1308; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1312; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1316; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1320; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1324; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1328; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1332; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1336; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1340; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1344; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1348; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1352; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1356; + float4 TranslucentBasePass_Shared_PlanarReflection_ReflectionPlane; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionOrigin; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionXAxis; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionYAxis; + float3x4 TranslucentBasePass_Shared_PlanarReflection_InverseTransposeMirrorMatrix; + packed_float3 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1484; + float2 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters2; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1496; + float PrePadding_TranslucentBasePass_Shared_PlanarReflection_1500; + float4x4 TranslucentBasePass_Shared_PlanarReflection_ProjectionWithExtraFOV[2]; + float4 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenScaleBias[2]; + float2 TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenBound; + uint TranslucentBasePass_Shared_PlanarReflection_bIsStereo; + float 
PrePadding_TranslucentBasePass_Shared_Fog_1676; + float PrePadding_TranslucentBasePass_Shared_Fog_1680; + float PrePadding_TranslucentBasePass_Shared_Fog_1684; + float PrePadding_TranslucentBasePass_Shared_Fog_1688; + float PrePadding_TranslucentBasePass_Shared_Fog_1692; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters2; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogColorParameter; + float4 TranslucentBasePass_Shared_Fog_ExponentialFogParameters3; + float4 TranslucentBasePass_Shared_Fog_InscatteringLightDirection; + float4 TranslucentBasePass_Shared_Fog_DirectionalInscatteringColor; + float2 TranslucentBasePass_Shared_Fog_SinCosInscatteringColorCubemapRotation; + float PrePadding_TranslucentBasePass_Shared_Fog_1800; + float PrePadding_TranslucentBasePass_Shared_Fog_1804; + packed_float3 TranslucentBasePass_Shared_Fog_FogInscatteringTextureParameters; + float TranslucentBasePass_Shared_Fog_ApplyVolumetricFog; + float PrePadding_TranslucentBasePass_1824; + float PrePadding_TranslucentBasePass_1828; + float PrePadding_TranslucentBasePass_1832; + float PrePadding_TranslucentBasePass_1836; + float PrePadding_TranslucentBasePass_1840; + float PrePadding_TranslucentBasePass_1844; + float PrePadding_TranslucentBasePass_1848; + float PrePadding_TranslucentBasePass_1852; + float PrePadding_TranslucentBasePass_1856; + float PrePadding_TranslucentBasePass_1860; + float PrePadding_TranslucentBasePass_1864; + float PrePadding_TranslucentBasePass_1868; + float PrePadding_TranslucentBasePass_1872; + float PrePadding_TranslucentBasePass_1876; + float PrePadding_TranslucentBasePass_1880; + float PrePadding_TranslucentBasePass_1884; + float PrePadding_TranslucentBasePass_1888; + float PrePadding_TranslucentBasePass_1892; + float PrePadding_TranslucentBasePass_1896; + float PrePadding_TranslucentBasePass_1900; + float PrePadding_TranslucentBasePass_1904; + float 
PrePadding_TranslucentBasePass_1908; + float PrePadding_TranslucentBasePass_1912; + float PrePadding_TranslucentBasePass_1916; + float PrePadding_TranslucentBasePass_1920; + float PrePadding_TranslucentBasePass_1924; + float PrePadding_TranslucentBasePass_1928; + float PrePadding_TranslucentBasePass_1932; + float PrePadding_TranslucentBasePass_1936; + float PrePadding_TranslucentBasePass_1940; + float PrePadding_TranslucentBasePass_1944; + float PrePadding_TranslucentBasePass_1948; + float PrePadding_TranslucentBasePass_1952; + float PrePadding_TranslucentBasePass_1956; + float PrePadding_TranslucentBasePass_1960; + float PrePadding_TranslucentBasePass_1964; + float PrePadding_TranslucentBasePass_1968; + float PrePadding_TranslucentBasePass_1972; + float PrePadding_TranslucentBasePass_1976; + float PrePadding_TranslucentBasePass_1980; + float PrePadding_TranslucentBasePass_1984; + float PrePadding_TranslucentBasePass_1988; + float PrePadding_TranslucentBasePass_1992; + float PrePadding_TranslucentBasePass_1996; + float PrePadding_TranslucentBasePass_2000; + float PrePadding_TranslucentBasePass_2004; + float PrePadding_TranslucentBasePass_2008; + float PrePadding_TranslucentBasePass_2012; + float PrePadding_TranslucentBasePass_2016; + float PrePadding_TranslucentBasePass_2020; + float PrePadding_TranslucentBasePass_2024; + float PrePadding_TranslucentBasePass_2028; + float PrePadding_TranslucentBasePass_2032; + float PrePadding_TranslucentBasePass_2036; + float PrePadding_TranslucentBasePass_2040; + float PrePadding_TranslucentBasePass_2044; + float PrePadding_TranslucentBasePass_2048; + float PrePadding_TranslucentBasePass_2052; + float PrePadding_TranslucentBasePass_2056; + float PrePadding_TranslucentBasePass_2060; + float PrePadding_TranslucentBasePass_2064; + float PrePadding_TranslucentBasePass_2068; + float PrePadding_TranslucentBasePass_2072; + float PrePadding_TranslucentBasePass_2076; + float PrePadding_TranslucentBasePass_2080; + float 
PrePadding_TranslucentBasePass_2084; + float PrePadding_TranslucentBasePass_2088; + float PrePadding_TranslucentBasePass_2092; + float PrePadding_TranslucentBasePass_2096; + float PrePadding_TranslucentBasePass_2100; + float PrePadding_TranslucentBasePass_2104; + float PrePadding_TranslucentBasePass_2108; + float PrePadding_TranslucentBasePass_2112; + float PrePadding_TranslucentBasePass_2116; + float PrePadding_TranslucentBasePass_2120; + float PrePadding_TranslucentBasePass_2124; + float PrePadding_TranslucentBasePass_2128; + float PrePadding_TranslucentBasePass_2132; + float PrePadding_TranslucentBasePass_2136; + float PrePadding_TranslucentBasePass_2140; + float4 TranslucentBasePass_HZBUvFactorAndInvFactor; + float4 TranslucentBasePass_PrevScreenPositionScaleBias; + float TranslucentBasePass_PrevSceneColorPreExposureInv; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[2]; + float4 Material_ScalarExpressions[1]; +}; + +constant float _108 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; + uint gl_SampleMask [[sample_mask]]; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 in_var_TEXCOORD11_centroid [[user(locn1)]]; + uint in_var_PRIMITIVE_ID [[user(locn2)]]; + float4 in_var_TEXCOORD7 [[user(locn3)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], constant type_TranslucentBasePass& TranslucentBasePass [[buffer(2)]], constant type_Material& Material [[buffer(3)]], texture3d TranslucentBasePass_Shared_Fog_IntegratedLightScattering [[texture(0)]], sampler View_SharedBilinearClampedSampler [[sampler(0)]], float4 gl_FragCoord [[position]], uint gl_SampleMaskIn [[sample_mask]]) +{ + main0_out out = {}; + float4 _137 = View.View_SVPositionToTranslatedWorld * float4(gl_FragCoord.xyz, 1.0); + float3 _142 = (_137.xyz / float3(_137.w)) - 
float3(View.View_PreViewTranslation); + bool _165 = TranslucentBasePass.TranslucentBasePass_Shared_Fog_ApplyVolumetricFog > 0.0; + float4 _215; + if (_165) + { + float4 _172 = View.View_WorldToClip * float4(_142, 1.0); + float _173 = _172.w; + float4 _202; + if (_165) + { + _202 = TranslucentBasePass_Shared_Fog_IntegratedLightScattering.sample(View_SharedBilinearClampedSampler, float3(((_172.xy / float2(_173)).xy * float2(0.5, -0.5)) + float2(0.5), (log2((_173 * View.View_VolumetricFogGridZParams[0]) + View.View_VolumetricFogGridZParams[1]) * View.View_VolumetricFogGridZParams[2]) * View.View_VolumetricFogInvGridSize[2]), level(0.0)); + } + else + { + _202 = float4(0.0, 0.0, 0.0, 1.0); + } + _215 = float4(_202.xyz + (in.in_var_TEXCOORD7.xyz * float3(_202.w)), _202.w * in.in_var_TEXCOORD7.w); + } + else + { + _215 = in.in_var_TEXCOORD7; + } + float3 _216 = fast::max(Material.Material_VectorExpressions[1].xyz * float3(((1.0 + dot(float3(-1.0, -1.5, 3.0) / float3(sqrt(12.25)), fast::normalize(float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz) * fast::normalize((float3(0.0, 0.0, 1.0) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)))) * 0.5) + 0.20000000298023223876953125), float3(0.0)); + float3 _246; + if (View.View_OutOfBoundsMask > 0.0) + { + uint _222 = in.in_var_PRIMITIVE_ID * 26u; + float3 _245; + if (any(abs(_142 - View_PrimitiveSceneData._m0[_222 + 5u].xyz) > (View_PrimitiveSceneData._m0[_222 + 19u].xyz + float3(1.0)))) + { + _245 = mix(float3(1.0, 1.0, 0.0), float3(0.0, 1.0, 1.0), float3(float3(fract(dot(_142, float3(0.57700002193450927734375)) * 0.00200000009499490261077880859375)) > float3(0.5))); + } + else + { + _245 = _216; + } + _246 = _245; + } + else + { + _246 = _216; + } + float4 _255 = float4((_246 * float3(_215.w)) + _215.xyz, _108); + _255.w = 1.0; + float4 _268; 
+ uint _269; + if (View.View_NumSceneColorMSAASamples > 1) + { + _268 = _255 * float4(float(View.View_NumSceneColorMSAASamples) * 0.25); + _269 = gl_SampleMaskIn & 15u; + } + else + { + _268 = _255; + _269 = gl_SampleMaskIn; + } + out.out_var_SV_Target0 = _268; + out.gl_SampleMask = _269; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag b/reference/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..d7a1993dc32 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag @@ -0,0 +1,213 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float 
PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4 ShadowBufferSize; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float4x4 ScreenToShadowMatrix; + float2 ProjectionDepthBiasParameters; + float4 ModulatedShadowColor; + float4 ShadowTileOffsetAndSize; +}; + +constant float4 _58 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], float4 _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData [[color(0)]], texture2d ShadowDepthTexture [[texture(0)]], sampler ShadowDepthTextureSampler [[sampler(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; + float _68 = _67.w; + float4 _82 = _Globals.ScreenToShadowMatrix * 
float4((((gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw) - View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); + float _118 = fast::clamp(((fast::clamp((ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx * float3(_Globals.SoftTransitionScale.z)) - float3((fast::min(_82.z, 0.999989986419677734375) * _Globals.SoftTransitionScale.z) - 1.0), float3(0.0), float3(1.0)).x - 0.5) * _Globals.ShadowSharpen) + 0.5, 0.0, 1.0); + float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); + float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); + _128.w = 0.0; + out.out_var_SV_Target0 = _128; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag b/reference/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..d7a1993dc32 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag @@ -0,0 +1,213 @@ +#include +#include + +using namespace metal; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; 
+ packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + 
float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float 
View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Globals +{ + float3 SoftTransitionScale; + float4 ShadowBufferSize; + float ShadowFadeFraction; + float ShadowSharpen; + float4 LightPositionAndInvRadius; + float4x4 ScreenToShadowMatrix; + float2 ProjectionDepthBiasParameters; + float4 ModulatedShadowColor; + float4 ShadowTileOffsetAndSize; +}; + +constant float4 _58 = {}; + +struct 
main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], float4 _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData [[color(0)]], texture2d ShadowDepthTexture [[texture(0)]], sampler ShadowDepthTextureSampler [[sampler(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + float4 _67 = _RESERVED_IDENTIFIER_FIXUP_gl_LastFragData; + float _68 = _67.w; + float4 _82 = _Globals.ScreenToShadowMatrix * float4((((gl_FragCoord.xy * View.View_BufferSizeAndInvSize.zw) - View.View_ScreenPositionScaleBias.wz) / View.View_ScreenPositionScaleBias.xy) * float2(_68), _68, 1.0); + float _118 = fast::clamp(((fast::clamp((ShadowDepthTexture.sample(ShadowDepthTextureSampler, (((_82.xyz / float3(_82.w)).xy * _Globals.ShadowTileOffsetAndSize.zw).xy + _Globals.ShadowTileOffsetAndSize.xy).xy, level(0.0)).xxx * float3(_Globals.SoftTransitionScale.z)) - float3((fast::min(_82.z, 0.999989986419677734375) * _Globals.SoftTransitionScale.z) - 1.0), float3(0.0), float3(1.0)).x - 0.5) * _Globals.ShadowSharpen) + 0.5, 0.0, 1.0); + float3 _127 = mix(_Globals.ModulatedShadowColor.xyz, float3(1.0), float3(mix(1.0, _118 * _118, _Globals.ShadowFadeFraction))); + float4 _128 = float4(_127.x, _127.y, _127.z, _58.w); + _128.w = 0.0; + out.out_var_SV_Target0 = _128; + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag b/reference/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag new file mode 100644 index 00000000000..575c9ddb5e9 --- /dev/null +++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag @@ -0,0 +1,130 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct spvDescriptorSetBuffer0 +{ + const device type_StructuredBuffer_v4float* CulledObjectBoxBounds [[id(0)]]; + constant type_Globals* _Globals [[id(1)]]; + texture2d RWShadowTileNumCulledObjects [[id(2)]]; + device atomic_uint* RWShadowTileNumCulledObjects_atomic [[id(3)]]; +}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float(((*spvDescriptorSet0._Globals).ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2((*spvDescriptorSet0._Globals).ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if 
(all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xy > _96.xy) && all((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz < _101)) + { + float3 _121 = float3(0.5) * ((*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103].xyz + (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_107].xyz); + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + _73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - _121; + float3 _170 = float3(dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 2u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 3u].xyz), dot(_166, (*spvDescriptorSet0.CulledObjectBoxBounds)._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&spvDescriptorSet0.RWShadowTileNumCulledObjects_atomic[(_78 * (*spvDescriptorSet0._Globals).ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag b/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..0918dfbfefc --- /dev/null +++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,122 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include 
+#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && 
all(CulledObjectBoxBounds._m0[_103].xyz < _101)) + { + float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz); + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + _73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - _121; + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..0918dfbfefc --- /dev/null +++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,122 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +struct type_Globals +{ + uint2 ShadowTileListGroupSize; +}; + +constant float3 _70 = {}; + +struct main0_out +{ + float4 out_var_SV_Target0 [[color(0)]]; +}; + +struct main0_in +{ + uint in_var_TEXCOORD0 [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredBuffer_v4float& CulledObjectBoxBounds [[buffer(0)]], constant type_Globals& _Globals [[buffer(1)]], texture2d RWShadowTileNumCulledObjects [[texture(0)]], device atomic_uint* RWShadowTileNumCulledObjects_atomic [[buffer(2)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + uint2 _77 = uint2(gl_FragCoord.xy); + uint _78 = _77.y; + uint _83 = _77.x; + float2 _91 = float2(float(_83), float((_Globals.ShadowTileListGroupSize.y - 1u) - _78)); + float2 _93 = float2(_Globals.ShadowTileListGroupSize); + float2 _96 = ((_91 / _93) * float2(2.0)) - float2(1.0); + float2 _100 = (((_91 + float2(1.0)) / _93) * float2(2.0)) - float2(1.0); + float3 _101 = float3(_100.x, _100.y, _70.z); + _101.z = 1.0; + uint _103 = in.in_var_TEXCOORD0 * 5u; + uint _107 = _103 + 1u; + if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _101)) + { + float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + 
CulledObjectBoxBounds._m0[_107].xyz); + float _122 = _96.x; + float _123 = _96.y; + spvUnsafeArray _73; + _73[0] = float3(_122, _123, -1000.0); + float _126 = _100.x; + _73[1] = float3(_126, _123, -1000.0); + float _129 = _100.y; + _73[2] = float3(_122, _129, -1000.0); + _73[3] = float3(_126, _129, -1000.0); + _73[4] = float3(_122, _123, 1.0); + _73[5] = float3(_126, _123, 1.0); + _73[6] = float3(_122, _129, 1.0); + _73[7] = float3(_126, _129, 1.0); + float3 _155; + float3 _158; + _155 = float3(-500000.0); + _158 = float3(500000.0); + for (int _160 = 0; _160 < 8; ) + { + float3 _166 = _73[_160] - _121; + float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); + _155 = fast::max(_155, _170); + _158 = fast::min(_158, _170); + _160++; + continue; + } + if (all(_158 < float3(1.0)) && all(_155 > float3(-1.0))) + { + uint _179 = atomic_fetch_add_explicit((device atomic_uint*)&RWShadowTileNumCulledObjects_atomic[(_78 * _Globals.ShadowTileListGroupSize.x) + _83], 1u, memory_order_relaxed); + } + } + out.out_var_SV_Target0 = float4(0.0); + return out; +} + diff --git a/reference/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc b/reference/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc new file mode 100644 index 00000000000..1d0212593bb --- /dev/null +++ b/reference/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc @@ -0,0 +1,396 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; + float4 Color; + spvUnsafeArray TexCoords; + float4 LightMapCoordinate; + uint PrimitiveId; + uint LightmapDataIndex; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FSharedBasePassInterpolants +{ +}; +struct FBasePassInterpolantsVSToDS +{ + FSharedBasePassInterpolants _m0; +}; + +struct FBasePassVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + FBasePassInterpolantsVSToDS BasePassInterpolants; + float4 Position; +}; + +struct FPNTessellationHSToDS +{ + FBasePassVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 
View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + 
float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float 
View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_StructuredBuffer_v4float +{ + float4 _m0[1]; +}; + +constant float4 _142 = {}; + +struct main0_out +{ + float4 
out_var_COLOR0; + uint out_var_LIGHTMAP_ID; + float3 out_var_PN_DisplacementScales; + spvUnsafeArray out_var_PN_POSITION; + float out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + uint out_var_PRIMITIVE_ID; + spvUnsafeArray out_var_TEXCOORD0; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_TEXCOORD4; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_COLOR0 [[attribute(2)]]; + float4 in_var_TEXCOORD0_0 [[attribute(3)]]; + float4 in_var_TEXCOORD4 [[attribute(4)]]; + uint in_var_PRIMITIVE_ID [[attribute(5)]]; + uint in_var_LIGHTMAP_ID [[attribute(6)]]; + float4 in_var_VS_To_DS_Position [[attribute(7)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], const device type_StructuredBuffer_v4float& View_PrimitiveSceneData [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _144 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, 
gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _145 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _146 = spvUnsafeArray({ gl_in[0].in_var_COLOR0, gl_in[1].in_var_COLOR0, gl_in[2].in_var_COLOR0, gl_in[3].in_var_COLOR0, gl_in[4].in_var_COLOR0, gl_in[5].in_var_COLOR0, gl_in[6].in_var_COLOR0, gl_in[7].in_var_COLOR0, gl_in[8].in_var_COLOR0, gl_in[9].in_var_COLOR0, gl_in[10].in_var_COLOR0, gl_in[11].in_var_COLOR0 }); + spvUnsafeArray, 12> _147 = spvUnsafeArray, 12>({ spvUnsafeArray({ gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[1].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[2].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[3].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[4].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[5].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[6].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[7].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[8].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[9].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[10].in_var_TEXCOORD0_0 }), spvUnsafeArray({ gl_in[11].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray _148 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD4, gl_in[1].in_var_TEXCOORD4, gl_in[2].in_var_TEXCOORD4, gl_in[3].in_var_TEXCOORD4, gl_in[4].in_var_TEXCOORD4, gl_in[5].in_var_TEXCOORD4, gl_in[6].in_var_TEXCOORD4, 
gl_in[7].in_var_TEXCOORD4, gl_in[8].in_var_TEXCOORD4, gl_in[9].in_var_TEXCOORD4, gl_in[10].in_var_TEXCOORD4, gl_in[11].in_var_TEXCOORD4 }); + spvUnsafeArray _149 = spvUnsafeArray({ gl_in[0].in_var_PRIMITIVE_ID, gl_in[1].in_var_PRIMITIVE_ID, gl_in[2].in_var_PRIMITIVE_ID, gl_in[3].in_var_PRIMITIVE_ID, gl_in[4].in_var_PRIMITIVE_ID, gl_in[5].in_var_PRIMITIVE_ID, gl_in[6].in_var_PRIMITIVE_ID, gl_in[7].in_var_PRIMITIVE_ID, gl_in[8].in_var_PRIMITIVE_ID, gl_in[9].in_var_PRIMITIVE_ID, gl_in[10].in_var_PRIMITIVE_ID, gl_in[11].in_var_PRIMITIVE_ID }); + spvUnsafeArray _150 = spvUnsafeArray({ gl_in[0].in_var_LIGHTMAP_ID, gl_in[1].in_var_LIGHTMAP_ID, gl_in[2].in_var_LIGHTMAP_ID, gl_in[3].in_var_LIGHTMAP_ID, gl_in[4].in_var_LIGHTMAP_ID, gl_in[5].in_var_LIGHTMAP_ID, gl_in[6].in_var_LIGHTMAP_ID, gl_in[7].in_var_LIGHTMAP_ID, gl_in[8].in_var_LIGHTMAP_ID, gl_in[9].in_var_LIGHTMAP_ID, gl_in[10].in_var_LIGHTMAP_ID, gl_in[11].in_var_LIGHTMAP_ID }); + spvUnsafeArray _259 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _284 = spvUnsafeArray({ FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[0], _145[0], _146[0], _147[0], _148[0], _149[0], _150[0] } }, FBasePassInterpolantsVSToDS{ { } }, _259[0] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[1], _145[1], _146[1], _147[1], _148[1], _149[1], _150[1] } }, FBasePassInterpolantsVSToDS{ { } }, _259[1] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[2], _145[2], _146[2], _147[2], _148[2], _149[2], _150[2] } }, 
FBasePassInterpolantsVSToDS{ { } }, _259[2] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[3], _145[3], _146[3], _147[3], _148[3], _149[3], _150[3] } }, FBasePassInterpolantsVSToDS{ { } }, _259[3] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[4], _145[4], _146[4], _147[4], _148[4], _149[4], _150[4] } }, FBasePassInterpolantsVSToDS{ { } }, _259[4] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[5], _145[5], _146[5], _147[5], _148[5], _149[5], _150[5] } }, FBasePassInterpolantsVSToDS{ { } }, _259[5] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[6], _145[6], _146[6], _147[6], _148[6], _149[6], _150[6] } }, FBasePassInterpolantsVSToDS{ { } }, _259[6] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[7], _145[7], _146[7], _147[7], _148[7], _149[7], _150[7] } }, FBasePassInterpolantsVSToDS{ { } }, _259[7] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[8], _145[8], _146[8], _147[8], _148[8], _149[8], _150[8] } }, FBasePassInterpolantsVSToDS{ { } }, _259[8] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[9], _145[9], _146[9], _147[9], _148[9], _149[9], _150[9] } }, FBasePassInterpolantsVSToDS{ { } }, _259[9] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[10], _145[10], _146[10], _147[10], _148[10], _149[10], _150[10] } }, FBasePassInterpolantsVSToDS{ { } }, _259[10] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _144[11], _145[11], _146[11], _147[11], _148[11], _149[11], _150[11] } }, FBasePassInterpolantsVSToDS{ { } }, _259[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _284; + float4 _301 = 
float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _310 = View_PrimitiveSceneData._m0[(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.PrimitiveId * 26u) + 22u].xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _313 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _314 = 2u * gl_InvocationID; + uint _315 = 3u + _314; + uint _316 = _314 + 4u; + float4 _328 = float4(param_var_I[_313].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _336 = float4(param_var_I[_315].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _344 = float4(param_var_I[_316].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _392 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_313].Position) - (float4(dot(param_var_I[_313].Position - param_var_I[gl_InvocationID].Position, _301)) * _301)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_315].Position) + param_var_I[_316].Position) - (float4(dot(param_var_I[_316].Position - param_var_I[_315].Position, _336)) * _336)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_313].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_313].Position, _328)) * _328)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_316].Position) + 
param_var_I[_315].Position) - (float4(dot(param_var_I[_315].Position - param_var_I[_316].Position, _344)) * _344)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_COLOR0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.Color; + gl_out[gl_InvocationID].out_var_TEXCOORD0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TexCoords; + gl_out[gl_InvocationID].out_var_TEXCOORD4 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.LightMapCoordinate; + gl_out[gl_InvocationID].out_var_PRIMITIVE_ID = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.PrimitiveId; + gl_out[gl_InvocationID].out_var_LIGHTMAP_ID = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.LightmapDataIndex; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_PN_POSITION = _392; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _310; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ param_var_I[gl_InvocationID], _392, _310, 1.0, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _450 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * 
float4(0.16666667163372039794921875); + float4 _463; + _463.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _463.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _463.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _463.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _589; + for (;;) + { + float4 _489 = View.View_ViewToClip * float4(0.0); + float4 _494 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _495 = _494.xyz; + float3 _496 = _489.xyz; + float _498 = _494.w; + float _499 = _489.w; + float4 _516 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _517 = _516.xyz; + float _519 = _516.w; + float4 _537 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _538 = _537.xyz; + float _540 = _537.w; + if (any((((int3((_495 - _496) < float3(_498 + _499)) + (int3(2) * int3((_495 + _496) > float3((-_498) - _499)))) | (int3((_517 - _496) < float3(_519 + _499)) + (int3(2) * int3((_517 + _496) > float3((-_519) - _499))))) | (int3((_538 - _496) < float3(_540 + _499)) + (int3(2) * int3((_538 + _496) > float3((-_540) - _499))))) != int3(3))) + { + _589 = float4(0.0); + break; + } + float3 _558 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _559 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _560 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _563 = 
(float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _566 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _569 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _573 = sqrt(dot(_559, _559) / dot(_566, _566)); + float _577 = sqrt(dot(_560, _560) / dot(_569, _569)); + float _581 = sqrt(dot(_558, _558) / dot(_563, _563)); + float4 _582 = float4(_573, _577, _581, 1.0); + _582.w = 0.333000004291534423828125 * ((_573 + _577) + _581); + _589 = float4(View.View_AdaptiveTessellationFactor) * _582; + break; + } + float4 _591 = fast::clamp(_463 * _589, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_591.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_591.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_591.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_591.w); + patchOut.out_var_PN_POSITION9 = _450 + ((_450 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc b/reference/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc new file mode 100644 index 00000000000..f72e5d3b753 --- /dev/null +++ b/reference/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc @@ -0,0 +1,464 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template 
+struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FHitProxyVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + float4 Position; + uint VertexID; +}; + +struct FHullShaderConstantDominantVertexData +{ + float2 UV; + float4 Normal; + float3 Tangent; +}; + +struct FHullShaderConstantDominantEdgeData +{ + float2 UV0; + float2 UV1; + float4 Normal0; + float4 Normal1; + float3 Tangent0; + float3 Tangent1; +}; + +struct FPNTessellationHSToDS +{ + FHitProxyVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; + FHullShaderConstantDominantVertexData DominantVertex; + FHullShaderConstantDominantEdgeData DominantEdge; +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 
View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 
View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float 
View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float 
PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +constant float4 _140 = {}; + +struct main0_out +{ + float3 out_var_PN_DisplacementScales; + float2 out_var_PN_DominantEdge; + float2 out_var_PN_DominantEdge1; + float4 out_var_PN_DominantEdge2; + float4 out_var_PN_DominantEdge3; + float3 out_var_PN_DominantEdge4; + float3 out_var_PN_DominantEdge5; + float2 out_var_PN_DominantVertex; + float4 out_var_PN_DominantVertex1; + float3 out_var_PN_DominantVertex2; + spvUnsafeArray out_var_PN_POSITION; + float 
out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; + uint out_var_VS_To_DS_VertexID; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_VS_To_DS_Position [[attribute(2)]]; + uint in_var_VS_To_DS_VertexID [[attribute(3)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _142 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _143 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, 
gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _192 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _193 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_VertexID, gl_in[1].in_var_VS_To_DS_VertexID, gl_in[2].in_var_VS_To_DS_VertexID, gl_in[3].in_var_VS_To_DS_VertexID, gl_in[4].in_var_VS_To_DS_VertexID, gl_in[5].in_var_VS_To_DS_VertexID, gl_in[6].in_var_VS_To_DS_VertexID, gl_in[7].in_var_VS_To_DS_VertexID, gl_in[8].in_var_VS_To_DS_VertexID, gl_in[9].in_var_VS_To_DS_VertexID, gl_in[10].in_var_VS_To_DS_VertexID, gl_in[11].in_var_VS_To_DS_VertexID }); + spvUnsafeArray _230 = spvUnsafeArray({ FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[0], _143[0] } }, _192[0], _193[0] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[1], _143[1] } }, _192[1], _193[1] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[2], _143[2] } }, _192[2], _193[2] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[3], _143[3] } }, _192[3], _193[3] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[4], _143[4] } }, _192[4], _193[4] }, FHitProxyVSToDS{ 
FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[5], _143[5] } }, _192[5], _193[5] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[6], _143[6] } }, _192[6], _193[6] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[7], _143[7] } }, _192[7], _193[7] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[8], _143[8] } }, _192[8], _193[8] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[9], _143[9] } }, _192[9], _193[9] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[10], _143[10] } }, _192[10], _193[10] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _142[11], _143[11] } }, _192[11], _193[11] } }); + spvUnsafeArray param_var_I; + param_var_I = _230; + float4 _247 = float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _251 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _254 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _255 = 2u * gl_InvocationID; + uint _256 = 3u + _255; + uint _257 = _255 + 4u; + uint _264 = (_254 < 2u) ? 
(_254 + 1u) : 0u; + uint _265 = 2u * _254; + uint _266 = 3u + _265; + uint _267 = _265 + 4u; + float4 _279 = float4(param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _315; + float4 _316; + float4 _317; + float4 _318; + if ((param_var_I[_266].VertexID < param_var_I[_254].VertexID) || ((param_var_I[_266].VertexID == param_var_I[_254].VertexID) && (param_var_I[_267].VertexID < param_var_I[_264].VertexID))) + { + _315 = param_var_I[_267].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _316 = param_var_I[_267].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + _317 = param_var_I[_266].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _318 = param_var_I[_266].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + } + else + { + _315 = param_var_I[_264].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _316 = param_var_I[_264].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + _317 = param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + _318 = param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + } + float4 _324 = float4(_318.xyz, 0.0); + float4 _328 = float4(_316.xyz, 0.0); + float4 _336 = float4(param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _344 = float4(param_var_I[_256].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _352 = float4(param_var_I[_257].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _402 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_254].Position) - (float4(dot(param_var_I[_254].Position - param_var_I[gl_InvocationID].Position, _247)) * _247)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_256].Position) + param_var_I[_257].Position) - (float4(dot(param_var_I[_257].Position - 
param_var_I[_256].Position, _344)) * _344)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_254].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_254].Position, _336)) * _336)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_257].Position) + param_var_I[_256].Position) - (float4(dot(param_var_I[_256].Position - param_var_I[_257].Position, _352)) * _352)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_VS_To_DS_VertexID = param_var_I[gl_InvocationID].VertexID; + gl_out[gl_InvocationID].out_var_PN_POSITION = _402; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _251; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_DominantVertex = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantVertex1 = _279; + gl_out[gl_InvocationID].out_var_PN_DominantVertex2 = param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz; + gl_out[gl_InvocationID].out_var_PN_DominantEdge = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantEdge1 = float2(0.0); + gl_out[gl_InvocationID].out_var_PN_DominantEdge2 = _324; + gl_out[gl_InvocationID].out_var_PN_DominantEdge3 = _328; + gl_out[gl_InvocationID].out_var_PN_DominantEdge4 = _317.xyz; + gl_out[gl_InvocationID].out_var_PN_DominantEdge5 = _315.xyz; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ 
param_var_I[gl_InvocationID], _402, _251, 1.0, 1.0, FHullShaderConstantDominantVertexData{ float2(0.0), _279, param_var_I[9u + gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz }, FHullShaderConstantDominantEdgeData{ float2(0.0), float2(0.0), _324, _328, _317.xyz, _315.xyz } }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _461 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * float4(0.16666667163372039794921875); + float4 _474; + _474.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _474.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _474.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _474.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _600; + for (;;) + { + float4 _500 = View.View_ViewToClip * float4(0.0); + float4 _505 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _506 = _505.xyz; + float3 _507 = _500.xyz; + float _509 = _505.w; + float _510 = _500.w; + float4 _527 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _528 = _527.xyz; + float _530 = _527.w; + float4 _548 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _549 = _548.xyz; + float _551 = _548.w; + if (any((((int3((_506 - _507) < 
float3(_509 + _510)) + (int3(2) * int3((_506 + _507) > float3((-_509) - _510)))) | (int3((_528 - _507) < float3(_530 + _510)) + (int3(2) * int3((_528 + _507) > float3((-_530) - _510))))) | (int3((_549 - _507) < float3(_551 + _510)) + (int3(2) * int3((_549 + _507) > float3((-_551) - _510))))) != int3(3))) + { + _600 = float4(0.0); + break; + } + float3 _569 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _570 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _571 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _574 = (float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _577 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _580 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _584 = sqrt(dot(_570, _570) / dot(_577, _577)); + float _588 = sqrt(dot(_571, _571) / dot(_580, _580)); + float _592 = sqrt(dot(_569, _569) / dot(_574, _574)); + float4 _593 = float4(_584, _588, _592, 1.0); + _593.w = 0.333000004291534423828125 * ((_584 + _588) + _592); + _600 = float4(View.View_AdaptiveTessellationFactor) * _593; + break; + } + float4 _602 = fast::clamp(_474 * _600, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_602.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_602.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_602.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_602.w); + patchOut.out_var_PN_POSITION9 = 
_461 + ((_461 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc b/reference/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc new file mode 100644 index 00000000000..5d4e320bd04 --- /dev/null +++ b/reference/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc @@ -0,0 +1,408 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; + float4 Color; + spvUnsafeArray TexCoords; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FHitProxyVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + float4 Position; +}; + +struct FPNTessellationHSToDS +{ + FHitProxyVSToDS PassSpecificData; + spvUnsafeArray WorldPosition; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_View +{ + 
float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 
View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 
View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 
View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +constant float4 _127 = {}; + +struct main0_out +{ + float4 out_var_COLOR0; + float3 out_var_PN_DisplacementScales; + spvUnsafeArray 
out_var_PN_POSITION; + float out_var_PN_TessellationMultiplier; + float out_var_PN_WorldDisplacementMultiplier; + spvUnsafeArray out_var_TEXCOORD0; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_patchOut +{ + float4 out_var_PN_POSITION9; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_COLOR0 [[attribute(2)]]; + float2 in_var_TEXCOORD0_0 [[attribute(3)]]; + float2 in_var_TEXCOORD0_1 [[attribute(4)]]; + float4 in_var_VS_To_DS_Position [[attribute(5)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device main0_patchOut* spvPatchOut [[buffer(27)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FPNTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + device main0_patchOut& patchOut = spvPatchOut[gl_PrimitiveID]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _129 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, gl_in[2].in_var_TEXCOORD10_centroid, gl_in[3].in_var_TEXCOORD10_centroid, gl_in[4].in_var_TEXCOORD10_centroid, gl_in[5].in_var_TEXCOORD10_centroid, gl_in[6].in_var_TEXCOORD10_centroid, gl_in[7].in_var_TEXCOORD10_centroid, gl_in[8].in_var_TEXCOORD10_centroid, gl_in[9].in_var_TEXCOORD10_centroid, gl_in[10].in_var_TEXCOORD10_centroid, gl_in[11].in_var_TEXCOORD10_centroid }); + 
spvUnsafeArray _130 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid, gl_in[3].in_var_TEXCOORD11_centroid, gl_in[4].in_var_TEXCOORD11_centroid, gl_in[5].in_var_TEXCOORD11_centroid, gl_in[6].in_var_TEXCOORD11_centroid, gl_in[7].in_var_TEXCOORD11_centroid, gl_in[8].in_var_TEXCOORD11_centroid, gl_in[9].in_var_TEXCOORD11_centroid, gl_in[10].in_var_TEXCOORD11_centroid, gl_in[11].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _131 = spvUnsafeArray({ gl_in[0].in_var_COLOR0, gl_in[1].in_var_COLOR0, gl_in[2].in_var_COLOR0, gl_in[3].in_var_COLOR0, gl_in[4].in_var_COLOR0, gl_in[5].in_var_COLOR0, gl_in[6].in_var_COLOR0, gl_in[7].in_var_COLOR0, gl_in[8].in_var_COLOR0, gl_in[9].in_var_COLOR0, gl_in[10].in_var_COLOR0, gl_in[11].in_var_COLOR0 }); + spvUnsafeArray, 12> _132 = spvUnsafeArray, 12>({ spvUnsafeArray({ gl_in[0].in_var_TEXCOORD0_0, gl_in[0].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[1].in_var_TEXCOORD0_0, gl_in[1].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[2].in_var_TEXCOORD0_0, gl_in[2].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[3].in_var_TEXCOORD0_0, gl_in[3].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[4].in_var_TEXCOORD0_0, gl_in[4].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[5].in_var_TEXCOORD0_0, gl_in[5].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[6].in_var_TEXCOORD0_0, gl_in[6].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[7].in_var_TEXCOORD0_0, gl_in[7].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[8].in_var_TEXCOORD0_0, gl_in[8].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[9].in_var_TEXCOORD0_0, gl_in[9].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[10].in_var_TEXCOORD0_0, gl_in[10].in_var_TEXCOORD0_1 }), spvUnsafeArray({ gl_in[11].in_var_TEXCOORD0_0, gl_in[11].in_var_TEXCOORD0_1 }) }); + spvUnsafeArray _205 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position, gl_in[3].in_var_VS_To_DS_Position, 
gl_in[4].in_var_VS_To_DS_Position, gl_in[5].in_var_VS_To_DS_Position, gl_in[6].in_var_VS_To_DS_Position, gl_in[7].in_var_VS_To_DS_Position, gl_in[8].in_var_VS_To_DS_Position, gl_in[9].in_var_VS_To_DS_Position, gl_in[10].in_var_VS_To_DS_Position, gl_in[11].in_var_VS_To_DS_Position }); + spvUnsafeArray _230 = spvUnsafeArray({ FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[0], _130[0], _131[0], _132[0] } }, _205[0] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[1], _130[1], _131[1], _132[1] } }, _205[1] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[2], _130[2], _131[2], _132[2] } }, _205[2] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[3], _130[3], _131[3], _132[3] } }, _205[3] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[4], _130[4], _131[4], _132[4] } }, _205[4] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[5], _130[5], _131[5], _132[5] } }, _205[5] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[6], _130[6], _131[6], _132[6] } }, _205[6] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[7], _130[7], _131[7], _132[7] } }, _205[7] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[8], _130[8], _131[8], _132[8] } }, _205[8] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[9], _130[9], _131[9], _132[9] } }, _205[9] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[10], _130[10], _131[10], _132[10] } }, _205[10] }, FHitProxyVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _129[11], _130[11], _131[11], _132[11] } }, _205[11] } }); + spvUnsafeArray 
param_var_I; + param_var_I = _230; + float4 _247 = float4(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float3 _251 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + uint _254 = (gl_InvocationID < 2u) ? (gl_InvocationID + 1u) : 0u; + uint _255 = 2u * gl_InvocationID; + uint _256 = 3u + _255; + uint _257 = _255 + 4u; + float4 _269 = float4(param_var_I[_254].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _277 = float4(param_var_I[_256].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + float4 _285 = float4(param_var_I[_257].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, 0.0); + spvUnsafeArray _333 = spvUnsafeArray({ param_var_I[gl_InvocationID].Position, (((((float4(2.0) * param_var_I[gl_InvocationID].Position) + param_var_I[_254].Position) - (float4(dot(param_var_I[_254].Position - param_var_I[gl_InvocationID].Position, _247)) * _247)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_256].Position) + param_var_I[_257].Position) - (float4(dot(param_var_I[_257].Position - param_var_I[_256].Position, _277)) * _277)) * float4(0.3333333432674407958984375))) * float4(0.5), (((((float4(2.0) * param_var_I[_254].Position) + param_var_I[gl_InvocationID].Position) - (float4(dot(param_var_I[gl_InvocationID].Position - param_var_I[_254].Position, _269)) * _269)) * float4(0.3333333432674407958984375)) + ((((float4(2.0) * param_var_I[_257].Position) + param_var_I[_256].Position) - 
(float4(dot(param_var_I[_256].Position - param_var_I[_257].Position, _285)) * _285)) * float4(0.3333333432674407958984375))) * float4(0.5) }); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_COLOR0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.Color; + gl_out[gl_InvocationID].out_var_TEXCOORD0 = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TexCoords; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_PN_POSITION = _333; + gl_out[gl_InvocationID].out_var_PN_DisplacementScales = _251; + gl_out[gl_InvocationID].out_var_PN_TessellationMultiplier = 1.0; + gl_out[gl_InvocationID].out_var_PN_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FPNTessellationHSToDS{ param_var_I[gl_InvocationID], _333, _251, 1.0, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _385 = (((((temp_var_hullMainRetVal[0u].WorldPosition[1] + temp_var_hullMainRetVal[0u].WorldPosition[2]) + temp_var_hullMainRetVal[1u].WorldPosition[1]) + temp_var_hullMainRetVal[1u].WorldPosition[2]) + temp_var_hullMainRetVal[2u].WorldPosition[1]) + temp_var_hullMainRetVal[2u].WorldPosition[2]) * float4(0.16666667163372039794921875); + float4 _398; + _398.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _398.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _398.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _398.w = 
0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _524; + for (;;) + { + float4 _424 = View.View_ViewToClip * float4(0.0); + float4 _429 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[0u].WorldPosition[0].xyz, 1.0); + float3 _430 = _429.xyz; + float3 _431 = _424.xyz; + float _433 = _429.w; + float _434 = _424.w; + float4 _451 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[1u].WorldPosition[0].xyz, 1.0); + float3 _452 = _451.xyz; + float _454 = _451.w; + float4 _472 = View.View_TranslatedWorldToClip * float4(temp_var_hullMainRetVal[2u].WorldPosition[0].xyz, 1.0); + float3 _473 = _472.xyz; + float _475 = _472.w; + if (any((((int3((_430 - _431) < float3(_433 + _434)) + (int3(2) * int3((_430 + _431) > float3((-_433) - _434)))) | (int3((_452 - _431) < float3(_454 + _434)) + (int3(2) * int3((_452 + _431) > float3((-_454) - _434))))) | (int3((_473 - _431) < float3(_475 + _434)) + (int3(2) * int3((_473 + _431) > float3((-_475) - _434))))) != int3(3))) + { + _524 = float4(0.0); + break; + } + float3 _493 = temp_var_hullMainRetVal[0u].WorldPosition[0].xyz - temp_var_hullMainRetVal[1u].WorldPosition[0].xyz; + float3 _494 = temp_var_hullMainRetVal[1u].WorldPosition[0].xyz - temp_var_hullMainRetVal[2u].WorldPosition[0].xyz; + float3 _495 = temp_var_hullMainRetVal[2u].WorldPosition[0].xyz - temp_var_hullMainRetVal[0u].WorldPosition[0].xyz; + float3 _498 = (float3(0.5) * (temp_var_hullMainRetVal[0u].WorldPosition[0].xyz + temp_var_hullMainRetVal[1u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _501 = (float3(0.5) * (temp_var_hullMainRetVal[1u].WorldPosition[0].xyz + temp_var_hullMainRetVal[2u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float3 _504 = (float3(0.5) * (temp_var_hullMainRetVal[2u].WorldPosition[0].xyz + 
temp_var_hullMainRetVal[0u].WorldPosition[0].xyz)) - float3(View.View_TranslatedWorldCameraOrigin); + float _508 = sqrt(dot(_494, _494) / dot(_501, _501)); + float _512 = sqrt(dot(_495, _495) / dot(_504, _504)); + float _516 = sqrt(dot(_493, _493) / dot(_498, _498)); + float4 _517 = float4(_508, _512, _516, 1.0); + _517.w = 0.333000004291534423828125 * ((_508 + _512) + _516); + _524 = float4(View.View_AdaptiveTessellationFactor) * _517; + break; + } + float4 _526 = fast::clamp(_398 * _524, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_526.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_526.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_526.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_526.w); + patchOut.out_var_PN_POSITION9 = _385 + ((_385 - (((temp_var_hullMainRetVal[2u].WorldPosition[0] + temp_var_hullMainRetVal[1u].WorldPosition[0]) + temp_var_hullMainRetVal[0u].WorldPosition[0]) * float4(0.3333333432674407958984375))) * float4(0.5)); + } +} + diff --git a/reference/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc b/reference/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc new file mode 100644 index 00000000000..9ae81e40615 --- /dev/null +++ b/reference/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc @@ -0,0 +1,175 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct FVertexFactoryInterpolantsVSToPS +{ + float4 TangentToWorld0; + float4 TangentToWorld2; +}; + +struct FVertexFactoryInterpolantsVSToDS +{ + FVertexFactoryInterpolantsVSToPS InterpolantsVSToPS; +}; + +struct FSharedBasePassInterpolants +{ +}; +struct FBasePassInterpolantsVSToDS +{ + FSharedBasePassInterpolants _m0; +}; + +struct FBasePassVSToDS +{ + FVertexFactoryInterpolantsVSToDS FactoryInterpolants; + FBasePassInterpolantsVSToDS BasePassInterpolants; + float4 Position; +}; + +struct FFlatTessellationHSToDS +{ + FBasePassVSToDS PassSpecificData; + float3 DisplacementScale; + float TessellationMultiplier; + float WorldDisplacementMultiplier; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_DrawsVelocity; + float4 
Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + uint Primitive_LightingChannelMask; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightmapDataIndex; + packed_float3 Primitive_PreSkinnedLocalBounds; + int Primitive_SingleCaptureIndex; + uint Primitive_OutputVelocity; + uint PrePadding_Primitive_420; + uint PrePadding_Primitive_424; + uint PrePadding_Primitive_428; + float4 Primitive_CustomPrimitiveData[4]; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[3]; + float4 Material_ScalarExpressions[1]; +}; + +constant float4 _88 = {}; + +struct main0_out +{ + float3 out_var_Flat_DisplacementScales; + float out_var_Flat_TessellationMultiplier; + float out_var_Flat_WorldDisplacementMultiplier; + float4 out_var_TEXCOORD10_centroid; + float4 out_var_TEXCOORD11_centroid; + float4 out_var_VS_To_DS_Position; +}; + +struct main0_in +{ + float4 in_var_TEXCOORD10_centroid [[attribute(0)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(1)]]; + float4 in_var_VS_To_DS_Position [[attribute(2)]]; +}; + +kernel void main0(main0_in in [[stage_in]], constant type_Primitive& Primitive [[buffer(0)]], constant type_Material& Material [[buffer(1)]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) +{ + threadgroup FFlatTessellationHSToDS temp_var_hullMainRetVal[3]; + device main0_out* gl_out = &spvOut[gl_PrimitiveID * 3]; + if (gl_InvocationID < spvIndirectParams[0]) + gl_in[gl_InvocationID] = in; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_InvocationID >= 3) + return; + spvUnsafeArray _90 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD10_centroid, gl_in[1].in_var_TEXCOORD10_centroid, 
gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _91 = spvUnsafeArray({ gl_in[0].in_var_TEXCOORD11_centroid, gl_in[1].in_var_TEXCOORD11_centroid, gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _104 = spvUnsafeArray({ gl_in[0].in_var_VS_To_DS_Position, gl_in[1].in_var_VS_To_DS_Position, gl_in[2].in_var_VS_To_DS_Position }); + spvUnsafeArray _111 = spvUnsafeArray({ FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[0], _91[0] } }, FBasePassInterpolantsVSToDS{ { } }, _104[0] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[1], _91[1] } }, FBasePassInterpolantsVSToDS{ { } }, _104[1] }, FBasePassVSToDS{ FVertexFactoryInterpolantsVSToDS{ FVertexFactoryInterpolantsVSToPS{ _90[2], _91[2] } }, FBasePassInterpolantsVSToDS{ { } }, _104[2] } }); + spvUnsafeArray param_var_I; + param_var_I = _111; + float3 _128 = Primitive.Primitive_NonUniformScale.xyz * float3x3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz, cross(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz, param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0.xyz) * float3(param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.w), param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2.xyz); + gl_out[gl_InvocationID].out_var_TEXCOORD10_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld0; + gl_out[gl_InvocationID].out_var_TEXCOORD11_centroid = param_var_I[gl_InvocationID].FactoryInterpolants.InterpolantsVSToPS.TangentToWorld2; + gl_out[gl_InvocationID].out_var_VS_To_DS_Position = param_var_I[gl_InvocationID].Position; + gl_out[gl_InvocationID].out_var_Flat_DisplacementScales = _128; + gl_out[gl_InvocationID].out_var_Flat_TessellationMultiplier = Material.Material_ScalarExpressions[0].x; + 
gl_out[gl_InvocationID].out_var_Flat_WorldDisplacementMultiplier = 1.0; + temp_var_hullMainRetVal[gl_InvocationID] = FFlatTessellationHSToDS{ param_var_I[gl_InvocationID], _128, Material.Material_ScalarExpressions[0].x, 1.0 }; + threadgroup_barrier(mem_flags::mem_device | mem_flags::mem_threadgroup); + if (gl_InvocationID == 0u) + { + float4 _154; + _154.x = 0.5 * (temp_var_hullMainRetVal[1u].TessellationMultiplier + temp_var_hullMainRetVal[2u].TessellationMultiplier); + _154.y = 0.5 * (temp_var_hullMainRetVal[2u].TessellationMultiplier + temp_var_hullMainRetVal[0u].TessellationMultiplier); + _154.z = 0.5 * (temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier); + _154.w = 0.333000004291534423828125 * ((temp_var_hullMainRetVal[0u].TessellationMultiplier + temp_var_hullMainRetVal[1u].TessellationMultiplier) + temp_var_hullMainRetVal[2u].TessellationMultiplier); + float4 _173 = fast::clamp(_154, float4(1.0), float4(15.0)); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0u] = half(_173.x); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1u] = half(_173.y); + spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[2u] = half(_173.z); + spvTessLevel[gl_PrimitiveID].insideTessellationFactor = half(_173.w); + } +} + diff --git a/reference/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese b/reference/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese new file mode 100644 index 00000000000..612100604d7 --- /dev/null +++ b/reference/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese @@ -0,0 +1,419 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 
View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 
View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float 
View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_ShadowDepthPass +{ + float PrePadding_ShadowDepthPass_LPV_0; + float PrePadding_ShadowDepthPass_LPV_4; + float PrePadding_ShadowDepthPass_LPV_8; + float PrePadding_ShadowDepthPass_LPV_12; + float PrePadding_ShadowDepthPass_LPV_16; + float PrePadding_ShadowDepthPass_LPV_20; + float PrePadding_ShadowDepthPass_LPV_24; + float PrePadding_ShadowDepthPass_LPV_28; + float PrePadding_ShadowDepthPass_LPV_32; + float PrePadding_ShadowDepthPass_LPV_36; + float PrePadding_ShadowDepthPass_LPV_40; + float PrePadding_ShadowDepthPass_LPV_44; + float 
PrePadding_ShadowDepthPass_LPV_48; + float PrePadding_ShadowDepthPass_LPV_52; + float PrePadding_ShadowDepthPass_LPV_56; + float PrePadding_ShadowDepthPass_LPV_60; + float PrePadding_ShadowDepthPass_LPV_64; + float PrePadding_ShadowDepthPass_LPV_68; + float PrePadding_ShadowDepthPass_LPV_72; + float PrePadding_ShadowDepthPass_LPV_76; + float PrePadding_ShadowDepthPass_LPV_80; + float PrePadding_ShadowDepthPass_LPV_84; + float PrePadding_ShadowDepthPass_LPV_88; + float PrePadding_ShadowDepthPass_LPV_92; + float PrePadding_ShadowDepthPass_LPV_96; + float PrePadding_ShadowDepthPass_LPV_100; + float PrePadding_ShadowDepthPass_LPV_104; + float PrePadding_ShadowDepthPass_LPV_108; + float PrePadding_ShadowDepthPass_LPV_112; + float PrePadding_ShadowDepthPass_LPV_116; + float PrePadding_ShadowDepthPass_LPV_120; + float PrePadding_ShadowDepthPass_LPV_124; + float PrePadding_ShadowDepthPass_LPV_128; + float PrePadding_ShadowDepthPass_LPV_132; + float PrePadding_ShadowDepthPass_LPV_136; + float PrePadding_ShadowDepthPass_LPV_140; + float PrePadding_ShadowDepthPass_LPV_144; + float PrePadding_ShadowDepthPass_LPV_148; + float PrePadding_ShadowDepthPass_LPV_152; + float PrePadding_ShadowDepthPass_LPV_156; + float PrePadding_ShadowDepthPass_LPV_160; + float PrePadding_ShadowDepthPass_LPV_164; + float PrePadding_ShadowDepthPass_LPV_168; + float PrePadding_ShadowDepthPass_LPV_172; + float PrePadding_ShadowDepthPass_LPV_176; + float PrePadding_ShadowDepthPass_LPV_180; + float PrePadding_ShadowDepthPass_LPV_184; + float PrePadding_ShadowDepthPass_LPV_188; + float PrePadding_ShadowDepthPass_LPV_192; + float PrePadding_ShadowDepthPass_LPV_196; + float PrePadding_ShadowDepthPass_LPV_200; + float PrePadding_ShadowDepthPass_LPV_204; + float PrePadding_ShadowDepthPass_LPV_208; + float PrePadding_ShadowDepthPass_LPV_212; + float PrePadding_ShadowDepthPass_LPV_216; + float PrePadding_ShadowDepthPass_LPV_220; + float PrePadding_ShadowDepthPass_LPV_224; + float 
PrePadding_ShadowDepthPass_LPV_228; + float PrePadding_ShadowDepthPass_LPV_232; + float PrePadding_ShadowDepthPass_LPV_236; + float PrePadding_ShadowDepthPass_LPV_240; + float PrePadding_ShadowDepthPass_LPV_244; + float PrePadding_ShadowDepthPass_LPV_248; + float PrePadding_ShadowDepthPass_LPV_252; + float PrePadding_ShadowDepthPass_LPV_256; + float PrePadding_ShadowDepthPass_LPV_260; + float PrePadding_ShadowDepthPass_LPV_264; + float PrePadding_ShadowDepthPass_LPV_268; + float4x4 ShadowDepthPass_LPV_mRsmToWorld; + float4 ShadowDepthPass_LPV_mLightColour; + float4 ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection; + float4 ShadowDepthPass_LPV_mEyePos; + packed_int3 ShadowDepthPass_LPV_mOldGridOffset; + int PrePadding_ShadowDepthPass_LPV_396; + packed_int3 ShadowDepthPass_LPV_mLpvGridOffset; + float ShadowDepthPass_LPV_ClearMultiplier; + float ShadowDepthPass_LPV_LpvScale; + float ShadowDepthPass_LPV_OneOverLpvScale; + float ShadowDepthPass_LPV_DirectionalOcclusionIntensity; + float ShadowDepthPass_LPV_DirectionalOcclusionRadius; + float ShadowDepthPass_LPV_RsmAreaIntensityMultiplier; + float ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier; + float ShadowDepthPass_LPV_SecondaryOcclusionStrength; + float ShadowDepthPass_LPV_SecondaryBounceStrength; + float ShadowDepthPass_LPV_VplInjectionBias; + float ShadowDepthPass_LPV_GeometryVolumeInjectionBias; + float ShadowDepthPass_LPV_EmissiveInjectionMultiplier; + int ShadowDepthPass_LPV_PropagationIndex; + float4x4 ShadowDepthPass_ProjectionMatrix; + float4x4 ShadowDepthPass_ViewMatrix; + float4 ShadowDepthPass_ShadowParams; + float ShadowDepthPass_bClampToNearPlane; + float PrePadding_ShadowDepthPass_612; + float PrePadding_ShadowDepthPass_616; + float PrePadding_ShadowDepthPass_620; + float4x4 ShadowDepthPass_ShadowViewProjectionMatrices[6]; + float4x4 ShadowDepthPass_ShadowViewMatrices[6]; +}; + +constant float4 _113 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 
out_var_TEXCOORD11_centroid [[user(locn1)]]; + float4 out_var_COLOR0 [[user(locn2)]]; + float4 out_var_TEXCOORD0_0 [[user(locn3)]]; + uint out_var_PRIMITIVE_ID [[user(locn4)]]; + float out_var_TEXCOORD6 [[user(locn5)]]; + float out_var_TEXCOORD8 [[user(locn6)]]; + float3 out_var_TEXCOORD7 [[user(locn7)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_COLOR0 [[attribute(0)]]; + float4 in_var_PN_POSITION_0 [[attribute(2)]]; + float4 in_var_PN_POSITION_1 [[attribute(3)]]; + float4 in_var_PN_POSITION_2 [[attribute(4)]]; + float in_var_PN_WorldDisplacementMultiplier [[attribute(7)]]; + uint in_var_PRIMITIVE_ID [[attribute(8)]]; + float4 in_var_TEXCOORD0_0 [[attribute(9)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(10)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(11)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(5)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], constant type_ShadowDepthPass& ShadowDepthPass [[buffer(1)]], texture2d Material_Texture2D_3 [[texture(0)]], sampler Material_Texture2D_3Sampler [[sampler(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray out_var_TEXCOORD0 = {}; + spvUnsafeArray _117 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _118 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray _119 = spvUnsafeArray({ patchIn.gl_in[0].in_var_COLOR0, patchIn.gl_in[1].in_var_COLOR0, patchIn.gl_in[2].in_var_COLOR0 }); + spvUnsafeArray, 3> _120 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD0_0 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_TEXCOORD0_0 }), 
spvUnsafeArray({ patchIn.gl_in[2].in_var_TEXCOORD0_0 }) }); + spvUnsafeArray, 3> _135 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + spvUnsafeArray _136 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[1].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[2].in_var_PN_WorldDisplacementMultiplier }); + float _157 = gl_TessCoord.x * gl_TessCoord.x; + float _158 = gl_TessCoord.y * gl_TessCoord.y; + float _159 = gl_TessCoord.z * gl_TessCoord.z; + float4 _165 = float4(gl_TessCoord.x); + float4 _169 = float4(gl_TessCoord.y); + float4 _174 = float4(gl_TessCoord.z); + float4 _177 = float4(_157 * 3.0); + float4 _181 = float4(_158 * 3.0); + float4 _188 = float4(_159 * 3.0); + float4 _202 = ((((((((((_135[0][0] * float4(_157)) * _165) + ((_135[1][0] * float4(_158)) * _169)) + ((_135[2][0] * float4(_159)) * _174)) + ((_135[0][1] * _177) * _169)) + ((_135[0][2] * _181) * _165)) + ((_135[1][1] * _181) * _174)) + ((_135[1][2] * _188) * _169)) + ((_135[2][1] * _188) * _165)) + ((_135[2][2] * _177) * _174)) + ((((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _174) * _165) * _169); + float3 _226 = ((_117[0].xyz * float3(gl_TessCoord.x)) + (_117[1].xyz * float3(gl_TessCoord.y))).xyz + (_117[2].xyz * float3(gl_TessCoord.z)); + float4 _229 = ((_118[0] * _165) + (_118[1] * _169)) + (_118[2] * _174); + float4 _231 = ((_119[0] * _165) + (_119[1] * _169)) + (_119[2] * _174); + float4 _233 = ((_120[0][0] * _165) + (_120[1][0] * _169)) + (_120[2][0] * _174); + spvUnsafeArray _234 = spvUnsafeArray({ _233 }); + float3 _236 = _229.xyz; + float3 _264 = _202.xyz + 
(((float3((Material_Texture2D_3.sample(Material_Texture2D_3Sampler, (float2(View.View_GameTime * 0.20000000298023223876953125, View.View_GameTime * (-0.699999988079071044921875)) + (_233.zw * float2(1.0, 2.0))), level(-1.0)).x * 10.0) * (1.0 - _231.x)) * _236) * float3(0.5)) * float3(((_136[0] * gl_TessCoord.x) + (_136[1] * gl_TessCoord.y)) + (_136[2] * gl_TessCoord.z))); + float4x4 _116 = ShadowDepthPass.ShadowDepthPass_ViewMatrix; + float4 _270 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_264.x, _264.y, _264.z, _202.w); + float4 _281; + if ((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_270.z < 0.0)) + { + float4 _279 = _270; + _279.z = 9.9999999747524270787835121154785e-07; + _279.w = 1.0; + _281 = _279; + } + else + { + _281 = _270; + } + float _290 = abs(dot(float3(_116[0u].z, _116[1u].z, _116[2u].z), _236)); + out.out_var_TEXCOORD10_centroid = float4(_226.x, _226.y, _226.z, _113.w); + out.out_var_TEXCOORD11_centroid = _229; + out.out_var_COLOR0 = _231; + out_var_TEXCOORD0 = _234; + out.out_var_PRIMITIVE_ID = patchIn.gl_in[0u].in_var_PRIMITIVE_ID; + out.out_var_TEXCOORD6 = _281.z; + out.out_var_TEXCOORD8 = (ShadowDepthPass.ShadowDepthPass_ShadowParams.y * fast::clamp((abs(_290) > 0.0) ? 
(sqrt(fast::clamp(1.0 - (_290 * _290), 0.0, 1.0)) / _290) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z)) + ShadowDepthPass.ShadowDepthPass_ShadowParams.x; + out.out_var_TEXCOORD7 = _264.xyz; + out.gl_Position = _281; + out.out_var_TEXCOORD0_0 = out_var_TEXCOORD0[0]; + return out; +} + diff --git a/reference/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese b/reference/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese new file mode 100644 index 00000000000..f1b74aacbbc --- /dev/null +++ b/reference/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese @@ -0,0 +1,416 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_ClipToWorld; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 
View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_908; + packed_float3 View_ViewUp; + float PrePadding_View_924; + packed_float3 View_ViewRight; + float PrePadding_View_940; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_956; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_972; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_1020; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_1036; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_1052; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1068; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1724; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1740; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1756; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2076; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; 
+ float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2148; + float PrePadding_View_2152; + float PrePadding_View_2156; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2228; + float PrePadding_View_2232; + float PrePadding_View_2236; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2268; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2412; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float 
View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + float View_AtmosphericFogSunDiscHalfApexAngleRadian; + float PrePadding_View_2492; + float4 View_AtmosphericFogSunDiscLuminance; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + uint PrePadding_View_2520; + uint PrePadding_View_2524; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2584; + float PrePadding_View_2588; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2744; + float PrePadding_View_2748; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float PrePadding_View_2908; + int2 View_CursorPosition; + float View_bCheckerboardSubsurfaceProfileRendering; + float PrePadding_View_2924; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2940; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2956; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2972; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2988; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_3004; + packed_float3 
View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; + float PrePadding_View_3048; + float PrePadding_View_3052; + float4x4 View_WorldToVirtualTexture; + float4 View_VirtualTextureParams; + float4 View_XRPassthroughCameraUVs[2]; +}; + +struct type_Material +{ + float4 Material_VectorExpressions[5]; + float4 Material_ScalarExpressions[2]; +}; + +constant float4 _118 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD6 [[user(locn0)]]; + float4 out_var_TEXCOORD7 [[user(locn1)]]; + float4 out_var_TEXCOORD10_centroid [[user(locn2)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn3)]]; + float4 gl_Position [[position]]; + float gl_ClipDistance [[clip_distance]] [1]; + float gl_ClipDistance_0 [[user(clip0)]]; +}; + +struct main0_in +{ + float4 in_var_PN_DominantEdge2 [[attribute(3)]]; + float4 in_var_PN_DominantEdge3 [[attribute(4)]]; + float3 in_var_PN_DominantEdge4 [[attribute(5)]]; + float3 in_var_PN_DominantEdge5 [[attribute(6)]]; + float4 in_var_PN_DominantVertex1 [[attribute(8)]]; + float3 in_var_PN_DominantVertex2 [[attribute(9)]]; + float4 in_var_PN_POSITION_0 [[attribute(10)]]; + float4 in_var_PN_POSITION_1 [[attribute(11)]]; + float4 in_var_PN_POSITION_2 [[attribute(12)]]; + float in_var_PN_WorldDisplacementMultiplier [[attribute(15)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(16)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(17)]]; + float4 in_var_TEXCOORD6 [[attribute(18)]]; + float4 in_var_TEXCOORD8 [[attribute(19)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(13)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Material& Material [[buffer(1)]], texture3d View_GlobalDistanceFieldTexture0 
[[texture(0)]], texture3d View_GlobalDistanceFieldTexture1 [[texture(1)]], texture3d View_GlobalDistanceFieldTexture2 [[texture(2)]], texture3d View_GlobalDistanceFieldTexture3 [[texture(3)]], sampler View_GlobalDistanceFieldSampler0 [[sampler(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray _120 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD6, patchIn.gl_in[1].in_var_TEXCOORD6, patchIn.gl_in[2].in_var_TEXCOORD6 }); + spvUnsafeArray _121 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD8, patchIn.gl_in[1].in_var_TEXCOORD8, patchIn.gl_in[2].in_var_TEXCOORD8 }); + spvUnsafeArray _128 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _129 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _136 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + spvUnsafeArray _137 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[1].in_var_PN_WorldDisplacementMultiplier, patchIn.gl_in[2].in_var_PN_WorldDisplacementMultiplier }); + spvUnsafeArray _138 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantVertex1, patchIn.gl_in[1].in_var_PN_DominantVertex1, patchIn.gl_in[2].in_var_PN_DominantVertex1 }); + spvUnsafeArray _139 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantVertex2, patchIn.gl_in[1].in_var_PN_DominantVertex2, patchIn.gl_in[2].in_var_PN_DominantVertex2 
}); + spvUnsafeArray _146 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge2, patchIn.gl_in[1].in_var_PN_DominantEdge2, patchIn.gl_in[2].in_var_PN_DominantEdge2 }); + spvUnsafeArray _147 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge3, patchIn.gl_in[1].in_var_PN_DominantEdge3, patchIn.gl_in[2].in_var_PN_DominantEdge3 }); + spvUnsafeArray _148 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge4, patchIn.gl_in[1].in_var_PN_DominantEdge4, patchIn.gl_in[2].in_var_PN_DominantEdge4 }); + spvUnsafeArray _149 = spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_DominantEdge5, patchIn.gl_in[1].in_var_PN_DominantEdge5, patchIn.gl_in[2].in_var_PN_DominantEdge5 }); + float _190 = gl_TessCoord.x * gl_TessCoord.x; + float _191 = gl_TessCoord.y * gl_TessCoord.y; + float _192 = gl_TessCoord.z * gl_TessCoord.z; + float4 _198 = float4(gl_TessCoord.x); + float4 _202 = float4(gl_TessCoord.y); + float4 _207 = float4(gl_TessCoord.z); + float4 _210 = float4(_190 * 3.0); + float4 _214 = float4(_191 * 3.0); + float4 _221 = float4(_192 * 3.0); + float4 _235 = ((((((((((_136[0][0] * float4(_190)) * _198) + ((_136[1][0] * float4(_191)) * _202)) + ((_136[2][0] * float4(_192)) * _207)) + ((_136[0][1] * _210) * _202)) + ((_136[0][2] * _214) * _198)) + ((_136[1][1] * _214) * _207)) + ((_136[1][2] * _221) * _202)) + ((_136[2][1] * _221) * _198)) + ((_136[2][2] * _210) * _207)) + ((((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _207) * _198) * _202); + float3 _237 = float3(gl_TessCoord.x); + float3 _240 = float3(gl_TessCoord.y); + float3 _254 = float3(gl_TessCoord.z); + float3 _256 = ((_128[0].xyz * _237) + (_128[1].xyz * _240)).xyz + (_128[2].xyz * _254); + float4 _259 = ((_129[0] * _198) + (_129[1] * _202)) + (_129[2] * _207); + float3 _264 = _235.xyz; + float3 _265 = _256.xyz; + float3 _266 = _259.xyz; + float3 _272 = _264 + float3(View.View_WorldCameraOrigin); + float _279 = float(int(gl_TessCoord.x == 0.0)); + float _282 = float(int(gl_TessCoord.y == 0.0)); + float _285 = 
float(int(gl_TessCoord.z == 0.0)); + float _286 = _279 + _282; + float _287 = _286 + _285; + float4 _387; + float3 _388; + if (float(int(_287 == 2.0)) == 1.0) + { + float _363 = float(int((_282 + _285) == 2.0)); + float _367 = float(int((_285 + _279) == 2.0)); + float _370 = float(int(_286 == 2.0)); + _387 = ((float4(_363) * _138[0]) + (float4(_367) * _138[1])) + (float4(_370) * _138[2]); + _388 = ((float3(_363) * _139[0]) + (float3(_367) * _139[1])) + (float3(_370) * _139[2]); + } + else + { + float4 _358; + float3 _359; + if (float(int(_287 == 1.0)) != 0.0) + { + float4 _304 = float4(_279); + float4 _306 = float4(_282); + float4 _309 = float4(_285); + float4 _311 = ((_304 * _146[0]) + (_306 * _146[1])) + (_309 * _146[2]); + float4 _316 = ((_304 * _147[0]) + (_306 * _147[1])) + (_309 * _147[2]); + float3 _331 = float3(_279); + float3 _333 = float3(_282); + float3 _336 = float3(_285); + float3 _338 = ((_331 * _148[0]) + (_333 * _148[1])) + (_336 * _148[2]); + float3 _343 = ((_331 * _149[0]) + (_333 * _149[1])) + (_336 * _149[2]); + _358 = ((_304 * ((_202 * _311) + (_207 * _316))) + (_306 * ((_207 * _311) + (_198 * _316)))) + (_309 * ((_198 * _311) + (_202 * _316))); + _359 = ((_331 * ((_240 * _338) + (_254 * _343))) + (_333 * ((_254 * _338) + (_237 * _343)))) + (_336 * ((_237 * _338) + (_240 * _343))); + } + else + { + _358 = float4(_259.xyz, 0.0); + _359 = _265; + } + _387 = _358; + _388 = _359; + } + float3x3 _398; + if (float(int(_287 == 0.0)) == 0.0) + { + _398 = float3x3(_388, cross(_387.xyz, _388) * float3(_387.w), _387.xyz); + } + else + { + _398 = float3x3(_265, cross(_266, _265) * float3(_259.w), _266); + } + float3 _411 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[0].xyz) + View.View_GlobalVolumeCenterAndExtent[0].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[0].xyz + View.View_GlobalVolumeCenterAndExtent[0].www) - _272, float3(0.0))); + float _547; + if (fast::min(_411.x, fast::min(_411.y, _411.z)) > 
(View.View_GlobalVolumeCenterAndExtent[0].w * View.View_GlobalVolumeTexelSize)) + { + _547 = View_GlobalDistanceFieldTexture0.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[0u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[0u].xyz), level(0.0)).x; + } + else + { + float3 _436 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[1].xyz) + View.View_GlobalVolumeCenterAndExtent[1].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[1].xyz + View.View_GlobalVolumeCenterAndExtent[1].www) - _272, float3(0.0))); + float _535; + if (fast::min(_436.x, fast::min(_436.y, _436.z)) > (View.View_GlobalVolumeCenterAndExtent[1].w * View.View_GlobalVolumeTexelSize)) + { + _535 = View_GlobalDistanceFieldTexture1.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[1u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[1u].xyz), level(0.0)).x; + } + else + { + float3 _459 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[2].xyz) + View.View_GlobalVolumeCenterAndExtent[2].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[2].xyz + View.View_GlobalVolumeCenterAndExtent[2].www) - _272, float3(0.0))); + float3 _475 = fast::min(fast::max((_272 - View.View_GlobalVolumeCenterAndExtent[3].xyz) + View.View_GlobalVolumeCenterAndExtent[3].www, float3(0.0)), fast::max((View.View_GlobalVolumeCenterAndExtent[3].xyz + View.View_GlobalVolumeCenterAndExtent[3].www) - _272, float3(0.0))); + float _480 = fast::min(_475.x, fast::min(_475.y, _475.z)); + float _523; + if (fast::min(_459.x, fast::min(_459.y, _459.z)) > (View.View_GlobalVolumeCenterAndExtent[2].w * View.View_GlobalVolumeTexelSize)) + { + _523 = View_GlobalDistanceFieldTexture2.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[2u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[2u].xyz), level(0.0)).x; + } + else + { + float _511; + if (_480 > 
(View.View_GlobalVolumeCenterAndExtent[3].w * View.View_GlobalVolumeTexelSize)) + { + _511 = mix(View.View_MaxGlobalDistance, View_GlobalDistanceFieldTexture3.sample(View_GlobalDistanceFieldSampler0, ((_272 * View.View_GlobalVolumeWorldToUVAddAndMul[3u].www) + View.View_GlobalVolumeWorldToUVAddAndMul[3u].xyz), level(0.0)).x, fast::clamp((_480 * 10.0) * View.View_GlobalVolumeWorldToUVAddAndMul[3].w, 0.0, 1.0)); + } + else + { + _511 = View.View_MaxGlobalDistance; + } + _523 = _511; + } + _535 = _523; + } + _547 = _535; + } + float3 _565 = _264 + ((_398[2] * float3(fast::min(_547 + Material.Material_ScalarExpressions[0].z, 0.0) * Material.Material_ScalarExpressions[0].w)) * float3(((_137[0] * gl_TessCoord.x) + (_137[1] * gl_TessCoord.y)) + (_137[2] * gl_TessCoord.z))); + float4 _574 = View.View_TranslatedWorldToClip * float4(_565.x, _565.y, _565.z, _235.w); + _574.z = _574.z + (0.001000000047497451305389404296875 * _574.w); + out.gl_Position = _574; + out.out_var_TEXCOORD6 = ((_120[0] * _198) + (_120[1] * _202)) + (_120[2] * _207); + out.out_var_TEXCOORD7 = ((_121[0] * _198) + (_121[1] * _202)) + (_121[2] * _207); + out.out_var_TEXCOORD10_centroid = float4(_256.x, _256.y, _256.z, _118.w); + out.out_var_TEXCOORD11_centroid = _259; + out.gl_ClipDistance[0u] = dot(View.View_GlobalClippingPlane, float4(_565.xyz - float3(View.View_PreViewTranslation), 1.0)); + out.gl_ClipDistance_0 = out.gl_ClipDistance[0]; + return out; +} + diff --git a/reference/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese b/reference/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese new file mode 100644 index 00000000000..26b2e4b0ff0 --- /dev/null +++ b/reference/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese @@ -0,0 +1,216 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_ShadowDepthPass +{ + float PrePadding_ShadowDepthPass_LPV_0; + float PrePadding_ShadowDepthPass_LPV_4; + float PrePadding_ShadowDepthPass_LPV_8; + float PrePadding_ShadowDepthPass_LPV_12; + float PrePadding_ShadowDepthPass_LPV_16; + float PrePadding_ShadowDepthPass_LPV_20; + float PrePadding_ShadowDepthPass_LPV_24; + float PrePadding_ShadowDepthPass_LPV_28; + float PrePadding_ShadowDepthPass_LPV_32; + float PrePadding_ShadowDepthPass_LPV_36; + float PrePadding_ShadowDepthPass_LPV_40; + float PrePadding_ShadowDepthPass_LPV_44; + float PrePadding_ShadowDepthPass_LPV_48; + float PrePadding_ShadowDepthPass_LPV_52; + float PrePadding_ShadowDepthPass_LPV_56; + float PrePadding_ShadowDepthPass_LPV_60; + float PrePadding_ShadowDepthPass_LPV_64; + float PrePadding_ShadowDepthPass_LPV_68; + float PrePadding_ShadowDepthPass_LPV_72; + float PrePadding_ShadowDepthPass_LPV_76; + float PrePadding_ShadowDepthPass_LPV_80; + float PrePadding_ShadowDepthPass_LPV_84; + float PrePadding_ShadowDepthPass_LPV_88; + float PrePadding_ShadowDepthPass_LPV_92; + float PrePadding_ShadowDepthPass_LPV_96; + float PrePadding_ShadowDepthPass_LPV_100; + float PrePadding_ShadowDepthPass_LPV_104; + float PrePadding_ShadowDepthPass_LPV_108; + float PrePadding_ShadowDepthPass_LPV_112; + float PrePadding_ShadowDepthPass_LPV_116; + float 
PrePadding_ShadowDepthPass_LPV_120; + float PrePadding_ShadowDepthPass_LPV_124; + float PrePadding_ShadowDepthPass_LPV_128; + float PrePadding_ShadowDepthPass_LPV_132; + float PrePadding_ShadowDepthPass_LPV_136; + float PrePadding_ShadowDepthPass_LPV_140; + float PrePadding_ShadowDepthPass_LPV_144; + float PrePadding_ShadowDepthPass_LPV_148; + float PrePadding_ShadowDepthPass_LPV_152; + float PrePadding_ShadowDepthPass_LPV_156; + float PrePadding_ShadowDepthPass_LPV_160; + float PrePadding_ShadowDepthPass_LPV_164; + float PrePadding_ShadowDepthPass_LPV_168; + float PrePadding_ShadowDepthPass_LPV_172; + float PrePadding_ShadowDepthPass_LPV_176; + float PrePadding_ShadowDepthPass_LPV_180; + float PrePadding_ShadowDepthPass_LPV_184; + float PrePadding_ShadowDepthPass_LPV_188; + float PrePadding_ShadowDepthPass_LPV_192; + float PrePadding_ShadowDepthPass_LPV_196; + float PrePadding_ShadowDepthPass_LPV_200; + float PrePadding_ShadowDepthPass_LPV_204; + float PrePadding_ShadowDepthPass_LPV_208; + float PrePadding_ShadowDepthPass_LPV_212; + float PrePadding_ShadowDepthPass_LPV_216; + float PrePadding_ShadowDepthPass_LPV_220; + float PrePadding_ShadowDepthPass_LPV_224; + float PrePadding_ShadowDepthPass_LPV_228; + float PrePadding_ShadowDepthPass_LPV_232; + float PrePadding_ShadowDepthPass_LPV_236; + float PrePadding_ShadowDepthPass_LPV_240; + float PrePadding_ShadowDepthPass_LPV_244; + float PrePadding_ShadowDepthPass_LPV_248; + float PrePadding_ShadowDepthPass_LPV_252; + float PrePadding_ShadowDepthPass_LPV_256; + float PrePadding_ShadowDepthPass_LPV_260; + float PrePadding_ShadowDepthPass_LPV_264; + float PrePadding_ShadowDepthPass_LPV_268; + float4x4 ShadowDepthPass_LPV_mRsmToWorld; + float4 ShadowDepthPass_LPV_mLightColour; + float4 ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection; + float4 ShadowDepthPass_LPV_mEyePos; + packed_int3 ShadowDepthPass_LPV_mOldGridOffset; + int PrePadding_ShadowDepthPass_LPV_396; + packed_int3 ShadowDepthPass_LPV_mLpvGridOffset; + 
float ShadowDepthPass_LPV_ClearMultiplier; + float ShadowDepthPass_LPV_LpvScale; + float ShadowDepthPass_LPV_OneOverLpvScale; + float ShadowDepthPass_LPV_DirectionalOcclusionIntensity; + float ShadowDepthPass_LPV_DirectionalOcclusionRadius; + float ShadowDepthPass_LPV_RsmAreaIntensityMultiplier; + float ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier; + float ShadowDepthPass_LPV_SecondaryOcclusionStrength; + float ShadowDepthPass_LPV_SecondaryBounceStrength; + float ShadowDepthPass_LPV_VplInjectionBias; + float ShadowDepthPass_LPV_GeometryVolumeInjectionBias; + float ShadowDepthPass_LPV_EmissiveInjectionMultiplier; + int ShadowDepthPass_LPV_PropagationIndex; + float4x4 ShadowDepthPass_ProjectionMatrix; + float4x4 ShadowDepthPass_ViewMatrix; + float4 ShadowDepthPass_ShadowParams; + float ShadowDepthPass_bClampToNearPlane; + float PrePadding_ShadowDepthPass_612; + float PrePadding_ShadowDepthPass_616; + float PrePadding_ShadowDepthPass_620; + float4x4 ShadowDepthPass_ShadowViewProjectionMatrices[6]; + float4x4 ShadowDepthPass_ShadowViewMatrices[6]; +}; + +constant float4 _90 = {}; + +struct main0_out +{ + float4 out_var_TEXCOORD10_centroid [[user(locn0)]]; + float4 out_var_TEXCOORD11_centroid [[user(locn1)]]; + float out_var_TEXCOORD6 [[user(locn2)]]; + float3 out_var_TEXCOORD7 [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_PN_POSITION_0 [[attribute(10)]]; + float4 in_var_PN_POSITION_1 [[attribute(11)]]; + float4 in_var_PN_POSITION_2 [[attribute(12)]]; + float4 in_var_TEXCOORD10_centroid [[attribute(16)]]; + float4 in_var_TEXCOORD11_centroid [[attribute(17)]]; +}; + +struct main0_patchIn +{ + float4 in_var_PN_POSITION9 [[attribute(13)]]; + patch_control_point gl_in; +}; + +[[ patch(triangle, 0) ]] vertex main0_out main0(main0_patchIn patchIn [[stage_in]], constant type_ShadowDepthPass& ShadowDepthPass [[buffer(0)]], float3 gl_TessCoord [[position_in_patch]]) +{ + main0_out out = {}; + spvUnsafeArray _93 = 
spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD10_centroid, patchIn.gl_in[1].in_var_TEXCOORD10_centroid, patchIn.gl_in[2].in_var_TEXCOORD10_centroid }); + spvUnsafeArray _94 = spvUnsafeArray({ patchIn.gl_in[0].in_var_TEXCOORD11_centroid, patchIn.gl_in[1].in_var_TEXCOORD11_centroid, patchIn.gl_in[2].in_var_TEXCOORD11_centroid }); + spvUnsafeArray, 3> _101 = spvUnsafeArray, 3>({ spvUnsafeArray({ patchIn.gl_in[0].in_var_PN_POSITION_0, patchIn.gl_in[0].in_var_PN_POSITION_1, patchIn.gl_in[0].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[1].in_var_PN_POSITION_0, patchIn.gl_in[1].in_var_PN_POSITION_1, patchIn.gl_in[1].in_var_PN_POSITION_2 }), spvUnsafeArray({ patchIn.gl_in[2].in_var_PN_POSITION_0, patchIn.gl_in[2].in_var_PN_POSITION_1, patchIn.gl_in[2].in_var_PN_POSITION_2 }) }); + float _119 = gl_TessCoord.x * gl_TessCoord.x; + float _120 = gl_TessCoord.y * gl_TessCoord.y; + float _121 = gl_TessCoord.z * gl_TessCoord.z; + float4 _127 = float4(gl_TessCoord.x); + float4 _131 = float4(gl_TessCoord.y); + float4 _136 = float4(gl_TessCoord.z); + float4 _139 = float4(_119 * 3.0); + float4 _143 = float4(_120 * 3.0); + float4 _150 = float4(_121 * 3.0); + float4 _164 = ((((((((((_101[0][0] * float4(_119)) * _127) + ((_101[1][0] * float4(_120)) * _131)) + ((_101[2][0] * float4(_121)) * _136)) + ((_101[0][1] * _139) * _131)) + ((_101[0][2] * _143) * _127)) + ((_101[1][1] * _143) * _136)) + ((_101[1][2] * _150) * _131)) + ((_101[2][1] * _150) * _127)) + ((_101[2][2] * _139) * _136)) + ((((patchIn.in_var_PN_POSITION9 * float4(6.0)) * _136) * _127) * _131); + float3 _179 = ((_93[0].xyz * float3(gl_TessCoord.x)) + (_93[1].xyz * float3(gl_TessCoord.y))).xyz + (_93[2].xyz * float3(gl_TessCoord.z)); + float4 _182 = ((_94[0] * _127) + (_94[1] * _131)) + (_94[2] * _136); + float4x4 _92 = ShadowDepthPass.ShadowDepthPass_ViewMatrix; + float4 _189 = ShadowDepthPass.ShadowDepthPass_ProjectionMatrix * float4(_164.x, _164.y, _164.z, _164.w); + float4 _200; + if 
((ShadowDepthPass.ShadowDepthPass_bClampToNearPlane > 0.0) && (_189.z < 0.0)) + { + float4 _198 = _189; + _198.z = 9.9999999747524270787835121154785e-07; + _198.w = 1.0; + _200 = _198; + } + else + { + _200 = _189; + } + float _209 = abs(dot(float3(_92[0u].z, _92[1u].z, _92[2u].z), _182.xyz)); + float4 _234 = _200; + _234.z = ((_200.z * ShadowDepthPass.ShadowDepthPass_ShadowParams.w) + ((ShadowDepthPass.ShadowDepthPass_ShadowParams.y * fast::clamp((abs(_209) > 0.0) ? (sqrt(fast::clamp(1.0 - (_209 * _209), 0.0, 1.0)) / _209) : ShadowDepthPass.ShadowDepthPass_ShadowParams.z, 0.0, ShadowDepthPass.ShadowDepthPass_ShadowParams.z)) + ShadowDepthPass.ShadowDepthPass_ShadowParams.x)) * _200.w; + out.out_var_TEXCOORD10_centroid = float4(_179.x, _179.y, _179.z, _90.w); + out.out_var_TEXCOORD11_centroid = _182; + out.out_var_TEXCOORD6 = 0.0; + out.out_var_TEXCOORD7 = _164.xyz; + out.gl_Position = _234; + return out; +} + diff --git a/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert b/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert new file mode 100644 index 00000000000..67097c57715 --- /dev/null +++ b/reference/shaders-ue4/asm/vert/array-missing-copies.asm.vert @@ -0,0 +1,467 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? 
Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 
View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float 
PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float 
PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_MobileBasePass +{ + float4 MobileBasePass_Fog_ExponentialFogParameters; + float4 MobileBasePass_Fog_ExponentialFogParameters2; + float4 MobileBasePass_Fog_ExponentialFogColorParameter; + float4 MobileBasePass_Fog_ExponentialFogParameters3; + float4 MobileBasePass_Fog_InscatteringLightDirection; + float4 MobileBasePass_Fog_DirectionalInscatteringColor; + float2 MobileBasePass_Fog_SinCosInscatteringColorCubemapRotation; + float PrePadding_MobileBasePass_Fog_104; + float PrePadding_MobileBasePass_Fog_108; + packed_float3 MobileBasePass_Fog_FogInscatteringTextureParameters; + float MobileBasePass_Fog_ApplyVolumetricFog; + float PrePadding_MobileBasePass_PlanarReflection_128; + float PrePadding_MobileBasePass_PlanarReflection_132; + float PrePadding_MobileBasePass_PlanarReflection_136; + float PrePadding_MobileBasePass_PlanarReflection_140; + float 
PrePadding_MobileBasePass_PlanarReflection_144; + float PrePadding_MobileBasePass_PlanarReflection_148; + float PrePadding_MobileBasePass_PlanarReflection_152; + float PrePadding_MobileBasePass_PlanarReflection_156; + float4 MobileBasePass_PlanarReflection_ReflectionPlane; + float4 MobileBasePass_PlanarReflection_PlanarReflectionOrigin; + float4 MobileBasePass_PlanarReflection_PlanarReflectionXAxis; + float4 MobileBasePass_PlanarReflection_PlanarReflectionYAxis; + float3x4 MobileBasePass_PlanarReflection_InverseTransposeMirrorMatrix; + packed_float3 MobileBasePass_PlanarReflection_PlanarReflectionParameters; + float PrePadding_MobileBasePass_PlanarReflection_284; + float2 MobileBasePass_PlanarReflection_PlanarReflectionParameters2; + float PrePadding_MobileBasePass_PlanarReflection_296; + float PrePadding_MobileBasePass_PlanarReflection_300; + float4x4 MobileBasePass_PlanarReflection_ProjectionWithExtraFOV[2]; + float4 MobileBasePass_PlanarReflection_PlanarReflectionScreenScaleBias[2]; + float2 MobileBasePass_PlanarReflection_PlanarReflectionScreenBound; + uint MobileBasePass_PlanarReflection_bIsStereo; +}; + +struct type_Primitive +{ + float4x4 Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_UseEditorDepthTest; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + float PrePadding_Primitive_380; + packed_float3 Primitive_LocalObjectBoundsMax; + uint 
Primitive_LightingChannelMask; + uint Primitive_LightmapDataIndex; + int Primitive_SingleCaptureIndex; +}; + +struct type_LandscapeParameters +{ + float4 LandscapeParameters_HeightmapUVScaleBias; + float4 LandscapeParameters_WeightmapUVScaleBias; + float4 LandscapeParameters_LandscapeLightmapScaleBias; + float4 LandscapeParameters_SubsectionSizeVertsLayerUVPan; + float4 LandscapeParameters_SubsectionOffsetParams; + float4 LandscapeParameters_LightmapSubsectionOffsetParams; + float4x4 LandscapeParameters_LocalToWorldNoScaling; +}; + +struct type_Globals +{ + float4 LodBias; + float4 LodValues; + float4 SectionLods; + float4 NeighborSectionLod[4]; +}; + +struct main0_out +{ + float2 out_var_TEXCOORD0 [[user(locn0)]]; + float2 out_var_TEXCOORD1 [[user(locn1)]]; + float4 out_var_TEXCOORD2 [[user(locn2)]]; + float4 out_var_TEXCOORD3 [[user(locn3)]]; + float4 out_var_TEXCOORD8 [[user(locn4)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float4 in_var_ATTRIBUTE0 [[attribute(0)]]; + float4 in_var_ATTRIBUTE1_0 [[attribute(1)]]; + float4 in_var_ATTRIBUTE1_1 [[attribute(2)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_MobileBasePass& MobileBasePass [[buffer(1)]], constant type_Primitive& Primitive [[buffer(2)]], constant type_LandscapeParameters& LandscapeParameters [[buffer(3)]], constant type_Globals& _Globals [[buffer(4)]]) +{ + main0_out out = {}; + spvUnsafeArray in_var_ATTRIBUTE1 = {}; + in_var_ATTRIBUTE1[0] = in.in_var_ATTRIBUTE1_0; + in_var_ATTRIBUTE1[1] = in.in_var_ATTRIBUTE1_1; + spvUnsafeArray _97; + for (int _107 = 0; _107 < 1; ) + { + _97[_107] = float4(0.0); + _107++; + continue; + } + float4 _115 = in.in_var_ATTRIBUTE0 * float4(255.0); + float2 _116 = _115.zw; + float2 _119 = fract(_116 * float2(0.5)) * float2(2.0); + float2 _121 = (_116 - _119) * float2(0.0039215688593685626983642578125); + float2 _122 = _115.xy; + float2 _126 = _122 * float2(_Globals.LodValues.w); + float 
_127 = _126.y; + float _128 = _126.x; + float4 _132 = float4(_127, _128, 1.0 - _128, 1.0 - _127) * float4(2.0); + float4 _186; + if (_119.y > 0.5) + { + float4 _161; + if (_119.x > 0.5) + { + _161 = (_132 * float4(_Globals.SectionLods.w)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[3]); + } + else + { + _161 = (_132 * float4(_Globals.SectionLods.z)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[2]); + } + _186 = _161; + } + else + { + float4 _185; + if (_119.x > 0.5) + { + _185 = (_132 * float4(_Globals.SectionLods.y)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[1]); + } + else + { + _185 = (_132 * float4(_Globals.SectionLods.x)) + ((float4(1.0) - _132) * _Globals.NeighborSectionLod[0]); + } + _186 = _185; + } + float _206; + if ((_128 + _127) > 1.0) + { + float _198; + if (_128 < _127) + { + _198 = _186.w; + } + else + { + _198 = _186.z; + } + _206 = _198; + } + else + { + float _205; + if (_128 < _127) + { + _205 = _186.y; + } + else + { + _205 = _186.x; + } + _206 = _205; + } + float _207 = floor(_206); + float _220 = _121.x; + float3 _235 = select(select(select(select(select(float3(0.03125, _121.yy), float3(0.0625, _220, _121.y), bool3(_207 < 5.0)), float3(0.125, in_var_ATTRIBUTE1[1].w, _220), bool3(_207 < 4.0)), float3(0.25, in_var_ATTRIBUTE1[1].zw), bool3(_207 < 3.0)), float3(0.5, in_var_ATTRIBUTE1[1].yz), bool3(_207 < 2.0)), float3(1.0, in_var_ATTRIBUTE1[1].xy), bool3(_207 < 1.0)); + float _236 = _235.x; + float _245 = (((in_var_ATTRIBUTE1[0].x * 65280.0) + (in_var_ATTRIBUTE1[0].y * 255.0)) - 32768.0) * 0.0078125; + float _252 = (((in_var_ATTRIBUTE1[0].z * 65280.0) + (in_var_ATTRIBUTE1[0].w * 255.0)) - 32768.0) * 0.0078125; + float2 _257 = floor(_122 * float2(_236)); + float2 _271 = float2((LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x * _236) - 1.0, fast::max((LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.x * 0.5) * _236, 2.0) - 1.0) * 
float2(LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.y); + float3 _287 = mix(float3(_257 / float2(_271.x), mix(_245, _252, _235.y)), float3(floor(_257 * float2(0.5)) / float2(_271.y), mix(_245, _252, _235.z)), float3(_206 - _207)); + float2 _288 = _119.xy; + float2 _292 = _288 * LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.ww; + float3 _296 = _287 + float3(_292, 0.0); + float4 _322 = float4((((Primitive.Primitive_LocalToWorld[0u].xyz * _296.xxx) + (Primitive.Primitive_LocalToWorld[1u].xyz * _296.yyy)) + (Primitive.Primitive_LocalToWorld[2u].xyz * _296.zzz)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0); + float2 _323 = _287.xy; + float4 _338 = float4(_322.x, _322.y, _322.z, _322.w); + float4 _339 = View.View_TranslatedWorldToClip * _338; + float3 _341 = _322.xyz - float3(View.View_TranslatedWorldCameraOrigin); + float _345 = dot(_341, _341); + float _346 = rsqrt(_345); + float _347 = _345 * _346; + float _354 = _341.z; + float _357 = fast::max(0.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.w); + float _393; + float _394; + float _395; + float _396; + if (_357 > 0.0) + { + float _361 = _357 * _346; + float _362 = _361 * _354; + float _365 = View.View_WorldCameraOrigin[2] + _362; + _393 = (1.0 - _361) * _347; + _394 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.z * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.w))); + _395 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.x * exp2(-fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * (_365 - MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.y))); + _396 = _354 - _362; + } + else + { + _393 = _347; + _394 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.x; + _395 = MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.x; + 
_396 = _354; + } + float _400 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters.y * _396); + float _405 = log(2.0); + float _407 = 0.5 * (_405 * _405); + float _417 = fast::max(-127.0, MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters2.y * _396); + float _428 = (_395 * ((abs(_400) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_400)) / _400) : (_405 - (_407 * _400)))) + (_394 * ((abs(_417) > 0.00999999977648258209228515625) ? ((1.0 - exp2(-_417)) / _417) : (_405 - (_407 * _417)))); + float3 _459; + if (MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.w >= 0.0) + { + _459 = (MobileBasePass.MobileBasePass_Fog_DirectionalInscatteringColor.xyz * float3(pow(fast::clamp(dot(_341 * float3(_346), MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.xyz), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_DirectionalInscatteringColor.w))) * float3(1.0 - fast::clamp(exp2(-(_428 * fast::max(_393 - MobileBasePass.MobileBasePass_Fog_InscatteringLightDirection.w, 0.0))), 0.0, 1.0)); + } + else + { + _459 = float3(0.0); + } + bool _468 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w > 0.0) && (_347 > MobileBasePass.MobileBasePass_Fog_ExponentialFogParameters3.w); + float _471 = _468 ? 
1.0 : fast::max(fast::clamp(exp2(-(_428 * _393)), 0.0, 1.0), MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.w); + float3 _475 = (MobileBasePass.MobileBasePass_Fog_ExponentialFogColorParameter.xyz * float3(1.0 - _471)) + select(_459, float3(0.0), bool3(_468)); + _97[0] = float4(_475, _471); + float4 _482 = _338; + _482.w = _339.w; + out.out_var_TEXCOORD0 = ((_323 + LandscapeParameters.LandscapeParameters_SubsectionSizeVertsLayerUVPan.zw) + _292).xy; + out.out_var_TEXCOORD1 = ((_323 * LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.xy) + LandscapeParameters.LandscapeParameters_WeightmapUVScaleBias.zw) + (_288 * LandscapeParameters.LandscapeParameters_SubsectionOffsetParams.zz); + out.out_var_TEXCOORD2 = float4(float4(0.0).x, float4(0.0).y, _97[0].x, _97[0].y); + out.out_var_TEXCOORD3 = float4(float4(0.0).x, float4(0.0).y, _97[0].z, _97[0].w); + out.out_var_TEXCOORD8 = _482; + out.gl_Position = _339; + return out; +} + diff --git a/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert b/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert new file mode 100644 index 00000000000..5398fec390c --- /dev/null +++ b/reference/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -0,0 +1,387 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +// Returns 2D texture coords corresponding to 1D texel buffer coords +static inline __attribute__((always_inline)) +uint2 spvTexelBufferCoord(uint tc) +{ + return uint2(tc % 4096, tc / 4096); +} + +struct type_View +{ + float4x4 View_TranslatedWorldToClip; + float4x4 View_WorldToClip; + float4x4 View_TranslatedWorldToView; + float4x4 View_ViewToTranslatedWorld; + float4x4 View_TranslatedWorldToCameraView; + float4x4 View_CameraViewToTranslatedWorld; + float4x4 View_ViewToClip; + float4x4 View_ViewToClipNoAA; + float4x4 View_ClipToView; + float4x4 View_ClipToTranslatedWorld; + float4x4 View_SVPositionToTranslatedWorld; + float4x4 View_ScreenToWorld; + 
float4x4 View_ScreenToTranslatedWorld; + packed_float3 View_ViewForward; + float PrePadding_View_844; + packed_float3 View_ViewUp; + float PrePadding_View_860; + packed_float3 View_ViewRight; + float PrePadding_View_876; + packed_float3 View_HMDViewNoRollUp; + float PrePadding_View_892; + packed_float3 View_HMDViewNoRollRight; + float PrePadding_View_908; + float4 View_InvDeviceZToWorldZTransform; + float4 View_ScreenPositionScaleBias; + packed_float3 View_WorldCameraOrigin; + float PrePadding_View_956; + packed_float3 View_TranslatedWorldCameraOrigin; + float PrePadding_View_972; + packed_float3 View_WorldViewOrigin; + float PrePadding_View_988; + packed_float3 View_PreViewTranslation; + float PrePadding_View_1004; + float4x4 View_PrevProjection; + float4x4 View_PrevViewProj; + float4x4 View_PrevViewRotationProj; + float4x4 View_PrevViewToClip; + float4x4 View_PrevClipToView; + float4x4 View_PrevTranslatedWorldToClip; + float4x4 View_PrevTranslatedWorldToView; + float4x4 View_PrevViewToTranslatedWorld; + float4x4 View_PrevTranslatedWorldToCameraView; + float4x4 View_PrevCameraViewToTranslatedWorld; + packed_float3 View_PrevWorldCameraOrigin; + float PrePadding_View_1660; + packed_float3 View_PrevWorldViewOrigin; + float PrePadding_View_1676; + packed_float3 View_PrevPreViewTranslation; + float PrePadding_View_1692; + float4x4 View_PrevInvViewProj; + float4x4 View_PrevScreenToTranslatedWorld; + float4x4 View_ClipToPrevClip; + float4 View_TemporalAAJitter; + float4 View_GlobalClippingPlane; + float2 View_FieldOfViewWideAngles; + float2 View_PrevFieldOfViewWideAngles; + float4 View_ViewRectMin; + float4 View_ViewSizeAndInvSize; + float4 View_BufferSizeAndInvSize; + float4 View_BufferBilinearUVMinMax; + int View_NumSceneColorMSAASamples; + float View_PreExposure; + float View_OneOverPreExposure; + float PrePadding_View_2012; + float4 View_DiffuseOverrideParameter; + float4 View_SpecularOverrideParameter; + float4 View_NormalOverrideParameter; + float2 
View_RoughnessOverrideParameter; + float View_PrevFrameGameTime; + float View_PrevFrameRealTime; + float View_OutOfBoundsMask; + float PrePadding_View_2084; + float PrePadding_View_2088; + float PrePadding_View_2092; + packed_float3 View_WorldCameraMovementSinceLastFrame; + float View_CullingSign; + float View_NearPlane; + float View_AdaptiveTessellationFactor; + float View_GameTime; + float View_RealTime; + float View_DeltaTime; + float View_MaterialTextureMipBias; + float View_MaterialTextureDerivativeMultiply; + uint View_Random; + uint View_FrameNumber; + uint View_StateFrameIndexMod8; + uint View_StateFrameIndex; + float View_CameraCut; + float View_UnlitViewmodeMask; + float PrePadding_View_2164; + float PrePadding_View_2168; + float PrePadding_View_2172; + float4 View_DirectionalLightColor; + packed_float3 View_DirectionalLightDirection; + float PrePadding_View_2204; + float4 View_TranslucencyLightingVolumeMin[2]; + float4 View_TranslucencyLightingVolumeInvSize[2]; + float4 View_TemporalAAParams; + float4 View_CircleDOFParams; + float View_DepthOfFieldSensorWidth; + float View_DepthOfFieldFocalDistance; + float View_DepthOfFieldScale; + float View_DepthOfFieldFocalLength; + float View_DepthOfFieldFocalRegion; + float View_DepthOfFieldNearTransitionRegion; + float View_DepthOfFieldFarTransitionRegion; + float View_MotionBlurNormalizedToPixel; + float View_bSubsurfacePostprocessEnabled; + float View_GeneralPurposeTweak; + float View_DemosaicVposOffset; + float PrePadding_View_2348; + packed_float3 View_IndirectLightingColorScale; + float View_HDR32bppEncodingMode; + packed_float3 View_AtmosphericFogSunDirection; + float View_AtmosphericFogSunPower; + float View_AtmosphericFogPower; + float View_AtmosphericFogDensityScale; + float View_AtmosphericFogDensityOffset; + float View_AtmosphericFogGroundOffset; + float View_AtmosphericFogDistanceScale; + float View_AtmosphericFogAltitudeScale; + float View_AtmosphericFogHeightScaleRayleigh; + float 
View_AtmosphericFogStartDistance; + float View_AtmosphericFogDistanceOffset; + float View_AtmosphericFogSunDiscScale; + uint View_AtmosphericFogRenderMask; + uint View_AtmosphericFogInscatterAltitudeSampleNum; + float4 View_AtmosphericFogSunColor; + packed_float3 View_NormalCurvatureToRoughnessScaleBias; + float View_RenderingReflectionCaptureMask; + float4 View_AmbientCubemapTint; + float View_AmbientCubemapIntensity; + float View_SkyLightParameters; + float PrePadding_View_2488; + float PrePadding_View_2492; + float4 View_SkyLightColor; + float4 View_SkyIrradianceEnvironmentMap[7]; + float View_MobilePreviewMode; + float View_HMDEyePaddingOffset; + float View_ReflectionCubemapMaxMip; + float View_ShowDecalsMask; + uint View_DistanceFieldAOSpecularOcclusionMode; + float View_IndirectCapsuleSelfShadowingIntensity; + float PrePadding_View_2648; + float PrePadding_View_2652; + packed_float3 View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight; + int View_StereoPassIndex; + float4 View_GlobalVolumeCenterAndExtent[4]; + float4 View_GlobalVolumeWorldToUVAddAndMul[4]; + float View_GlobalVolumeDimension; + float View_GlobalVolumeTexelSize; + float View_MaxGlobalDistance; + float View_bCheckerboardSubsurfaceProfileRendering; + packed_float3 View_VolumetricFogInvGridSize; + float PrePadding_View_2828; + packed_float3 View_VolumetricFogGridZParams; + float PrePadding_View_2844; + float2 View_VolumetricFogSVPosToVolumeUV; + float View_VolumetricFogMaxDistance; + float PrePadding_View_2860; + packed_float3 View_VolumetricLightmapWorldToUVScale; + float PrePadding_View_2876; + packed_float3 View_VolumetricLightmapWorldToUVAdd; + float PrePadding_View_2892; + packed_float3 View_VolumetricLightmapIndirectionTextureSize; + float View_VolumetricLightmapBrickSize; + packed_float3 View_VolumetricLightmapBrickTexelSize; + float View_StereoIPD; + float View_IndirectLightingCacheShowFlag; + float View_EyeToPixelSpreadAngle; +}; + +struct type_Primitive +{ + float4x4 
Primitive_LocalToWorld; + float4 Primitive_InvNonUniformScaleAndDeterminantSign; + float4 Primitive_ObjectWorldPositionAndRadius; + float4x4 Primitive_WorldToLocal; + float4x4 Primitive_PreviousLocalToWorld; + float4x4 Primitive_PreviousWorldToLocal; + packed_float3 Primitive_ActorWorldPosition; + float Primitive_UseSingleSampleShadowFromStationaryLights; + packed_float3 Primitive_ObjectBounds; + float Primitive_LpvBiasMultiplier; + float Primitive_DecalReceiverMask; + float Primitive_PerObjectGBufferData; + float Primitive_UseVolumetricLightmapShadowFromStationaryLights; + float Primitive_UseEditorDepthTest; + float4 Primitive_ObjectOrientation; + float4 Primitive_NonUniformScale; + packed_float3 Primitive_LocalObjectBoundsMin; + float PrePadding_Primitive_380; + packed_float3 Primitive_LocalObjectBoundsMax; + uint Primitive_LightingChannelMask; + uint Primitive_LightmapDataIndex; + int Primitive_SingleCaptureIndex; +}; + +struct type_MobileShadowDepthPass +{ + float PrePadding_MobileShadowDepthPass_0; + float PrePadding_MobileShadowDepthPass_4; + float PrePadding_MobileShadowDepthPass_8; + float PrePadding_MobileShadowDepthPass_12; + float PrePadding_MobileShadowDepthPass_16; + float PrePadding_MobileShadowDepthPass_20; + float PrePadding_MobileShadowDepthPass_24; + float PrePadding_MobileShadowDepthPass_28; + float PrePadding_MobileShadowDepthPass_32; + float PrePadding_MobileShadowDepthPass_36; + float PrePadding_MobileShadowDepthPass_40; + float PrePadding_MobileShadowDepthPass_44; + float PrePadding_MobileShadowDepthPass_48; + float PrePadding_MobileShadowDepthPass_52; + float PrePadding_MobileShadowDepthPass_56; + float PrePadding_MobileShadowDepthPass_60; + float PrePadding_MobileShadowDepthPass_64; + float PrePadding_MobileShadowDepthPass_68; + float PrePadding_MobileShadowDepthPass_72; + float PrePadding_MobileShadowDepthPass_76; + float4x4 MobileShadowDepthPass_ProjectionMatrix; + float2 MobileShadowDepthPass_ShadowParams; + float 
MobileShadowDepthPass_bClampToNearPlane; + float PrePadding_MobileShadowDepthPass_156; + float4x4 MobileShadowDepthPass_ShadowViewProjectionMatrices[6]; +}; + +struct type_EmitterDynamicUniforms +{ + float2 EmitterDynamicUniforms_LocalToWorldScale; + float EmitterDynamicUniforms_EmitterInstRandom; + float PrePadding_EmitterDynamicUniforms_12; + float4 EmitterDynamicUniforms_AxisLockRight; + float4 EmitterDynamicUniforms_AxisLockUp; + float4 EmitterDynamicUniforms_DynamicColor; + float4 EmitterDynamicUniforms_MacroUVParameters; +}; + +struct type_EmitterUniforms +{ + float4 EmitterUniforms_ColorCurve; + float4 EmitterUniforms_ColorScale; + float4 EmitterUniforms_ColorBias; + float4 EmitterUniforms_MiscCurve; + float4 EmitterUniforms_MiscScale; + float4 EmitterUniforms_MiscBias; + float4 EmitterUniforms_SizeBySpeed; + float4 EmitterUniforms_SubImageSize; + float4 EmitterUniforms_TangentSelector; + packed_float3 EmitterUniforms_CameraFacingBlend; + float EmitterUniforms_RemoveHMDRoll; + float EmitterUniforms_RotationRateScale; + float EmitterUniforms_RotationBias; + float EmitterUniforms_CameraMotionBlurAmount; + float PrePadding_EmitterUniforms_172; + float2 EmitterUniforms_PivotOffset; +}; + +struct type_Globals +{ + uint ParticleIndicesOffset; +}; + +struct main0_out +{ + float out_var_TEXCOORD6 [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + float2 in_var_ATTRIBUTE0 [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant type_View& View [[buffer(0)]], constant type_Primitive& Primitive [[buffer(1)]], constant type_MobileShadowDepthPass& MobileShadowDepthPass [[buffer(2)]], constant type_EmitterDynamicUniforms& EmitterDynamicUniforms [[buffer(3)]], constant type_EmitterUniforms& EmitterUniforms [[buffer(4)]], constant type_Globals& _Globals [[buffer(5)]], texture2d ParticleIndices [[texture(0)]], texture2d PositionTexture [[texture(1)]], texture2d VelocityTexture [[texture(2)]], texture2d AttributesTexture 
[[texture(3)]], texture2d CurveTexture [[texture(4)]], sampler PositionTextureSampler [[sampler(0)]], sampler VelocityTextureSampler [[sampler(1)]], sampler AttributesTextureSampler [[sampler(2)]], sampler CurveTextureSampler [[sampler(3)]], uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + float2 _133 = ParticleIndices.read(spvTexelBufferCoord((_Globals.ParticleIndicesOffset + ((gl_InstanceIndex * 16u) + (gl_VertexIndex / 4u))))).xy; + float4 _137 = PositionTexture.sample(PositionTextureSampler, _133, level(0.0)); + float4 _145 = AttributesTexture.sample(AttributesTextureSampler, _133, level(0.0)); + float _146 = _137.w; + float3 _158 = float3x3(Primitive.Primitive_LocalToWorld[0].xyz, Primitive.Primitive_LocalToWorld[1].xyz, Primitive.Primitive_LocalToWorld[2].xyz) * VelocityTexture.sample(VelocityTextureSampler, _133, level(0.0)).xyz; + float3 _160 = fast::normalize(_158 + float3(0.0, 0.0, 9.9999997473787516355514526367188e-05)); + float2 _204 = ((((_145.xy + float2((_145.x < 0.5) ? 0.0 : (-0.5), (_145.y < 0.5) ? 
0.0 : (-0.5))) * float2(2.0)) * (((CurveTexture.sample(CurveTextureSampler, (EmitterUniforms.EmitterUniforms_MiscCurve.xy + (EmitterUniforms.EmitterUniforms_MiscCurve.zw * float2(_146))), level(0.0)) * EmitterUniforms.EmitterUniforms_MiscScale) + EmitterUniforms.EmitterUniforms_MiscBias).xy * EmitterDynamicUniforms.EmitterDynamicUniforms_LocalToWorldScale)) * fast::min(fast::max(EmitterUniforms.EmitterUniforms_SizeBySpeed.xy * float2(length(_158)), float2(1.0)), EmitterUniforms.EmitterUniforms_SizeBySpeed.zw)) * float2(step(_146, 1.0)); + float3 _239 = float4((((Primitive.Primitive_LocalToWorld[0u].xyz * _137.xxx) + (Primitive.Primitive_LocalToWorld[1u].xyz * _137.yyy)) + (Primitive.Primitive_LocalToWorld[2u].xyz * _137.zzz)) + (Primitive.Primitive_LocalToWorld[3u].xyz + float3(View.View_PreViewTranslation)), 1.0).xyz; + float3 _242 = float3(EmitterUniforms.EmitterUniforms_RemoveHMDRoll); + float3 _251 = mix(mix(float3(View.View_ViewRight), float3(View.View_HMDViewNoRollRight), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.w)); + float3 _259 = mix(-mix(float3(View.View_ViewUp), float3(View.View_HMDViewNoRollUp), _242), EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.xyz, float3(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockUp.w)); + float3 _260 = float3(View.View_TranslatedWorldCameraOrigin) - _239; + float _261 = dot(_260, _260); + float3 _265 = _260 / float3(sqrt(fast::max(_261, 0.00999999977648258209228515625))); + float3 _335; + float3 _336; + if (EmitterUniforms.EmitterUniforms_CameraFacingBlend[0] > 0.0) + { + float3 _279 = cross(_265, float3(0.0, 0.0, 1.0)); + float3 _284 = _279 / float3(sqrt(fast::max(dot(_279, _279), 0.00999999977648258209228515625))); + float3 _286 = float3(fast::clamp((_261 * EmitterUniforms.EmitterUniforms_CameraFacingBlend[1]) - EmitterUniforms.EmitterUniforms_CameraFacingBlend[2], 0.0, 1.0)); + _335 = fast::normalize(mix(_251, 
_284, _286)); + _336 = fast::normalize(mix(_259, cross(_265, _284), _286)); + } + else + { + float3 _333; + float3 _334; + if (EmitterUniforms.EmitterUniforms_TangentSelector.y > 0.0) + { + float3 _297 = cross(_265, _160); + _333 = _297 / float3(sqrt(fast::max(dot(_297, _297), 0.00999999977648258209228515625))); + _334 = -_160; + } + else + { + float3 _331; + float3 _332; + if (EmitterUniforms.EmitterUniforms_TangentSelector.z > 0.0) + { + float3 _310 = cross(EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz, _265); + _331 = EmitterDynamicUniforms.EmitterDynamicUniforms_AxisLockRight.xyz; + _332 = -(_310 / float3(sqrt(fast::max(dot(_310, _310), 0.00999999977648258209228515625)))); + } + else + { + float3 _329; + float3 _330; + if (EmitterUniforms.EmitterUniforms_TangentSelector.w > 0.0) + { + float3 _322 = cross(_265, float3(0.0, 0.0, 1.0)); + float3 _327 = _322 / float3(sqrt(fast::max(dot(_322, _322), 0.00999999977648258209228515625))); + _329 = _327; + _330 = cross(_265, _327); + } + else + { + _329 = _251; + _330 = _259; + } + _331 = _329; + _332 = _330; + } + _333 = _331; + _334 = _332; + } + _335 = _333; + _336 = _334; + } + float _339 = ((_145.z + ((_145.w * EmitterUniforms.EmitterUniforms_RotationRateScale) * _146)) * 6.283185482025146484375) + EmitterUniforms.EmitterUniforms_RotationBias; + float3 _342 = float3(sin(_339)); + float3 _344 = float3(cos(_339)); + float3 _367 = _239 + ((float3(_204.x * (in.in_var_ATTRIBUTE0.x + EmitterUniforms.EmitterUniforms_PivotOffset.x)) * ((_342 * _336) + (_344 * _335))) + (float3(_204.y * (in.in_var_ATTRIBUTE0.y + EmitterUniforms.EmitterUniforms_PivotOffset.y)) * ((_344 * _336) - (_342 * _335)))); + float4 _371 = float4(_367, 1.0); + float4 _375 = MobileShadowDepthPass.MobileShadowDepthPass_ProjectionMatrix * float4(_371.x, _371.y, _371.z, _371.w); + float4 _386; + if ((MobileShadowDepthPass.MobileShadowDepthPass_bClampToNearPlane > 0.0) && (_375.z < 0.0)) + { + float4 _384 = _375; + _384.z = 
9.9999999747524270787835121154785e-07; + _384.w = 1.0; + _386 = _384; + } + else + { + _386 = _375; + } + float4 _396 = _386; + _396.z = ((_386.z * MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.y) + MobileShadowDepthPass.MobileShadowDepthPass_ShadowParams.x) * _386.w; + out.out_var_TEXCOORD6 = 0.0; + out.gl_Position = _396; + return out; +} + diff --git a/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk b/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk deleted file mode 100644 index 4aaf397a0fa..00000000000 --- a/reference/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag.vk +++ /dev/null @@ -1,11 +0,0 @@ -#version 450 -#extension GL_AMD_shader_fragment_mask : require - -layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS t; - -void main() -{ - vec4 test2 = fragmentFetchAMD(t, 4u); - uint testi2 = fragmentMaskFetchAMD(t); -} - diff --git a/reference/shaders/amd/gcn_shader.comp b/reference/shaders/amd/gcn_shader.comp index 1c0c5ae38bf..380fb9dfa7b 100644 --- a/reference/shaders/amd/gcn_shader.comp +++ b/reference/shaders/amd/gcn_shader.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif #extension GL_AMD_gcn_shader : require layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/reference/shaders/amd/shader_ballot.comp b/reference/shaders/amd/shader_ballot.comp index 1fade727c6b..04d363457a0 100644 --- a/reference/shaders/amd/shader_ballot.comp +++ b/reference/shaders/amd/shader_ballot.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif #extension GL_ARB_shader_ballot : require #extension GL_AMD_shader_ballot : require layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/reference/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp b/reference/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp deleted file mode 100644 index a14343ae127..00000000000 --- a/reference/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp +++ /dev/null @@ -1,11 +0,0 @@ -#version 450 -#extension GL_AMD_shader_ballot : require -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -void main() -{ - float addInvocations = addInvocationsNonUniformAMD(0.0); - int minInvocations = minInvocationsNonUniformAMD(1); - uint maxInvocations = uint(maxInvocationsNonUniformAMD(4)); -} - diff --git a/reference/shaders/asm/comp/bitcast_icmp.asm.comp b/reference/shaders/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..8d59fcc856a --- /dev/null +++ b/reference/shaders/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) restrict buffer _3_5 +{ + ivec4 _m0; + uvec4 _m1; +} _5; + +layout(binding = 1, std430) restrict buffer _4_6 +{ + uvec4 _m0; + ivec4 _m1; +} _6; + +void main() +{ + _6._m0 = uvec4(lessThan(ivec4(_5._m1), _5._m0)); + _6._m0 = 
uvec4(lessThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(lessThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(lessThanEqual(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(greaterThan(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThanEqual(ivec4(_5._m1), _5._m0)); + _6._m0 = uvec4(greaterThan(_5._m1, uvec4(_5._m0))); + _6._m0 = uvec4(greaterThanEqual(_5._m1, uvec4(_5._m0))); +} + diff --git a/reference/shaders/asm/comp/bitcast_iequal.asm.comp b/reference/shaders/asm/comp/bitcast_iequal.asm.comp index bdb3eeb9afd..8a552dba068 100644 --- a/reference/shaders/asm/comp/bitcast_iequal.asm.comp +++ b/reference/shaders/asm/comp/bitcast_iequal.asm.comp @@ -21,13 +21,13 @@ void main() bvec4 _35 = equal(_30, ivec4(_31)); bvec4 _36 = equal(_31, _31); bvec4 _37 = equal(_30, _30); - _6._m0 = mix(uvec4(0u), uvec4(1u), _34); - _6._m0 = mix(uvec4(0u), uvec4(1u), _35); - _6._m0 = mix(uvec4(0u), uvec4(1u), _36); - _6._m0 = mix(uvec4(0u), uvec4(1u), _37); - _6._m1 = mix(ivec4(0), ivec4(1), _34); - _6._m1 = mix(ivec4(0), ivec4(1), _35); - _6._m1 = mix(ivec4(0), ivec4(1), _36); - _6._m1 = mix(ivec4(0), ivec4(1), _37); + _6._m0 = uvec4(_34); + _6._m0 = uvec4(_35); + _6._m0 = uvec4(_36); + _6._m0 = uvec4(_37); + _6._m1 = ivec4(_34); + _6._m1 = ivec4(_35); + _6._m1 = ivec4(_36); + _6._m1 = ivec4(_37); } diff --git a/reference/shaders/asm/comp/block-name-alias-global.asm.comp b/reference/shaders/asm/comp/block-name-alias-global.asm.comp index 08fccbcde6b..86ba0a3b9f9 100644 --- a/reference/shaders/asm/comp/block-name-alias-global.asm.comp +++ b/reference/shaders/asm/comp/block-name-alias-global.asm.comp @@ -7,12 +7,6 @@ struct A int b; }; -struct A_1 -{ - int a; - int b; -}; - layout(binding = 1, std430) buffer C1 { A Data[]; @@ -20,7 +14,7 @@ layout(binding = 1, std430) buffer C1 layout(binding = 2, std140) uniform C2 { - A_1 Data[1024]; + A Data[1024]; } C2_1; layout(binding = 0, std430) buffer B @@ -30,7 +24,7 @@ layout(binding = 0, std430) buffer B layout(binding = 3, std140) uniform B 
{ - A_1 Data[1024]; + A Data[1024]; } C4; void main() diff --git a/reference/shaders/asm/comp/control-flow-hints.asm.comp b/reference/shaders/asm/comp/control-flow-hints.asm.comp new file mode 100644 index 00000000000..a1e0a082252 --- /dev/null +++ b/reference/shaders/asm/comp/control-flow-hints.asm.comp @@ -0,0 +1,58 @@ +#version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer bar +{ + vec4 _data[]; +} bar_1; + +layout(binding = 1, std430) buffer foo +{ + vec4 _data[]; +} foo_1; + +void _main() +{ + SPIRV_CROSS_UNROLL + for (int i = 0; i < 16; i++) + { + bar_1._data[i] = foo_1._data[i]; + } + SPIRV_CROSS_LOOP + for (int i_1 = 0; i_1 < 16; i_1++) + { + bar_1._data[15 - i_1] = foo_1._data[i_1]; + } + float v = bar_1._data[10].x; + float w = foo_1._data[10].x; + SPIRV_CROSS_BRANCH + if (v > 10.0) + { + foo_1._data[20] = vec4(5.0); + } + float value = 20.0; + SPIRV_CROSS_FLATTEN + if (w > 40.0) + { + value = 20.0; + } + foo_1._data[20] = vec4(value); +} + +void main() +{ + _main(); +} + diff --git a/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..449a87d0dd3 --- /dev/null +++ b/reference/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,39 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a1; + vec2 a2; + vec3 a3; + vec4 a4; + float b1; + vec2 b2; + vec3 b3; + vec4 b4; + float c1; + vec2 c2; + vec3 c3; + vec4 c4; 
+} _4; + +void main() +{ + _4.a1 = min(_4.b1, _4.c1); + _4.a2 = min(_4.b2, _4.c2); + _4.a3 = min(_4.b3, _4.c3); + _4.a4 = min(_4.b4, _4.c4); + _4.a1 = max(_4.b1, _4.c1); + _4.a2 = max(_4.b2, _4.c2); + _4.a3 = max(_4.b3, _4.c3); + _4.a4 = max(_4.b4, _4.c4); + _4.a1 = clamp(_4.a1, _4.b1, _4.c1); + _4.a2 = clamp(_4.a2, _4.b2, _4.c2); + _4.a3 = clamp(_4.a3, _4.b3, _4.c3); + _4.a4 = clamp(_4.a4, _4.b4, _4.c4); + for (int i = 0; i < 2; i++, _4.a1 = clamp(_4.a1, _4.b2.x, _4.b2.y)) + { + _4.a2 = min(_4.b2, _4.c2); + } +} + diff --git a/reference/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp deleted file mode 100644 index 482cfd8a069..00000000000 --- a/reference/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp +++ /dev/null @@ -1,28 +0,0 @@ -#version 450 - -#ifndef SPIRV_CROSS_CONSTANT_ID_0 -#define SPIRV_CROSS_CONSTANT_ID_0 1u -#endif -#ifndef SPIRV_CROSS_CONSTANT_ID_2 -#define SPIRV_CROSS_CONSTANT_ID_2 3u -#endif - -layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = 2, local_size_z = SPIRV_CROSS_CONSTANT_ID_2) in; - -layout(binding = 0, std430) buffer _6_8 -{ - float _m0[]; -} _8; - -layout(binding = 1, std430) buffer _6_9 -{ - float _m0[]; -} _9; - -uvec3 _22 = gl_WorkGroupSize; - -void main() -{ - _8._m0[gl_WorkGroupID.x] = _9._m0[gl_WorkGroupID.x] + _8._m0[gl_WorkGroupID.x]; -} - diff --git a/reference/opt/shaders/asm/comp/switch-break-ladder.asm.comp b/reference/shaders/asm/comp/switch-break-ladder.asm.invalid.comp similarity index 83% rename from reference/opt/shaders/asm/comp/switch-break-ladder.asm.comp rename to reference/shaders/asm/comp/switch-break-ladder.asm.invalid.comp index 4cf3f126d6a..f326869cef5 100644 --- a/reference/opt/shaders/asm/comp/switch-break-ladder.asm.comp +++ b/reference/shaders/asm/comp/switch-break-ladder.asm.invalid.comp @@ -10,13 +10,12 @@ layout(binding = 0, std430) buffer BUF void main() { - int _44; - _44 = o.a; - int _48; + int c = o.a; + 
int a; for (;;) { bool _22_ladder_break = false; - switch (_44) + switch (c) { case 5: { @@ -28,6 +27,7 @@ void main() case 10: case 20: { + c += c; _30_ladder_break = true; break; } @@ -41,29 +41,24 @@ void main() break; } } - _48 = _44 + _44; break; } case 1: case 2: case 3: { + a = c; _22_ladder_break = true; break; } - default: - { - _48 = _44; - break; - } } if (_22_ladder_break) { break; } - _44 = _48 + 1; + c++; continue; } - o.b = _44; + o.b = a; } diff --git a/reference/shaders/asm/comp/undefined-constant-composite.asm.comp b/reference/shaders/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..dae558dc84a --- /dev/null +++ b/reference/shaders/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,33 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +struct _20 +{ + int _m0; + int _m1; +}; + +int _28; + +layout(binding = 1, std430) buffer _5_6 +{ + int _m0[10]; +} _6; + +layout(binding = 0, std430) buffer _7_8 +{ + int _m0[10]; +} _8; + +int _39(int _41, _20 _42) +{ + return _41 + _42._m1; +} + +void main() +{ + int _32 = _8._m0[gl_GlobalInvocationID.x]; + _20 _33 = _20(_28, 200); + _6._m0[gl_GlobalInvocationID.x] = _39(_32, _33); +} + diff --git a/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp b/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp deleted file mode 100644 index 7755593f573..00000000000 --- a/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp +++ /dev/null @@ -1,18 +0,0 @@ -#version 430 -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -layout(binding = 0, std430) buffer _8_9 -{ - float _m0[]; -} _9; - -layout(binding = 1, std430) buffer _8_10 -{ - float _m0[]; -} _10; - -void main() -{ - _10._m0[gl_GlobalInvocationID.x] = -_9._m0[gl_GlobalInvocationID.x]; -} - diff --git a/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag b/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag index 
feb45db4405..31011429448 100644 --- a/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag +++ b/reference/shaders/asm/frag/for-loop-phi-only-continue.asm.frag @@ -12,7 +12,6 @@ void main() { _20 = _19 + 1.0; _23 = _22 + 1; - continue; } FragColor = vec4(_19); } diff --git a/reference/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag b/reference/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag index 3585285eb66..ec89d44eef9 100644 --- a/reference/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag +++ b/reference/shaders/asm/frag/hlsl-sample-cmp-level-zero-cube.asm.frag @@ -6,7 +6,7 @@ layout(location = 0) out float _entryPointOutput; float _main() { - vec4 _33 = vec4(vec3(0.100000001490116119384765625), 0.5); + vec4 _33 = vec4(0.100000001490116119384765625, 0.100000001490116119384765625, 0.100000001490116119384765625, 0.5); return textureGrad(SPIRV_Cross_CombinedpointLightShadowMapshadowSamplerPCF, vec4(_33.xyz, _33.w), vec3(0.0), vec3(0.0)); } diff --git a/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag b/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag new file mode 100644 index 00000000000..60bb78aa5c0 --- /dev/null +++ b/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag @@ -0,0 +1,38 @@ +#version 450 + +uniform sampler2D SPIRV_Cross_CombinedparamSPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler; +uniform sampler2D SPIRV_Cross_CombinedparamSampler; +uniform sampler2D SPIRV_Cross_CombinedSampledImageSampler; + +layout(location = 0) out vec4 _entryPointOutput; + +vec4 sample_fetch(ivec3 UV, sampler2D SPIRV_Cross_CombinedtexSPIRV_Cross_DummySampler) +{ + return texelFetch(SPIRV_Cross_CombinedtexSPIRV_Cross_DummySampler, UV.xy, UV.z); +} + +vec4 sample_sampler(vec2 UV, sampler2D SPIRV_Cross_CombinedtexSampler) +{ + return texture(SPIRV_Cross_CombinedtexSampler, UV); +} + +vec4 _main(vec4 xIn) +{ + 
ivec3 coord = ivec3(int(xIn.x * 1280.0), int(xIn.y * 720.0), 0); + ivec3 param = coord; + vec4 value = sample_fetch(param, SPIRV_Cross_CombinedparamSPIRV_Cross_DummySampler); + value += texelFetch(SPIRV_Cross_CombinedSampledImageSPIRV_Cross_DummySampler, coord.xy, coord.z); + vec2 param_1 = xIn.xy; + value += sample_sampler(param_1, SPIRV_Cross_CombinedparamSampler); + value += texture(SPIRV_Cross_CombinedSampledImageSampler, xIn.xy); + return value; +} + +void main() +{ + vec4 xIn = gl_FragCoord; + vec4 param = xIn; + _entryPointOutput = _main(param); +} + diff --git a/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk b/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk new file mode 100644 index 00000000000..e4d9fc4543e --- /dev/null +++ b/reference/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag.vk @@ -0,0 +1,37 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler Sampler; +layout(set = 0, binding = 0) uniform texture2D SampledImage; +layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler; + +layout(location = 0) out vec4 _entryPointOutput; + +vec4 sample_fetch(texture2D tex, ivec3 UV) +{ + return texelFetch(sampler2D(tex, SPIRV_Cross_DummySampler), UV.xy, UV.z); +} + +vec4 sample_sampler(texture2D tex, vec2 UV) +{ + return texture(sampler2D(tex, Sampler), UV); +} + +vec4 _main(vec4 xIn) +{ + ivec3 coord = ivec3(int(xIn.x * 1280.0), int(xIn.y * 720.0), 0); + ivec3 param = coord; + vec4 value = sample_fetch(SampledImage, param); + value += texelFetch(sampler2D(SampledImage, SPIRV_Cross_DummySampler), coord.xy, coord.z); + vec2 param_1 = xIn.xy; + value += sample_sampler(SampledImage, param_1); + value += texture(sampler2D(SampledImage, Sampler), xIn.xy); + return value; +} + +void main() +{ + vec4 xIn = gl_FragCoord; + vec4 param = xIn; + _entryPointOutput = _main(param); +} + diff --git 
a/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag b/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag new file mode 100644 index 00000000000..2040dd1afb7 --- /dev/null +++ b/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag @@ -0,0 +1,13 @@ +#version 450 + +uniform sampler2D SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler; +uniform sampler2DMS SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler; + +void main() +{ + ivec2 b = textureSize(SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler, 0); + ivec2 c = textureSize(SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler); + int l1 = textureQueryLevels(SPIRV_Cross_CombineduSampler2DSPIRV_Cross_DummySampler); + int s0 = textureSamples(SPIRV_Cross_CombineduSampler2DMSSPIRV_Cross_DummySampler); +} + diff --git a/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk b/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk new file mode 100644 index 00000000000..828d2a87271 --- /dev/null +++ b/reference/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag.vk @@ -0,0 +1,14 @@ +#version 450 + +layout(set = 0, binding = 0) uniform texture2D uSampler2D; +layout(set = 0, binding = 0) uniform texture2DMS uSampler2DMS; +layout(set = 0, binding = 0) uniform sampler SPIRV_Cross_DummySampler; + +void main() +{ + ivec2 b = textureSize(sampler2D(uSampler2D, SPIRV_Cross_DummySampler), 0); + ivec2 c = textureSize(sampler2DMS(uSampler2DMS, SPIRV_Cross_DummySampler)); + int l1 = textureQueryLevels(sampler2D(uSampler2D, SPIRV_Cross_DummySampler)); + int s0 = textureSamples(sampler2DMS(uSampler2DMS, SPIRV_Cross_DummySampler)); +} + diff --git a/reference/shaders/asm/frag/inf-nan-constant-double.asm.frag b/reference/shaders/asm/frag/inf-nan-constant-double.asm.frag index d8e29aa4041..e53b282f879 100644 --- a/reference/shaders/asm/frag/inf-nan-constant-double.asm.frag +++ 
b/reference/shaders/asm/frag/inf-nan-constant-double.asm.frag @@ -1,11 +1,17 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif layout(location = 0) out vec3 FragColor; layout(location = 0) flat in double vTmp; void main() { - FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul), uint64BitsToDouble(0xfff0000000000000ul), uint64BitsToDouble(0x7ff8000000000000ul)) + dvec3(vTmp)); + FragColor = vec3(dvec3(uint64BitsToDouble(0x7ff0000000000000ul /* inf */), uint64BitsToDouble(0xfff0000000000000ul /* -inf */), uint64BitsToDouble(0x7ff8000000000000ul /* nan */)) + dvec3(vTmp)); } diff --git a/reference/shaders/asm/frag/inf-nan-constant.asm.frag b/reference/shaders/asm/frag/inf-nan-constant.asm.frag index dd4284c9b11..b5e0c6e968b 100644 --- a/reference/shaders/asm/frag/inf-nan-constant.asm.frag +++ b/reference/shaders/asm/frag/inf-nan-constant.asm.frag @@ -6,6 +6,6 @@ layout(location = 0) out highp vec3 FragColor; void main() { - FragColor = vec3(uintBitsToFloat(0x7f800000u), uintBitsToFloat(0xff800000u), uintBitsToFloat(0x7fc00000u)); + FragColor = vec3(uintBitsToFloat(0x7f800000u /* inf */), uintBitsToFloat(0xff800000u /* -inf */), uintBitsToFloat(0x7fc00000u /* nan */)); } diff --git a/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag b/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag index e1edccff69e..3ee68e321d9 100644 --- a/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag +++ b/reference/shaders/asm/frag/loop-body-dominator-continue-access.asm.frag @@ -1,4 +1,16 @@ #version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] +#define SPIRV_CROSS_UNROLL [[unroll]] 
+#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif layout(binding = 0, std140) uniform Foo { @@ -10,6 +22,8 @@ layout(binding = 0, std140) uniform Foo layout(location = 0) in vec3 fragWorld; layout(location = 0) out int _entryPointOutput; +mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; } + mat4 GetClip2TexMatrix() { if (_11.test == 0) @@ -21,9 +35,10 @@ mat4 GetClip2TexMatrix() int GetCascade(vec3 fragWorldPosition) { + SPIRV_CROSS_UNROLL for (uint cascadeIndex = 0u; cascadeIndex < _11.shadowCascadesNum; cascadeIndex++) { - mat4 worldToShadowMap = GetClip2TexMatrix() * _11.lightVP[cascadeIndex]; + mat4 worldToShadowMap = GetClip2TexMatrix() * spvWorkaroundRowMajor(_11.lightVP[cascadeIndex]); vec4 fragShadowMapPos = worldToShadowMap * vec4(fragWorldPosition, 1.0); if ((((fragShadowMapPos.z >= 0.0) && (fragShadowMapPos.z <= 1.0)) && (max(fragShadowMapPos.x, fragShadowMapPos.y) <= 1.0)) && (min(fragShadowMapPos.x, fragShadowMapPos.y) >= 0.0)) { diff --git a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag index a99322d67e1..8a3b664bc44 100644 --- a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag +++ b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { + vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w); vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv); float _50 = _49.y; + float _53 = clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375); float _55; float _58; _55 = 0.0; @@ -31,8 +33,8 @@ void main() for (int _60 = -3; _60 <= 3; ) { float _64 = float(_60); - vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)); - float _78 = 
exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)); + vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64)); + float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53); _55 += (_72.x * _78); _58 += _78; _60++; diff --git a/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag b/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag index 3dae3e161c2..d62b63a0e0a 100644 --- a/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag +++ b/reference/shaders/asm/frag/op-phi-swap-continue-block.asm.frag @@ -18,7 +18,6 @@ void main() _24 = _5.uJ; for (int _26 = 0; _26 < _5.uCount; _23_copy = _23, _23 = _24, _24 = _23_copy, _26++) { - continue; } FragColor = float(_24 - _23) * float(_5.uJ * _5.uK); } diff --git a/reference/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag b/reference/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag new file mode 100644 index 00000000000..eddb3829b70 --- /dev/null +++ b/reference/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag @@ -0,0 +1,15 @@ +#version 320 es +precision mediump float; +precision highp int; + +const uint _15 = 3u; + +void main() +{ + vec3 v = vec3(0.0); + if (false) + { + v[0] = 99.0; + } +} + diff --git a/reference/shaders/asm/frag/out-of-bounds-access.asm.frag b/reference/shaders/asm/frag/out-of-bounds-access.asm.frag new file mode 100644 index 00000000000..080283d4120 --- /dev/null +++ b/reference/shaders/asm/frag/out-of-bounds-access.asm.frag @@ -0,0 +1,14 @@ +#version 320 es +precision mediump float; +precision highp int; + +void main() +{ + vec3 v = vec3(0.0); + if (false) + { + v.x = 99.0; + v.x = 88.0; + } +} + diff --git a/reference/shaders/asm/frag/pack-and-unpack-uint2.asm.frag 
b/reference/shaders/asm/frag/pack-and-unpack-uint2.asm.frag new file mode 100644 index 00000000000..039c2b598d5 --- /dev/null +++ b/reference/shaders/asm/frag/pack-and-unpack-uint2.asm.frag @@ -0,0 +1,18 @@ +#version 450 +#if defined(GL_ARB_gpu_shader_int64) +#extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. +#endif + +layout(location = 0) out vec4 FragColor; + +void main() +{ + uint64_t _packed = packUint2x32(uvec2(18u, 52u)); + uvec2 unpacked = unpackUint2x32(_packed); + FragColor = vec4(float(unpacked.x), float(unpacked.y), 1.0, 1.0); +} + diff --git a/reference/shaders/asm/frag/storage-class-output-initializer.asm.frag b/reference/shaders/asm/frag/storage-class-output-initializer.asm.frag index 229358757aa..a5faaefb309 100644 --- a/reference/shaders/asm/frag/storage-class-output-initializer.asm.frag +++ b/reference/shaders/asm/frag/storage-class-output-initializer.asm.frag @@ -2,10 +2,12 @@ layout(location = 0) out vec4 FragColors[2]; layout(location = 2) out vec4 FragColor; +const vec4 _3_init[2] = vec4[](vec4(1.0, 2.0, 3.0, 4.0), vec4(10.0)); +const vec4 _4_init = vec4(5.0); void main() { - FragColors = vec4[](vec4(1.0, 2.0, 3.0, 4.0), vec4(10.0)); - FragColor = vec4(5.0); + FragColors = _3_init; + FragColor = _4_init; } diff --git a/reference/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag b/reference/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag index b2473f4d037..7930ca3b4a0 100644 --- a/reference/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag +++ b/reference/shaders/asm/frag/struct-composite-extract-swizzle.asm.frag @@ -8,12 +8,12 @@ struct Foo float var2; }; +Foo _22; + layout(binding = 0) uniform mediump sampler2D uSampler; layout(location = 0) out vec4 FragColor; -Foo _22; - void main() { FragColor = texture(uSampler, vec2(_22.var1, _22.var2)); diff --git 
a/reference/shaders/asm/frag/switch-preserve-sign-extension.asm.frag b/reference/shaders/asm/frag/switch-preserve-sign-extension.asm.frag new file mode 100644 index 00000000000..08921e1e923 --- /dev/null +++ b/reference/shaders/asm/frag/switch-preserve-sign-extension.asm.frag @@ -0,0 +1,27 @@ +#version 330 +#ifdef GL_ARB_shading_language_420pack +#extension GL_ARB_shading_language_420pack : require +#endif + +void main() +{ + int sw = 42; + int result = 0; + switch (sw) + { + case -42: + { + result = 42; + } + case 420: + { + result = 420; + } + case -1234: + { + result = 420; + break; + } + } +} + diff --git a/reference/shaders/asm/frag/undef-variable-store.asm.frag b/reference/shaders/asm/frag/undef-variable-store.asm.frag index 26ad568ad05..a3b1290de6b 100644 --- a/reference/shaders/asm/frag/undef-variable-store.asm.frag +++ b/reference/shaders/asm/frag/undef-variable-store.asm.frag @@ -1,10 +1,10 @@ #version 450 -layout(location = 0) out vec4 _entryPointOutput; - vec4 _38; vec4 _47; +layout(location = 0) out vec4 _entryPointOutput; + void main() { vec4 _27; diff --git a/reference/shaders/asm/frag/unreachable.asm.frag b/reference/shaders/asm/frag/unreachable.asm.frag index 8bc88b9f0ad..beb8708e160 100644 --- a/reference/shaders/asm/frag/unreachable.asm.frag +++ b/reference/shaders/asm/frag/unreachable.asm.frag @@ -1,10 +1,10 @@ #version 450 +vec4 _21; + layout(location = 0) flat in int counter; layout(location = 0) out vec4 FragColor; -vec4 _21; - void main() { vec4 _24; diff --git a/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag b/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag index cdaf78727ee..97c9a2eb9cf 100644 --- a/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag +++ b/reference/shaders/asm/frag/vector-shuffle-oom.asm.frag @@ -1,10 +1,24 @@ #version 450 +#if defined(GL_EXT_control_flow_attributes) +#extension GL_EXT_control_flow_attributes : require +#define SPIRV_CROSS_FLATTEN [[flatten]] +#define SPIRV_CROSS_BRANCH [[dont_flatten]] 
+#define SPIRV_CROSS_UNROLL [[unroll]] +#define SPIRV_CROSS_LOOP [[dont_unroll]] +#else +#define SPIRV_CROSS_FLATTEN +#define SPIRV_CROSS_BRANCH +#define SPIRV_CROSS_UNROLL +#define SPIRV_CROSS_LOOP +#endif struct _28 { vec4 _m0; }; +_28 _74; + layout(binding = 0, std140) uniform _6_7 { vec4 _m0; @@ -90,11 +104,9 @@ uniform sampler2D SPIRV_Cross_Combined_2; layout(location = 0) out vec4 _5; -_28 _74; - void main() { - _28 _77 = _74; + _28 _77; _77._m0 = vec4(0.0); vec2 _82 = gl_FragCoord.xy * _19._m23.xy; vec4 _88 = _7._m2 * _7._m0.xyxy; @@ -102,6 +114,7 @@ void main() vec3 _109 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _97, 0.0).w * _7._m1, 0.0, 1.0); vec4 _113 = textureLod(SPIRV_Cross_Combined_1, _97, 0.0); vec3 _129; + SPIRV_CROSS_BRANCH if (_113.y > 0.0) { _129 = _109 + (textureLod(SPIRV_Cross_Combined_2, _97, 0.0).xyz * clamp(_113.y * _113.z, 0.0, 1.0)); @@ -110,15 +123,14 @@ void main() { _129 = _109; } - vec3 _130 = _129 * 0.5; - vec3 _133 = vec4(0.0).xyz + _130; + vec3 _133 = vec4(0.0).xyz + (_129 * 0.5); vec4 _134 = vec4(_133.x, _133.y, _133.z, vec4(0.0).w); - _28 _135 = _77; - _135._m0 = _134; + _77._m0 = _134; vec2 _144 = clamp(_82 + (vec3(-1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _156 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _144, 0.0).w * _7._m1, 0.0, 1.0); vec4 _160 = textureLod(SPIRV_Cross_Combined_1, _144, 0.0); vec3 _176; + SPIRV_CROSS_BRANCH if (_160.y > 0.0) { _176 = _156 + (textureLod(SPIRV_Cross_Combined_2, _144, 0.0).xyz * clamp(_160.y * _160.z, 0.0, 1.0)); @@ -127,15 +139,14 @@ void main() { _176 = _156; } - vec3 _177 = _176 * 0.5; - vec3 _180 = _134.xyz + _177; + vec3 _180 = _134.xyz + (_176 * 0.5); vec4 _181 = vec4(_180.x, _180.y, _180.z, _134.w); - _28 _182 = _135; - _182._m0 = _181; + _77._m0 = _181; vec2 _191 = clamp(_82 + (vec3(0.0, -1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _203 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _191, 0.0).w * _7._m1, 0.0, 1.0); vec4 _207 = 
textureLod(SPIRV_Cross_Combined_1, _191, 0.0); vec3 _223; + SPIRV_CROSS_BRANCH if (_207.y > 0.0) { _223 = _203 + (textureLod(SPIRV_Cross_Combined_2, _191, 0.0).xyz * clamp(_207.y * _207.z, 0.0, 1.0)); @@ -144,15 +155,14 @@ void main() { _223 = _203; } - vec3 _224 = _223 * 0.75; - vec3 _227 = _181.xyz + _224; + vec3 _227 = _181.xyz + (_223 * 0.75); vec4 _228 = vec4(_227.x, _227.y, _227.z, _181.w); - _28 _229 = _182; - _229._m0 = _228; + _77._m0 = _228; vec2 _238 = clamp(_82 + (vec3(1.0, -1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _250 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _238, 0.0).w * _7._m1, 0.0, 1.0); vec4 _254 = textureLod(SPIRV_Cross_Combined_1, _238, 0.0); vec3 _270; + SPIRV_CROSS_BRANCH if (_254.y > 0.0) { _270 = _250 + (textureLod(SPIRV_Cross_Combined_2, _238, 0.0).xyz * clamp(_254.y * _254.z, 0.0, 1.0)); @@ -161,15 +171,14 @@ void main() { _270 = _250; } - vec3 _271 = _270 * 0.5; - vec3 _274 = _228.xyz + _271; + vec3 _274 = _228.xyz + (_270 * 0.5); vec4 _275 = vec4(_274.x, _274.y, _274.z, _228.w); - _28 _276 = _229; - _276._m0 = _275; + _77._m0 = _275; vec2 _285 = clamp(_82 + (vec3(-2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _297 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _285, 0.0).w * _7._m1, 0.0, 1.0); vec4 _301 = textureLod(SPIRV_Cross_Combined_1, _285, 0.0); vec3 _317; + SPIRV_CROSS_BRANCH if (_301.y > 0.0) { _317 = _297 + (textureLod(SPIRV_Cross_Combined_2, _285, 0.0).xyz * clamp(_301.y * _301.z, 0.0, 1.0)); @@ -178,15 +187,14 @@ void main() { _317 = _297; } - vec3 _318 = _317 * 0.5; - vec3 _321 = _275.xyz + _318; + vec3 _321 = _275.xyz + (_317 * 0.5); vec4 _322 = vec4(_321.x, _321.y, _321.z, _275.w); - _28 _323 = _276; - _323._m0 = _322; + _77._m0 = _322; vec2 _332 = clamp(_82 + (vec3(-1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _344 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _332, 0.0).w * _7._m1, 0.0, 1.0); vec4 _348 = textureLod(SPIRV_Cross_Combined_1, _332, 0.0); vec3 _364; + 
SPIRV_CROSS_BRANCH if (_348.y > 0.0) { _364 = _344 + (textureLod(SPIRV_Cross_Combined_2, _332, 0.0).xyz * clamp(_348.y * _348.z, 0.0, 1.0)); @@ -195,15 +203,14 @@ void main() { _364 = _344; } - vec3 _365 = _364 * 0.75; - vec3 _368 = _322.xyz + _365; + vec3 _368 = _322.xyz + (_364 * 0.75); vec4 _369 = vec4(_368.x, _368.y, _368.z, _322.w); - _28 _370 = _323; - _370._m0 = _369; + _77._m0 = _369; vec2 _379 = clamp(_82 + (vec3(0.0, 0.0, 1.0).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _391 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _379, 0.0).w * _7._m1, 0.0, 1.0); vec4 _395 = textureLod(SPIRV_Cross_Combined_1, _379, 0.0); vec3 _411; + SPIRV_CROSS_BRANCH if (_395.y > 0.0) { _411 = _391 + (textureLod(SPIRV_Cross_Combined_2, _379, 0.0).xyz * clamp(_395.y * _395.z, 0.0, 1.0)); @@ -212,15 +219,14 @@ void main() { _411 = _391; } - vec3 _412 = _411 * 1.0; - vec3 _415 = _369.xyz + _412; + vec3 _415 = _369.xyz + (_411 * 1.0); vec4 _416 = vec4(_415.x, _415.y, _415.z, _369.w); - _28 _417 = _370; - _417._m0 = _416; + _77._m0 = _416; vec2 _426 = clamp(_82 + (vec3(1.0, 0.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _438 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _426, 0.0).w * _7._m1, 0.0, 1.0); vec4 _442 = textureLod(SPIRV_Cross_Combined_1, _426, 0.0); vec3 _458; + SPIRV_CROSS_BRANCH if (_442.y > 0.0) { _458 = _438 + (textureLod(SPIRV_Cross_Combined_2, _426, 0.0).xyz * clamp(_442.y * _442.z, 0.0, 1.0)); @@ -229,15 +235,14 @@ void main() { _458 = _438; } - vec3 _459 = _458 * 0.75; - vec3 _462 = _416.xyz + _459; + vec3 _462 = _416.xyz + (_458 * 0.75); vec4 _463 = vec4(_462.x, _462.y, _462.z, _416.w); - _28 _464 = _417; - _464._m0 = _463; + _77._m0 = _463; vec2 _473 = clamp(_82 + (vec3(2.0, 0.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _485 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _473, 0.0).w * _7._m1, 0.0, 1.0); vec4 _489 = textureLod(SPIRV_Cross_Combined_1, _473, 0.0); vec3 _505; + SPIRV_CROSS_BRANCH if (_489.y > 0.0) { _505 = _485 + 
(textureLod(SPIRV_Cross_Combined_2, _473, 0.0).xyz * clamp(_489.y * _489.z, 0.0, 1.0)); @@ -246,15 +251,14 @@ void main() { _505 = _485; } - vec3 _506 = _505 * 0.5; - vec3 _509 = _463.xyz + _506; + vec3 _509 = _463.xyz + (_505 * 0.5); vec4 _510 = vec4(_509.x, _509.y, _509.z, _463.w); - _28 _511 = _464; - _511._m0 = _510; + _77._m0 = _510; vec2 _520 = clamp(_82 + (vec3(-1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _532 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _520, 0.0).w * _7._m1, 0.0, 1.0); vec4 _536 = textureLod(SPIRV_Cross_Combined_1, _520, 0.0); vec3 _552; + SPIRV_CROSS_BRANCH if (_536.y > 0.0) { _552 = _532 + (textureLod(SPIRV_Cross_Combined_2, _520, 0.0).xyz * clamp(_536.y * _536.z, 0.0, 1.0)); @@ -263,15 +267,14 @@ void main() { _552 = _532; } - vec3 _553 = _552 * 0.5; - vec3 _556 = _510.xyz + _553; + vec3 _556 = _510.xyz + (_552 * 0.5); vec4 _557 = vec4(_556.x, _556.y, _556.z, _510.w); - _28 _558 = _511; - _558._m0 = _557; + _77._m0 = _557; vec2 _567 = clamp(_82 + (vec3(0.0, 1.0, 0.75).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _579 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _567, 0.0).w * _7._m1, 0.0, 1.0); vec4 _583 = textureLod(SPIRV_Cross_Combined_1, _567, 0.0); vec3 _599; + SPIRV_CROSS_BRANCH if (_583.y > 0.0) { _599 = _579 + (textureLod(SPIRV_Cross_Combined_2, _567, 0.0).xyz * clamp(_583.y * _583.z, 0.0, 1.0)); @@ -280,15 +283,14 @@ void main() { _599 = _579; } - vec3 _600 = _599 * 0.75; - vec3 _603 = _557.xyz + _600; + vec3 _603 = _557.xyz + (_599 * 0.75); vec4 _604 = vec4(_603.x, _603.y, _603.z, _557.w); - _28 _605 = _558; - _605._m0 = _604; + _77._m0 = _604; vec2 _614 = clamp(_82 + (vec3(1.0, 1.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _626 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _614, 0.0).w * _7._m1, 0.0, 1.0); vec4 _630 = textureLod(SPIRV_Cross_Combined_1, _614, 0.0); vec3 _646; + SPIRV_CROSS_BRANCH if (_630.y > 0.0) { _646 = _626 + (textureLod(SPIRV_Cross_Combined_2, _614, 0.0).xyz * clamp(_630.y * _630.z, 
0.0, 1.0)); @@ -297,15 +299,14 @@ void main() { _646 = _626; } - vec3 _647 = _646 * 0.5; - vec3 _650 = _604.xyz + _647; + vec3 _650 = _604.xyz + (_646 * 0.5); vec4 _651 = vec4(_650.x, _650.y, _650.z, _604.w); - _28 _652 = _605; - _652._m0 = _651; + _77._m0 = _651; vec2 _661 = clamp(_82 + (vec3(0.0, 2.0, 0.5).xy * _7._m0.xy), _88.xy, _88.zw); vec3 _673 = _11._m5 * clamp(textureLod(SPIRV_Cross_Combined, _661, 0.0).w * _7._m1, 0.0, 1.0); vec4 _677 = textureLod(SPIRV_Cross_Combined_1, _661, 0.0); vec3 _693; + SPIRV_CROSS_BRANCH if (_677.y > 0.0) { _693 = _673 + (textureLod(SPIRV_Cross_Combined_2, _661, 0.0).xyz * clamp(_677.y * _677.z, 0.0, 1.0)); @@ -316,13 +317,10 @@ void main() } vec3 _697 = _651.xyz + (_693 * 0.5); vec4 _698 = vec4(_697.x, _697.y, _697.z, _651.w); - _28 _699 = _652; - _699._m0 = _698; + _77._m0 = _698; vec3 _702 = _698.xyz / vec3(((((((((((((0.0 + 0.5) + 0.5) + 0.75) + 0.5) + 0.5) + 0.75) + 1.0) + 0.75) + 0.5) + 0.5) + 0.75) + 0.5) + 0.5); - _28 _704 = _699; - _704._m0 = vec4(_702.x, _702.y, _702.z, _698.w); - _28 _705 = _704; - _705._m0.w = 1.0; - _5 = _705._m0; + _77._m0 = vec4(_702.x, _702.y, _702.z, _698.w); + _77._m0.w = 1.0; + _5 = _77._m0; } diff --git a/reference/shaders/asm/geom/store-uint-layer.invalid.asm.geom b/reference/shaders/asm/geom/store-uint-layer.invalid.asm.geom deleted file mode 100644 index c768d5da863..00000000000 --- a/reference/shaders/asm/geom/store-uint-layer.invalid.asm.geom +++ /dev/null @@ -1,41 +0,0 @@ -#version 450 -layout(triangles) in; -layout(max_vertices = 3, triangle_strip) out; - -struct VertexOutput -{ - vec4 pos; -}; - -struct GeometryOutput -{ - vec4 pos; - uint layer; -}; - -void _main(VertexOutput _input[3], GeometryOutput stream) -{ - GeometryOutput _output; - _output.layer = 1u; - for (int v = 0; v < 3; v++) - { - _output.pos = _input[v].pos; - gl_Position = _output.pos; - gl_Layer = int(_output.layer); - EmitVertex(); - } - EndPrimitive(); -} - -void main() -{ - VertexOutput _input[3]; - _input[0].pos 
= gl_in[0].gl_Position; - _input[1].pos = gl_in[1].gl_Position; - _input[2].pos = gl_in[2].gl_Position; - VertexOutput param[3] = _input; - GeometryOutput param_1; - _main(param, param_1); - GeometryOutput stream = param_1; -} - diff --git a/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc deleted file mode 100644 index 8cb7a4e64c3..00000000000 --- a/reference/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc +++ /dev/null @@ -1,79 +0,0 @@ -#version 450 -layout(vertices = 3) out; - -struct VertexOutput -{ - vec4 pos; - vec2 uv; -}; - -struct HSOut -{ - vec4 pos; - vec2 uv; -}; - -struct HSConstantOut -{ - float EdgeTess[3]; - float InsideTess; -}; - -struct VertexOutput_1 -{ - vec2 uv; -}; - -struct HSOut_1 -{ - vec2 uv; -}; - -layout(location = 0) in VertexOutput_1 p[]; -layout(location = 0) out HSOut_1 _entryPointOutput[3]; - -HSOut _hs_main(VertexOutput p_1[3], uint i) -{ - HSOut _output; - _output.pos = p_1[i].pos; - _output.uv = p_1[i].uv; - return _output; -} - -HSConstantOut PatchHS(VertexOutput _patch[3]) -{ - HSConstantOut _output; - _output.EdgeTess[0] = (vec2(1.0) + _patch[0].uv).x; - _output.EdgeTess[1] = (vec2(1.0) + _patch[0].uv).x; - _output.EdgeTess[2] = (vec2(1.0) + _patch[0].uv).x; - _output.InsideTess = (vec2(1.0) + _patch[0].uv).x; - return _output; -} - -void main() -{ - VertexOutput p_1[3]; - p_1[0].pos = gl_in[0].gl_Position; - p_1[0].uv = p[0].uv; - p_1[1].pos = gl_in[1].gl_Position; - p_1[1].uv = p[1].uv; - p_1[2].pos = gl_in[2].gl_Position; - p_1[2].uv = p[2].uv; - uint i = gl_InvocationID; - VertexOutput param[3] = p_1; - uint param_1 = i; - HSOut flattenTemp = _hs_main(param, param_1); - gl_out[gl_InvocationID].gl_Position = flattenTemp.pos; - _entryPointOutput[gl_InvocationID].uv = flattenTemp.uv; - barrier(); - if (int(gl_InvocationID) == 0) - { - VertexOutput param_2[3] = p_1; - HSConstantOut 
_patchConstantResult = PatchHS(param_2); - gl_TessLevelOuter[0] = _patchConstantResult.EdgeTess[0]; - gl_TessLevelOuter[1] = _patchConstantResult.EdgeTess[1]; - gl_TessLevelOuter[2] = _patchConstantResult.EdgeTess[2]; - gl_TessLevelInner[0] = _patchConstantResult.InsideTess; - } -} - diff --git a/reference/shaders/asm/vert/empty-io.asm.vert b/reference/shaders/asm/vert/empty-io.asm.vert index cc432cb8907..91e65d6d7a5 100644 --- a/reference/shaders/asm/vert/empty-io.asm.vert +++ b/reference/shaders/asm/vert/empty-io.asm.vert @@ -16,6 +16,7 @@ struct VSOutput_1 }; layout(location = 0) in vec4 position; +layout(location = 0) out VSOutput_1 _entryPointOutput; VSOutput _main(VSInput _input) { diff --git a/reference/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert b/reference/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert index 835456f5bd2..40eef2d09df 100644 --- a/reference/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert +++ b/reference/shaders/asm/vert/extract-transposed-matrix-from-struct.asm.vert @@ -1,4 +1,7 @@ #version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct V2F { @@ -18,7 +21,11 @@ layout(binding = 0, std430) readonly buffer gInstanceData } gInstanceData_1; layout(location = 0) in vec3 PosL; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) out vec4 _entryPointOutput_Color; V2F _VS(vec3 PosL_1, uint instanceID) diff --git a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert index fdba2a26046..daf0071ae7b 100644 --- a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert +++ b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert @@ -10,9 +10,10 @@ const int _20 = (_7 + 2); #endif const uint _8 = SPIRV_CROSS_CONSTANT_ID_202; const 
uint _25 = (_8 % 5u); -const ivec4 _30 = ivec4(20, 30, _20, _20); -const ivec2 _32 = ivec2(_30.y, _30.x); -const int _33 = _30.y; +const int _30 = _7 - (-3) * (_7 / (-3)); +const ivec4 _32 = ivec4(20, 30, _20, _30); +const ivec2 _34 = ivec2(_32.y, _32.x); +const int _35 = _32.y; #ifndef SPIRV_CROSS_CONSTANT_ID_200 #define SPIRV_CROSS_CONSTANT_ID_200 3.141590118408203125 #endif @@ -25,10 +26,10 @@ void main() vec4 pos = vec4(0.0); pos.y += float(_20); pos.z += float(_25); - pos += vec4(_30); - vec2 _56 = pos.xy + vec2(_32); - pos = vec4(_56.x, _56.y, pos.z, pos.w); + pos += vec4(_32); + vec2 _58 = pos.xy + vec2(_34); + pos = vec4(_58.x, _58.y, pos.z, pos.w); gl_Position = pos; - _4 = _33; + _4 = _35; } diff --git a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk index 02c3e312575..4cddf82138c 100644 --- a/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk +++ b/reference/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert.vk @@ -4,9 +4,10 @@ layout(constant_id = 201) const int _7 = -10; const int _20 = (_7 + 2); layout(constant_id = 202) const uint _8 = 100u; const uint _25 = (_8 % 5u); -const ivec4 _30 = ivec4(20, 30, _20, _20); -const ivec2 _32 = ivec2(_30.y, _30.x); -const int _33 = _30.y; +const int _30 = _7 - (-3) * (_7 / (-3)); +const ivec4 _32 = ivec4(20, 30, _20, _30); +const ivec2 _34 = ivec2(_32.y, _32.x); +const int _35 = _32.y; layout(constant_id = 200) const float _9 = 3.141590118408203125; layout(location = 0) flat out int _4; @@ -14,12 +15,16 @@ layout(location = 0) flat out int _4; void main() { vec4 pos = vec4(0.0); - pos.y += float(_20); - pos.z += float(_25); - pos += vec4(_30); - vec2 _56 = pos.xy + vec2(_32); - pos = vec4(_56.x, _56.y, pos.z, pos.w); + float _42 = float(_20); + pos.y += _42; + float _47 = float(_25); + pos.z += _47; + vec4 _52 = vec4(_32); + pos += _52; + vec2 _55 = vec2(_34); + vec2 _58 = pos.xy + _55; + pos = 
vec4(_58.x, _58.y, pos.z, pos.w); gl_Position = pos; - _4 = _33; + _4 = _35; } diff --git a/reference/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert b/reference/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert index 31f13bd777f..d79c08f8a3c 100644 --- a/reference/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert +++ b/reference/shaders/asm/vert/uint-vertex-id-instance-id.asm.vert @@ -1,6 +1,13 @@ #version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif vec4 _main(uint vid, uint iid) { diff --git a/reference/shaders/comp/bake_gradient.comp b/reference/shaders/comp/bake_gradient.comp index 7b0bb34c64f..49fa9532445 100644 --- a/reference/shaders/comp/bake_gradient.comp +++ b/reference/shaders/comp/bake_gradient.comp @@ -29,8 +29,8 @@ void main() vec2 displacement = textureLod(uDisplacement, uv.zw, 0.0).xy * 1.2000000476837158203125; vec2 dDdx = (textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(1, 0)).xy - textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(-1, 0)).xy) * 0.60000002384185791015625; vec2 dDdy = (textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, 1)).xy - textureLodOffset(uDisplacement, uv.zw, 0.0, ivec2(0, -1)).xy) * 0.60000002384185791015625; - vec2 param = dDdx * _46.uScale.z; - vec2 param_1 = dDdy * _46.uScale.z; + mediump vec2 param = dDdx * _46.uScale.z; + mediump vec2 param_1 = dDdy * _46.uScale.z; float j = jacobian(param, param_1); displacement = vec2(0.0); imageStore(iHeightDisplacement, ivec2(gl_GlobalInvocationID.xy), vec4(h, displacement, 0.0)); diff --git a/reference/shaders/comp/barriers.comp b/reference/shaders/comp/barriers.comp index a1b975de830..1102c91e28c 100644 --- a/reference/shaders/comp/barriers.comp +++ b/reference/shaders/comp/barriers.comp @@ -28,41 +28,35 @@ void group_barrier() void barrier_shared_exec() { - 
memoryBarrierShared(); barrier(); } void full_barrier_exec() { memoryBarrier(); - memoryBarrierShared(); barrier(); } void image_barrier_exec() { memoryBarrierImage(); - memoryBarrierShared(); barrier(); } void buffer_barrier_exec() { memoryBarrierBuffer(); - memoryBarrierShared(); barrier(); } void group_barrier_exec() { groupMemoryBarrier(); - memoryBarrierShared(); barrier(); } void exec_barrier() { - memoryBarrierShared(); barrier(); } diff --git a/reference/shaders/comp/bitcast-16bit-2.invalid.comp b/reference/shaders/comp/bitcast-16bit-2.invalid.comp deleted file mode 100644 index bddc16d62bc..00000000000 --- a/reference/shaders/comp/bitcast-16bit-2.invalid.comp +++ /dev/null @@ -1,39 +0,0 @@ -#version 450 -#if defined(GL_AMD_gpu_shader_int16) -#extension GL_AMD_gpu_shader_int16 : require -#else -#error No extension available for Int16. -#endif -#if defined(GL_AMD_gpu_shader_half_float) -#extension GL_AMD_gpu_shader_half_float : require -#elif defined(GL_NV_gpu_shader5) -#extension GL_NV_gpu_shader5 : require -#else -#error No extension available for FP16. 
-#endif -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -layout(binding = 1, std430) buffer SSBO1 -{ - i16vec4 outputs[]; -} _21; - -layout(binding = 0, std430) buffer SSBO0 -{ - ivec4 inputs[]; -} _29; - -layout(binding = 2, std140) uniform UBO -{ - f16vec4 const0; -} _40; - -void main() -{ - uint ident = gl_GlobalInvocationID.x; - i16vec2 _47 = unpackInt2x16(_29.inputs[ident].x) + float16BitsToInt16(_40.const0.xy); - _21.outputs[ident] = i16vec4(_47.x, _47.y, _21.outputs[ident].z, _21.outputs[ident].w); - i16vec2 _66 = i16vec2(unpackUint2x16(uint(_29.inputs[ident].y)) - float16BitsToUint16(_40.const0.zw)); - _21.outputs[ident] = i16vec4(_21.outputs[ident].x, _21.outputs[ident].y, _66.x, _66.y); -} - diff --git a/reference/shaders/comp/casts.comp b/reference/shaders/comp/casts.comp index 973668676af..b98890a4ddf 100644 --- a/reference/shaders/comp/casts.comp +++ b/reference/shaders/comp/casts.comp @@ -14,6 +14,6 @@ layout(binding = 0, std430) buffer SSBO0 void main() { uint ident = gl_GlobalInvocationID.x; - _21.outputs[ident] = mix(ivec4(0), ivec4(1), notEqual((_27.inputs[ident] & ivec4(3)), ivec4(uvec4(0u)))); + _21.outputs[ident] = ivec4(notEqual((_27.inputs[ident] & ivec4(3)), ivec4(uvec4(0u)))); } diff --git a/reference/shaders/comp/generate_height.comp b/reference/shaders/comp/generate_height.comp index fe733e2893d..e482bf0ca5a 100644 --- a/reference/shaders/comp/generate_height.comp +++ b/reference/shaders/comp/generate_height.comp @@ -27,7 +27,6 @@ uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel) { _86 = a.x; } - uint _94 = _86; uint _97; if (sel.y) { @@ -37,7 +36,7 @@ uvec2 workaround_mix(uvec2 a, uvec2 b, bvec2 sel) { _97 = a.y; } - return uvec2(_94, _97); + return uvec2(_86, _97); } vec2 alias(vec2 i, vec2 N) diff --git a/reference/shaders/comp/inout-struct.invalid.comp b/reference/shaders/comp/inout-struct.invalid.comp deleted file mode 100644 index 640e25bb952..00000000000 --- a/reference/shaders/comp/inout-struct.invalid.comp +++ 
/dev/null @@ -1,65 +0,0 @@ -#version 310 es -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -struct Foo -{ - vec4 a; - vec4 b; - vec4 c; - vec4 d; -}; - -layout(binding = 1, std430) readonly buffer SSBO2 -{ - vec4 data[]; -} indata; - -layout(binding = 0, std430) writeonly buffer SSBO -{ - vec4 data[]; -} outdata; - -layout(binding = 2, std430) readonly buffer SSBO3 -{ - Foo foos[]; -} foobar; - -void baz(inout Foo foo) -{ - uint ident = gl_GlobalInvocationID.x; - foo.a = indata.data[(4u * ident) + 0u]; - foo.b = indata.data[(4u * ident) + 1u]; - foo.c = indata.data[(4u * ident) + 2u]; - foo.d = indata.data[(4u * ident) + 3u]; -} - -void meow(inout Foo foo) -{ - foo.a += vec4(10.0); - foo.b += vec4(20.0); - foo.c += vec4(30.0); - foo.d += vec4(40.0); -} - -vec4 bar(Foo foo) -{ - return ((foo.a + foo.b) + foo.c) + foo.d; -} - -void main() -{ - Foo param; - baz(param); - Foo foo = param; - Foo param_1 = foo; - meow(param_1); - foo = param_1; - Foo param_2 = foo; - Foo param_3; - param_3.a = foobar.foos[gl_GlobalInvocationID.x].a; - param_3.b = foobar.foos[gl_GlobalInvocationID.x].b; - param_3.c = foobar.foos[gl_GlobalInvocationID.x].c; - param_3.d = foobar.foos[gl_GlobalInvocationID.x].d; - outdata.data[gl_GlobalInvocationID.x] = bar(param_2) + bar(param_3); -} - diff --git a/reference/shaders/comp/shared.comp b/reference/shaders/comp/shared.comp index d0987a65287..545ef22e617 100644 --- a/reference/shaders/comp/shared.comp +++ b/reference/shaders/comp/shared.comp @@ -18,7 +18,6 @@ void main() uint ident = gl_GlobalInvocationID.x; float idata = _22.in_data[ident]; sShared[gl_LocalInvocationIndex] = idata; - memoryBarrierShared(); barrier(); _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u]; } diff --git a/reference/shaders/comp/struct-packing.comp b/reference/shaders/comp/struct-packing.comp index cd1eda1b32b..f4b58342d48 100644 --- a/reference/shaders/comp/struct-packing.comp +++ b/reference/shaders/comp/struct-packing.comp @@ 
-43,48 +43,6 @@ struct Content S4 m3s[8]; }; -struct S0_1 -{ - vec2 a[1]; - float b; -}; - -struct S1_1 -{ - vec3 a; - float b; -}; - -struct S2_1 -{ - vec3 a[1]; - float b; -}; - -struct S3_1 -{ - vec2 a; - float b; -}; - -struct S4_1 -{ - vec2 c; -}; - -struct Content_1 -{ - S0_1 m0s[1]; - S1_1 m1s[1]; - S2_1 m2s[1]; - S0_1 m0; - S1_1 m1; - S2_1 m2; - S3_1 m3; - float m4; - S4_1 m3s[8]; -}; - layout(binding = 1, std430) restrict buffer SSBO1 { Content content; @@ -103,9 +61,9 @@ layout(binding = 1, std430) restrict buffer SSBO1 layout(binding = 0, std140) restrict buffer SSBO0 { - Content_1 content; - Content_1 content1[2]; - Content_1 content2; + Content content; + Content content1[2]; + Content content2; mat2 m0; mat2 m1; mat2x3 m2[4]; diff --git a/reference/shaders/desktop-only/comp/enhanced-layouts.comp b/reference/shaders/desktop-only/comp/enhanced-layouts.comp index 45b25064b6b..ba37ca237b8 100644 --- a/reference/shaders/desktop-only/comp/enhanced-layouts.comp +++ b/reference/shaders/desktop-only/comp/enhanced-layouts.comp @@ -8,13 +8,6 @@ struct Foo int c; }; -struct Foo_1 -{ - int a; - int b; - int c; -}; - layout(binding = 1, std140) buffer SSBO1 { layout(offset = 4) int a; @@ -27,7 +20,7 @@ layout(binding = 2, std430) buffer SSBO2 { layout(offset = 4) int a; layout(offset = 8) int b; - layout(offset = 16) Foo_1 foo; + layout(offset = 16) Foo foo; layout(offset = 48) int c[8]; } ssbo2; diff --git a/reference/shaders/desktop-only/comp/int64.desktop.comp b/reference/shaders/desktop-only/comp/int64.desktop.comp index 702456b303f..28afc2fbd7d 100644 --- a/reference/shaders/desktop-only/comp/int64.desktop.comp +++ b/reference/shaders/desktop-only/comp/int64.desktop.comp @@ -1,5 +1,11 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for 64-bit integers. 
+#endif layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; struct M0 diff --git a/reference/shaders/desktop-only/frag/fp16.invalid.desktop.frag b/reference/shaders/desktop-only/frag/fp16.invalid.desktop.frag deleted file mode 100644 index faf79b2b44d..00000000000 --- a/reference/shaders/desktop-only/frag/fp16.invalid.desktop.frag +++ /dev/null @@ -1,159 +0,0 @@ -#version 450 -#if defined(GL_AMD_gpu_shader_half_float) -#extension GL_AMD_gpu_shader_half_float : require -#elif defined(GL_NV_gpu_shader5) -#extension GL_NV_gpu_shader5 : require -#else -#error No extension available for FP16. -#endif - -struct ResType -{ - f16vec4 _m0; - ivec4 _m1; -}; - -layout(location = 3) in f16vec4 v4; -layout(location = 2) in f16vec3 v3; -layout(location = 0) in float16_t v1; -layout(location = 1) in f16vec2 v2; - -f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d) -{ - return f16mat2(f16vec2(a), f16vec2(b)) * f16mat2(f16vec2(c), f16vec2(d)); -} - -f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f) -{ - return f16mat3(f16vec3(a), f16vec3(b), f16vec3(c)) * f16mat3(f16vec3(d), f16vec3(e), f16vec3(f)); -} - -void test_constants() -{ - float16_t a = float16_t(1.0); - float16_t b = float16_t(1.5); - float16_t c = float16_t(-1.5); - float16_t d = float16_t(0.0 / 0.0); - float16_t e = float16_t(1.0 / 0.0); - float16_t f = float16_t(-1.0 / 0.0); - float16_t g = float16_t(1014.0); - float16_t h = float16_t(9.5367431640625e-07); -} - -float16_t test_result() -{ - return float16_t(1.0); -} - -void test_conversions() -{ - float16_t one = test_result(); - int a = int(one); - uint b = uint(one); - bool c = one != float16_t(0.0); - float d = float(one); - double e = double(one); - float16_t a2 = float16_t(a); - float16_t b2 = float16_t(b); - float16_t c2 = float16_t(c); - float16_t d2 = float16_t(d); - float16_t e2 = float16_t(e); -} - -void test_builtins() -{ - f16vec4 res = radians(v4); - res = degrees(v4); - res = sin(v4); - res = 
cos(v4); - res = tan(v4); - res = asin(v4); - res = atan(v4, v3.xyzz); - res = atan(v4); - res = sinh(v4); - res = cosh(v4); - res = tanh(v4); - res = asinh(v4); - res = acosh(v4); - res = atanh(v4); - res = pow(v4, v4); - res = exp(v4); - res = log(v4); - res = exp2(v4); - res = log2(v4); - res = sqrt(v4); - res = inversesqrt(v4); - res = abs(v4); - res = sign(v4); - res = floor(v4); - res = trunc(v4); - res = round(v4); - res = roundEven(v4); - res = ceil(v4); - res = fract(v4); - res = mod(v4, v4); - f16vec4 tmp; - f16vec4 _231 = modf(v4, tmp); - res = _231; - res = min(v4, v4); - res = max(v4, v4); - res = clamp(v4, v4, v4); - res = mix(v4, v4, v4); - res = mix(v4, v4, lessThan(v4, v4)); - res = step(v4, v4); - res = smoothstep(v4, v4, v4); - bvec4 btmp = isnan(v4); - btmp = isinf(v4); - res = fma(v4, v4, v4); - ResType _275; - _275._m0 = frexp(v4, _275._m1); - ivec4 itmp = _275._m1; - res = _275._m0; - res = ldexp(res, itmp); - uint pack0 = packFloat2x16(v4.xy); - uint pack1 = packFloat2x16(v4.zw); - res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1)); - float16_t t0 = length(v4); - t0 = distance(v4, v4); - t0 = dot(v4, v4); - f16vec3 res3 = cross(v3, v3); - res = normalize(v4); - res = faceforward(v4, v4, v4); - res = reflect(v4, v4); - res = refract(v4, v4, v1); - btmp = lessThan(v4, v4); - btmp = lessThanEqual(v4, v4); - btmp = greaterThan(v4, v4); - btmp = greaterThanEqual(v4, v4); - btmp = equal(v4, v4); - btmp = notEqual(v4, v4); - res = dFdx(v4); - res = dFdy(v4); - res = dFdxFine(v4); - res = dFdyFine(v4); - res = dFdxCoarse(v4); - res = dFdyCoarse(v4); - res = fwidth(v4); - res = fwidthFine(v4); - res = fwidthCoarse(v4); -} - -void main() -{ - f16vec2 param = v2; - f16vec2 param_1 = v2; - f16vec2 param_2 = v3.xy; - f16vec2 param_3 = v3.xy; - f16mat2 m0 = test_mat2(param, param_1, param_2, param_3); - f16vec3 param_4 = v3; - f16vec3 param_5 = v3; - f16vec3 param_6 = v3; - f16vec3 param_7 = v4.xyz; - f16vec3 param_8 = v4.xyz; - f16vec3 param_9 
= v4.yzw; - f16mat3 m1 = test_mat3(param_4, param_5, param_6, param_7, param_8, param_9); - test_constants(); - test_conversions(); - test_builtins(); -} - diff --git a/reference/shaders/desktop-only/frag/image-size.frag b/reference/shaders/desktop-only/frag/image-size.frag new file mode 100644 index 00000000000..5bb060398ed --- /dev/null +++ b/reference/shaders/desktop-only/frag/image-size.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0, r32f) uniform readonly writeonly image2D uImage1; +layout(binding = 1, r32f) uniform readonly writeonly image2D uImage2; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vec2(imageSize(uImage1)), vec2(imageSize(uImage2))); +} + diff --git a/reference/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag b/reference/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag new file mode 100644 index 00000000000..1d9062064a8 --- /dev/null +++ b/reference/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(binding = 0, r32f) uniform image2D uImage1; +layout(binding = 1, r32f) uniform image2D uImage2; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(vec2(imageSize(uImage1)), vec2(imageSize(uImage2))); +} + diff --git a/reference/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag b/reference/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag index d5e45bda431..60c45908fa7 100644 --- a/reference/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag +++ b/reference/shaders/desktop-only/frag/texture-proj-shadow.desktop.frag @@ -13,12 +13,14 @@ layout(location = 0) in vec3 vClip3; void main() { - vec4 _20 = vClip4; - _20.y = vClip4.w; - FragColor = textureProj(uShadow1D, vec4(_20.x, 0.0, vClip4.z, _20.y)); - vec4 _30 = vClip4; - _30.z = vClip4.w; - FragColor = textureProj(uShadow2D, vec4(_30.xy, vClip4.z, _30.z)); + vec4 _17 = vClip4; + vec4 _20 = _17; + _20.y = _17.w; + 
FragColor = textureProj(uShadow1D, vec4(_20.x, 0.0, _17.z, _20.y)); + vec4 _27 = vClip4; + vec4 _30 = _27; + _30.z = _27.w; + FragColor = textureProj(uShadow2D, vec4(_30.xy, _27.z, _30.z)); FragColor = textureProj(uSampler1D, vClip2).x; FragColor = textureProj(uSampler2D, vClip3).x; FragColor = textureProj(uSampler3D, vClip4).x; diff --git a/reference/shaders/desktop-only/tesc/basic.desktop.sso.tesc b/reference/shaders/desktop-only/tesc/basic.desktop.sso.tesc index 5e958256af5..c51699db6db 100644 --- a/reference/shaders/desktop-only/tesc/basic.desktop.sso.tesc +++ b/reference/shaders/desktop-only/tesc/basic.desktop.sso.tesc @@ -4,7 +4,7 @@ layout(vertices = 1) out; in gl_PerVertex { vec4 gl_Position; -} gl_in[gl_MaxPatchVertices]; +} gl_in[]; out gl_PerVertex { diff --git a/reference/shaders/desktop-only/tese/triangle.desktop.sso.tese b/reference/shaders/desktop-only/tese/triangle.desktop.sso.tese index 31027dae80f..c9bacd464e4 100644 --- a/reference/shaders/desktop-only/tese/triangle.desktop.sso.tese +++ b/reference/shaders/desktop-only/tese/triangle.desktop.sso.tese @@ -4,7 +4,7 @@ layout(triangles, cw, fractional_even_spacing) in; in gl_PerVertex { vec4 gl_Position; -} gl_in[gl_MaxPatchVertices]; +} gl_in[]; out gl_PerVertex { diff --git a/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert b/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert new file mode 100644 index 00000000000..2b3c5ce0516 --- /dev/null +++ b/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert @@ -0,0 +1,24 @@ +#version 450 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif + +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseVertex gl_BaseVertexARB +#else +uniform int SPIRV_Cross_BaseVertex; +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else +uniform int SPIRV_Cross_BaseInstance; +#endif +#ifndef 
GL_ARB_shader_draw_parameters +#error GL_ARB_shader_draw_parameters is not supported. +#endif + +void main() +{ + gl_Position = vec4(float(SPIRV_Cross_BaseVertex), float(SPIRV_Cross_BaseInstance), float(gl_DrawIDARB), 1.0); +} + diff --git a/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert b/reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert.vk similarity index 100% rename from reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert rename to reference/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert.vk diff --git a/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert b/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert new file mode 100644 index 00000000000..bc16d0431aa --- /dev/null +++ b/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert @@ -0,0 +1,24 @@ +#version 460 +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif + +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseVertex gl_BaseVertexARB +#else +uniform int SPIRV_Cross_BaseVertex; +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else +uniform int SPIRV_Cross_BaseInstance; +#endif +#ifndef GL_ARB_shader_draw_parameters +#error GL_ARB_shader_draw_parameters is not supported. 
+#endif + +void main() +{ + gl_Position = vec4(float(SPIRV_Cross_BaseVertex), float(SPIRV_Cross_BaseInstance), float(gl_DrawIDARB), 1.0); +} + diff --git a/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert b/reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert.vk similarity index 100% rename from reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert rename to reference/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert.vk diff --git a/reference/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag b/reference/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag deleted file mode 100644 index ef6bb526ab9..00000000000 --- a/reference/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag +++ /dev/null @@ -1,24 +0,0 @@ -#version 450 - -layout(binding = 0) uniform sampler2D uTextures[2 * 3 * 1]; - -layout(location = 1) in vec2 vUV; -layout(location = 0) out vec4 FragColor; -layout(location = 0) flat in int vIndex; - -void main() -{ - vec4 values3[2 * 3 * 1]; - for (int z = 0; z < 2; z++) - { - for (int y = 0; y < 3; y++) - { - for (int x = 0; x < 1; x++) - { - values3[z * 3 * 1 + y * 1 + x] = texture(uTextures[z * 3 * 1 + y * 1 + x], vUV); - } - } - } - FragColor = (values3[1 * 3 * 1 + 2 * 1 + 0] + values3[0 * 3 * 1 + 2 * 1 + 0]) + values3[(vIndex + 1) * 3 * 1 + 2 * 1 + vIndex]; -} - diff --git a/reference/shaders/frag/avoid-expression-lowering-to-loop.frag b/reference/shaders/frag/avoid-expression-lowering-to-loop.frag new file mode 100644 index 00000000000..6313d896e27 --- /dev/null +++ b/reference/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -0,0 +1,26 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 1, std140) uniform Count +{ + float count; +} _44; + +layout(binding = 0) uniform mediump sampler2D tex; + +layout(location = 0) in highp vec4 vertex; +layout(location = 0) out vec4 fragColor; + +void main() 
+{ + highp float size = 1.0 / float(textureSize(tex, 0).x); + float r = 0.0; + float d = dFdx(vertex.x); + for (float i = 0.0; i < _44.count; i += 1.0) + { + r += (size * d); + } + fragColor = vec4(r); +} + diff --git a/reference/shaders/frag/barycentric-khr.frag b/reference/shaders/frag/barycentric-khr.frag new file mode 100644 index 00000000000..71a44c38575 --- /dev/null +++ b/reference/shaders/frag/barycentric-khr.frag @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_fragment_shader_barycentric : require + +layout(location = 0) out vec2 value; +layout(location = 0) pervertexEXT in vec2 vUV[3]; +layout(location = 3) pervertexEXT in vec2 vUV2[3]; + +void main() +{ + value = ((vUV[0] * gl_BaryCoordEXT.x) + (vUV[1] * gl_BaryCoordEXT.y)) + (vUV[2] * gl_BaryCoordEXT.z); + value += (((vUV2[0] * gl_BaryCoordNoPerspEXT.x) + (vUV2[1] * gl_BaryCoordNoPerspEXT.y)) + (vUV2[2] * gl_BaryCoordNoPerspEXT.z)); +} + diff --git a/reference/shaders/frag/barycentric-nv.frag b/reference/shaders/frag/barycentric-nv.frag index 12d24bb739c..b3b57e2f880 100644 --- a/reference/shaders/frag/barycentric-nv.frag +++ b/reference/shaders/frag/barycentric-nv.frag @@ -1,20 +1,13 @@ #version 450 #extension GL_NV_fragment_shader_barycentric : require -layout(binding = 0, std430) readonly buffer Vertices -{ - vec2 uvs[]; -} _19; - layout(location = 0) out vec2 value; +layout(location = 0) pervertexNV in vec2 vUV[3]; +layout(location = 1) pervertexNV in vec2 vUV2[3]; void main() { - int prim = gl_PrimitiveID; - vec2 uv0 = _19.uvs[(3 * prim) + 0]; - vec2 uv1 = _19.uvs[(3 * prim) + 1]; - vec2 uv2 = _19.uvs[(3 * prim) + 2]; - value = ((uv0 * gl_BaryCoordNV.x) + (uv1 * gl_BaryCoordNV.y)) + (uv2 * gl_BaryCoordNV.z); - value += (((uv0 * gl_BaryCoordNoPerspNV.x) + (uv1 * gl_BaryCoordNoPerspNV.y)) + (uv2 * gl_BaryCoordNoPerspNV.z)); + value = ((vUV[0] * gl_BaryCoordNV.x) + (vUV[1] * gl_BaryCoordNV.y)) + (vUV[2] * gl_BaryCoordNV.z); + value += (((vUV2[0] * gl_BaryCoordNoPerspNV.x) + (vUV2[1] * 
gl_BaryCoordNoPerspNV.y)) + (vUV2[2] * gl_BaryCoordNoPerspNV.z)); } diff --git a/reference/shaders/frag/ground.frag b/reference/shaders/frag/ground.frag index 4d998d56898..c36bb317b65 100644 --- a/reference/shaders/frag/ground.frag +++ b/reference/shaders/frag/ground.frag @@ -38,7 +38,7 @@ void main() { vec3 Normal = (texture(TexNormalmap, TexCoord).xyz * 2.0) - vec3(1.0); Normal = normalize(Normal); - highp float param = length(EyeVec) / 1000.0; + float param = length(EyeVec) / 1000.0; vec2 scatter_uv; scatter_uv.x = saturate(param); vec3 nEye = normalize(EyeVec); @@ -53,10 +53,10 @@ void main() Color = mix(dirt, base, vec3(edge)); Color *= Color; float Roughness = 1.0 - (edge * grass_snow); - highp vec3 param_1 = Color; - highp vec3 param_2 = Normal; - highp float param_3 = Roughness; - highp float param_4 = 0.0; + vec3 param_1 = Color; + vec3 param_2 = Normal; + float param_3 = Roughness; + float param_4 = 0.0; Resolve(param_1, param_2, param_3, param_4); } diff --git a/reference/shaders/frag/modf-pointer-function-analysis.frag b/reference/shaders/frag/modf-pointer-function-analysis.frag new file mode 100644 index 00000000000..2ca0050bad0 --- /dev/null +++ b/reference/shaders/frag/modf-pointer-function-analysis.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +vec4 modf_inner(out vec4 tmp) +{ + vec4 _20 = modf(v, tmp); + return _20; +} + +float modf_inner_partial(inout vec4 tmp) +{ + float _30 = modf(v.x, tmp.x); + return _30; +} + +void main() +{ + vec4 param; + vec4 _37 = modf_inner(param); + vec4 tmp = param; + vo0 = _37; + vo1 = tmp; + vec4 param_1 = tmp; + float _43 = modf_inner_partial(param_1); + tmp = param_1; + vo0.x += _43; + vo1.x += tmp.x; +} + diff --git a/reference/shaders/frag/partial-write-preserve.frag b/reference/shaders/frag/partial-write-preserve.frag index cf8a83cf0c4..87e689aff26 100644 --- a/reference/shaders/frag/partial-write-preserve.frag +++ 
b/reference/shaders/frag/partial-write-preserve.frag @@ -80,16 +80,16 @@ void branchy_inout_2(out B b) void main() { vec4 a = vec4(10.0); - highp vec4 param = a; + vec4 param = a; partial_inout(param); a = param; - highp vec4 param_1; + vec4 param_1; complete_inout(param_1); a = param_1; - highp vec4 param_2 = a; + vec4 param_2 = a; branchy_inout(param_2); a = param_2; - highp vec4 param_3; + vec4 param_3; branchy_inout_2(param_3); a = param_3; B b = B(10.0, 20.0); diff --git a/reference/shaders/frag/pixel-interlock-ordered.frag b/reference/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 00000000000..915b56511f2 --- /dev/null +++ b/reference/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/shaders/frag/pixel-interlock-unordered.frag b/reference/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 00000000000..13962daf19d --- /dev/null +++ b/reference/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(pixel_interlock_unordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/shaders/frag/post-depth-coverage-es.frag b/reference/shaders/frag/post-depth-coverage-es.frag new file mode 100644 index 00000000000..d086560e5d2 --- /dev/null +++ b/reference/shaders/frag/post-depth-coverage-es.frag @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_post_depth_coverage : require +#extension GL_OES_sample_variables : require +precision mediump float; +precision highp int; +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(gl_SampleMaskIn[0])); +} + diff --git a/reference/shaders/frag/post-depth-coverage.frag b/reference/shaders/frag/post-depth-coverage.frag new file mode 100644 index 00000000000..caca9c03cb5 --- /dev/null +++ b/reference/shaders/frag/post-depth-coverage.frag @@ -0,0 +1,15 @@ +#version 450 +#if defined(GL_ARB_post_depth_coverge) +#extension GL_ARB_post_depth_coverage : require +#else +#extension GL_EXT_post_depth_coverage : require +#endif +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(gl_SampleMaskIn[0])); +} + diff --git a/reference/shaders/frag/round-even.frag b/reference/shaders/frag/round-even.frag new file mode 100644 index 00000000000..ab6f37adc14 --- /dev/null +++ b/reference/shaders/frag/round-even.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; + +void 
main() +{ + FragColor = roundEven(vA); + FragColor *= roundEven(vB); +} + diff --git a/reference/shaders/frag/round.frag b/reference/shaders/frag/round.frag new file mode 100644 index 00000000000..0f1fc0db0f3 --- /dev/null +++ b/reference/shaders/frag/round.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; + +void main() +{ + FragColor = round(vA); + FragColor *= round(vB); +} + diff --git a/reference/shaders/frag/sample-interlock-ordered.frag b/reference/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 00000000000..9d5f90e4aaf --- /dev/null +++ b/reference/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(sample_interlock_ordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0])); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/shaders/frag/sample-interlock-unordered.frag b/reference/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 00000000000..441198814e0 --- /dev/null +++ b/reference/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,35 @@ +#version 450 +#ifdef GL_ARB_fragment_shader_interlock +#extension GL_ARB_fragment_shader_interlock : enable +#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB() +#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB() +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL() +#define SPIRV_Cross_endInvocationInterlock() +#endif +#if defined(GL_ARB_fragment_shader_interlock) +layout(sample_interlock_unordered) in; +#elif !defined(GL_INTEL_fragment_shader_ordering) +#error Fragment Shader Interlock/Ordering extension missing! 
+#endif + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + SPIRV_Cross_beginInvocationInterlock(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + SPIRV_Cross_endInvocationInterlock(); +} + diff --git a/reference/shaders/frag/sampler.frag b/reference/shaders/frag/sampler.frag index 0ec200c7146..d62f0afeff9 100644 --- a/reference/shaders/frag/sampler.frag +++ b/reference/shaders/frag/sampler.frag @@ -15,7 +15,7 @@ vec4 sample_texture(mediump sampler2D tex, vec2 uv) void main() { - highp vec2 param = vTex; + vec2 param = vTex; FragColor = vColor * sample_texture(uTex, param); } diff --git a/reference/shaders/frag/struct-type-unrelated-alias.frag b/reference/shaders/frag/struct-type-unrelated-alias.frag new file mode 100644 index 00000000000..f38d18ff28f --- /dev/null +++ b/reference/shaders/frag/struct-type-unrelated-alias.frag @@ -0,0 +1,18 @@ +#version 450 + +struct T +{ + float a; +}; + +layout(location = 0) out float FragColor; + +void main() +{ + T foo; + foo.a = 10.0; + T bar; + bar.a = 20.0; + FragColor = foo.a + bar.a; +} + diff --git a/reference/shaders/frag/switch-unreachable-break.frag b/reference/shaders/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..d8396d69a90 --- /dev/null +++ b/reference/shaders/frag/switch-unreachable-break.frag @@ -0,0 +1,36 @@ +#version 450 + +layout(binding = 0, std140) uniform UBO +{ + int cond; + int cond2; +} _13; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + bool frog = false; + switch (_13.cond) + { + case 1: + { + if (_13.cond2 < 50) + { + break; + } + else + { + discard; + } + break; // unreachable workaround + } + default: + { + frog = true; + break; + } + } + FragColor = mix(vec4(20.0), vec4(10.0), 
bvec4(frog)); +} + diff --git a/reference/shaders/frag/ubo-load-row-major-workaround.frag b/reference/shaders/frag/ubo-load-row-major-workaround.frag new file mode 100644 index 00000000000..13049b456b7 --- /dev/null +++ b/reference/shaders/frag/ubo-load-row-major-workaround.frag @@ -0,0 +1,48 @@ +#version 450 + +struct RowMajor +{ + mat4 B; +}; + +struct NestedRowMajor +{ + RowMajor rm; +}; + +layout(binding = 2, std140) uniform UBO3 +{ + layout(row_major) NestedRowMajor rm2; +} _17; + +layout(binding = 1, std140) uniform UBO2 +{ + layout(row_major) RowMajor rm; +} _35; + +layout(binding = 0, std140) uniform UBO +{ + layout(row_major) mat4 A; + mat4 C; +} _42; + +layout(binding = 3, std140) uniform UBONoWorkaround +{ + mat4 D; +} _56; + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 Clip; + +NestedRowMajor spvWorkaroundRowMajor(NestedRowMajor wrap) { return wrap; } +mat4 spvWorkaroundRowMajor(mat4 wrap) { return wrap; } + +void main() +{ + NestedRowMajor rm2_loaded; + rm2_loaded.rm.B = spvWorkaroundRowMajor(_17.rm2).rm.B; + FragColor = (((rm2_loaded.rm.B * spvWorkaroundRowMajor(_35.rm.B)) * spvWorkaroundRowMajor(_42.A)) * spvWorkaroundRowMajor(_42.C)) * Clip; + FragColor += (_56.D * Clip); + FragColor += (_42.A[1] * Clip); +} + diff --git a/reference/shaders/frag/ubo_layout.frag b/reference/shaders/frag/ubo_layout.frag index 4b66e1396a7..bc0b01c065f 100644 --- a/reference/shaders/frag/ubo_layout.frag +++ b/reference/shaders/frag/ubo_layout.frag @@ -7,11 +7,6 @@ struct Str mat4 foo; }; -struct Str_1 -{ - mat4 foo; -}; - layout(binding = 0, std140) uniform UBO1 { layout(row_major) Str foo; @@ -19,7 +14,7 @@ layout(binding = 0, std140) uniform UBO1 layout(binding = 1, std140) uniform UBO2 { - Str_1 foo; + Str foo; } ubo0; layout(location = 0) out vec4 FragColor; diff --git a/reference/shaders/geom/geometry-passthrough.geom b/reference/shaders/geom/geometry-passthrough.geom new file mode 100644 index 00000000000..d0d8806ad2a --- /dev/null +++ 
b/reference/shaders/geom/geometry-passthrough.geom @@ -0,0 +1,27 @@ +#version 450 +#extension GL_NV_geometry_shader_passthrough : require +layout(triangles) in; + +layout(passthrough) in gl_PerVertex +{ + vec4 gl_Position; +} gl_in[]; + +layout(passthrough, location = 0) in VertexBlock +{ + int a; + int b; +} v1[3]; + +layout(location = 2) in VertexBlock2 +{ + int a; + layout(passthrough) int b; +} v2[3]; + + +void main() +{ + gl_Layer = (gl_InvocationID + v1[0].a) + v2[1].b; +} + diff --git a/reference/shaders/geom/multi-stream.geom b/reference/shaders/geom/multi-stream.geom new file mode 100644 index 00000000000..548164d7804 --- /dev/null +++ b/reference/shaders/geom/multi-stream.geom @@ -0,0 +1,14 @@ +#version 450 +layout(triangles) in; +layout(max_vertices = 2, points) out; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + EmitStreamVertex(0); + EndStreamPrimitive(0); + gl_Position = gl_in[0].gl_Position + vec4(2.0); + EmitStreamVertex(1); + EndStreamPrimitive(1); +} + diff --git a/reference/shaders/geom/transform-feedback-streams.geom b/reference/shaders/geom/transform-feedback-streams.geom new file mode 100644 index 00000000000..4d238b4adff --- /dev/null +++ b/reference/shaders/geom/transform-feedback-streams.geom @@ -0,0 +1,26 @@ +#version 450 +layout(points) in; +layout(max_vertices = 2, points) out; + +layout(xfb_buffer = 1, xfb_stride = 20, stream = 1) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(location = 0, xfb_buffer = 2, xfb_stride = 32, xfb_offset = 16, stream = 1) out vec4 vFoo; +layout(xfb_buffer = 3, xfb_stride = 16, stream = 2) out VertOut +{ + layout(location = 1, xfb_offset = 0) vec4 vBar; +} _23; + + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + EmitStreamVertex(1); + _23.vBar = vec4(5.0); + EmitStreamVertex(2); +} + diff --git a/reference/shaders/legacy/fragment/explicit-lod.legacy.vert b/reference/shaders/legacy/fragment/explicit-lod.legacy.vert new file mode 
100644 index 00000000000..b73faa47ab5 --- /dev/null +++ b/reference/shaders/legacy/fragment/explicit-lod.legacy.vert @@ -0,0 +1,11 @@ +#version 100 + +uniform mediump sampler2D tex; + +varying mediump vec4 FragColor; + +void main() +{ + FragColor = texture2DLod(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625), 3.0); +} + diff --git a/reference/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag b/reference/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag new file mode 100644 index 00000000000..1c811d3089f --- /dev/null +++ b/reference/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag @@ -0,0 +1,39 @@ +#version 100 +precision mediump float; +precision highp int; + +struct Foo +{ + highp vec4 a; + highp vec4 b; +}; + +struct Bar +{ + highp vec4 a; + highp vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +varying highp vec4 baz_foo_a; +varying highp vec4 baz_foo_b; +varying highp vec4 baz_bar_a; +varying highp vec4 baz_bar_b; +varying highp vec4 _33_a_a; +varying highp vec4 _33_a_b; +varying highp vec4 _33_b_a; +varying highp vec4 _33_b_b; + +void main() +{ + Baz bazzy = Baz(Foo(baz_foo_a, baz_foo_b), Bar(baz_bar_a, baz_bar_b)); + Foo bazzy_foo = Foo(baz_foo_a, baz_foo_b); + Bar bazzy_bar = Bar(baz_bar_a, baz_bar_b); + gl_FragData[0] = (((_33_a_a + _33_b_b) + bazzy.foo.b) + bazzy_foo.a) + bazzy_bar.b; +} + diff --git a/reference/shaders/legacy/fragment/round.legacy.frag b/reference/shaders/legacy/fragment/round.legacy.frag new file mode 100644 index 00000000000..9033bc3c56c --- /dev/null +++ b/reference/shaders/legacy/fragment/round.legacy.frag @@ -0,0 +1,13 @@ +#version 100 +precision mediump float; +precision highp int; + +varying highp vec4 vA; +varying highp float vB; + +void main() +{ + gl_FragData[0] = floor(vA + vec4(0.5)); + gl_FragData[0] *= floor(vB + float(0.5)); +} + diff --git a/reference/shaders/legacy/fragment/switch.legacy.frag b/reference/shaders/legacy/fragment/switch.legacy.frag new file 
mode 100644 index 00000000000..9155d1cdbcc --- /dev/null +++ b/reference/shaders/legacy/fragment/switch.legacy.frag @@ -0,0 +1,78 @@ +#version 100 +precision mediump float; +precision highp int; + +varying highp float vIndexF; + +void main() +{ + int vIndex = int(vIndexF); + highp vec4 v = vec4(0.0); + for (int spvDummy21 = 0; spvDummy21 < 1; spvDummy21++) + { + if (vIndex == 2) + { + v = vec4(0.0, 2.0, 3.0, 4.0); + break; + } + else if ((vIndex == 4) || (vIndex == 5)) + { + v = vec4(1.0, 2.0, 3.0, 4.0); + break; + } + else if ((vIndex == 8) || (vIndex == 9)) + { + v = vec4(40.0, 20.0, 30.0, 40.0); + break; + } + else if (vIndex == 10) + { + v = vec4(10.0); + highp vec4 _43 = v; + highp vec4 _44 = vec4(1.0); + highp vec4 _45 = _43 + _44; + v = _45; + highp vec4 _46 = v; + highp vec4 _47 = vec4(2.0); + highp vec4 _48 = _46 + _47; + v = _48; + break; + } + else if (vIndex == 11) + { + highp vec4 _43 = v; + highp vec4 _44 = vec4(1.0); + highp vec4 _45 = _43 + _44; + v = _45; + highp vec4 _46 = v; + highp vec4 _47 = vec4(2.0); + highp vec4 _48 = _46 + _47; + v = _48; + break; + } + else if (vIndex == 12) + { + highp vec4 _46 = v; + highp vec4 _47 = vec4(2.0); + highp vec4 _48 = _46 + _47; + v = _48; + break; + } + else + { + v = vec4(10.0, 20.0, 30.0, 40.0); + break; + } + } + highp vec4 w = vec4(20.0); + for (int spvDummy165 = 0; spvDummy165 < 1; spvDummy165++) + { + if ((vIndex == 10) || (vIndex == 20)) + { + w = vec4(40.0); + break; + } + } + gl_FragData[0] = v + w; +} + diff --git a/reference/shaders/legacy/vert/implicit-lod.legacy.vert b/reference/shaders/legacy/vert/implicit-lod.legacy.vert index 6e441074482..2d2050498e0 100644 --- a/reference/shaders/legacy/vert/implicit-lod.legacy.vert +++ b/reference/shaders/legacy/vert/implicit-lod.legacy.vert @@ -4,6 +4,6 @@ uniform mediump sampler2D tex; void main() { - gl_Position = texture2D(tex, vec2(0.4000000059604644775390625, 0.60000002384185791015625)); + gl_Position = texture2DLod(tex, 
vec2(0.4000000059604644775390625, 0.60000002384185791015625), 0.0); } diff --git a/reference/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert b/reference/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert new file mode 100644 index 00000000000..fa1d643bc4f --- /dev/null +++ b/reference/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert @@ -0,0 +1,18 @@ +#version 100 + +struct Foo +{ + float a[4]; +}; + +varying float foo_a[4]; + +void main() +{ + gl_Position = vec4(1.0); + for (int i = 0; i < 4; i++) + { + foo_a[i] = float(i + 2); + } +} + diff --git a/reference/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert b/reference/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert new file mode 100644 index 00000000000..cf807c41f7f --- /dev/null +++ b/reference/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert @@ -0,0 +1,49 @@ +#version 100 + +struct Foo +{ + vec4 a; + vec4 b; +}; + +struct Bar +{ + vec4 a; + vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +varying vec4 _12_a_a; +varying vec4 _12_a_b; +varying vec4 _12_b_a; +varying vec4 _12_b_b; +varying vec4 baz_foo_a; +varying vec4 baz_foo_b; +varying vec4 baz_bar_a; +varying vec4 baz_bar_b; + +void main() +{ + _12_a_a = vec4(10.0); + _12_a_b = vec4(20.0); + _12_b_a = vec4(30.0); + _12_b_b = vec4(40.0); + _12_a_a = Foo(vec4(50.0), vec4(60.0)).a; + _12_a_b = Foo(vec4(50.0), vec4(60.0)).b; + _12_b_a = Bar(vec4(50.0), vec4(60.0)).a; + _12_b_b = Bar(vec4(50.0), vec4(60.0)).b; + baz_foo_a = Foo(vec4(100.0), vec4(200.0)).a; + baz_foo_b = Foo(vec4(100.0), vec4(200.0)).b; + baz_bar_a = Bar(vec4(300.0), vec4(400.0)).a; + baz_bar_b = Bar(vec4(300.0), vec4(400.0)).b; + baz_foo_a = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).foo.a; + baz_foo_b = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).foo.b; + baz_bar_a = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).bar.a; + 
baz_bar_b = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))).bar.b; +} + diff --git a/reference/shaders/legacy/vert/struct-varying.legacy.vert b/reference/shaders/legacy/vert/struct-varying.legacy.vert index 261e986034f..fcdeb55bb6f 100644 --- a/reference/shaders/legacy/vert/struct-varying.legacy.vert +++ b/reference/shaders/legacy/vert/struct-varying.legacy.vert @@ -12,16 +12,10 @@ varying vec2 vout_b; void main() { Output s = Output(vec4(0.5), vec2(0.25)); - { - Output vout = s; - vout_a = vout.a; - vout_b = vout.b; - } - { - Output vout = s; - vout_a = vout.a; - vout_b = vout.b; - } + vout_a = s.a; + vout_b = s.b; + vout_a = s.a; + vout_b = s.b; Output tmp = Output(vout_a, vout_b); vout_a = tmp.a; vout_b = tmp.b; diff --git a/reference/shaders/legacy/vert/switch-nested.legacy.vert b/reference/shaders/legacy/vert/switch-nested.legacy.vert new file mode 100644 index 00000000000..3ec027b4472 --- /dev/null +++ b/reference/shaders/legacy/vert/switch-nested.legacy.vert @@ -0,0 +1,41 @@ +#version 100 + +struct UBO +{ + int func_arg; + int inner_func_arg; +}; + +uniform UBO _34; + +vec4 test_inner_func(bool b) +{ + if (b) + { + return vec4(1.0); + } + else + { + return vec4(0.0); + } +} + +vec4 test_func(bool b) +{ + if (b) + { + bool param = _34.inner_func_arg != 0; + return test_inner_func(param); + } + else + { + return vec4(0.0); + } +} + +void main() +{ + bool param = _34.func_arg != 0; + gl_Position = test_func(param); +} + diff --git a/reference/shaders/legacy/vert/transpose.legacy.vert b/reference/shaders/legacy/vert/transpose.legacy.vert index c73d1a11d92..ce5cf8b172f 100644 --- a/reference/shaders/legacy/vert/transpose.legacy.vert +++ b/reference/shaders/legacy/vert/transpose.legacy.vert @@ -11,12 +11,28 @@ uniform Buffer _13; attribute vec4 Position; +highp mat4 spvWorkaroundRowMajor(highp mat4 wrap) { return wrap; } +mediump mat4 spvWorkaroundRowMajorMP(mediump mat4 wrap) { return wrap; } + +mat4 spvTranspose(mat4 m) +{ + return 
mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]); +} + void main() { - vec4 c0 = _13.M * (Position * _13.MVPRowMajor); - vec4 c1 = _13.M * (_13.MVPColMajor * Position); - vec4 c2 = _13.M * (_13.MVPRowMajor * Position); - vec4 c3 = _13.M * (Position * _13.MVPColMajor); - gl_Position = ((c0 + c1) + c2) + c3; + vec4 c0 = spvWorkaroundRowMajor(_13.M) * (Position * _13.MVPRowMajor); + vec4 c1 = spvWorkaroundRowMajor(_13.M) * (spvWorkaroundRowMajor(_13.MVPColMajor) * Position); + vec4 c2 = spvWorkaroundRowMajor(_13.M) * (_13.MVPRowMajor * Position); + vec4 c3 = spvWorkaroundRowMajor(_13.M) * (Position * spvWorkaroundRowMajor(_13.MVPColMajor)); + vec4 c4 = _13.MVPRowMajor * Position; + vec4 c5 = Position * spvWorkaroundRowMajor(_13.MVPColMajor); + vec4 c6 = Position * _13.MVPRowMajor; + vec4 c7 = spvWorkaroundRowMajor(_13.MVPColMajor) * Position; + vec4 c8 = (spvTranspose(_13.MVPRowMajor) * 2.0) * Position; + vec4 c9 = (spvTranspose(spvWorkaroundRowMajor(_13.MVPColMajor)) * 2.0) * Position; + vec4 c10 = Position * (spvTranspose(_13.MVPRowMajor) * 2.0); + vec4 c11 = Position * (spvTranspose(spvWorkaroundRowMajor(_13.MVPColMajor)) * 2.0); + gl_Position = ((((((((((c0 + c1) + c2) + c3) + c4) + c5) + c6) + c7) + c8) + c9) + c10) + c11; } diff --git a/reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk b/reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..20597b986e7 --- /dev/null +++ b/reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, lines) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float 
gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u) + uvec2(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + diff --git a/reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk 
b/reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..ecb8285df2b --- /dev/null +++ b/reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, points) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + 
gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + diff --git a/reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk b/reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk new file mode 100644 index 00000000000..e10459d7b9c --- /dev/null +++ b/reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, triangles) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0); + 
outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0u, 1u, 2u) + uvec3(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + diff --git a/reference/shaders/tesc/water_tess.tesc b/reference/shaders/tesc/water_tess.tesc index 8d5a4a30479..6f499380b74 100644 --- a/reference/shaders/tesc/water_tess.tesc +++ b/reference/shaders/tesc/water_tess.tesc @@ -26,9 +26,7 @@ bool frustum_cull(vec2 p0) float radius = 0.5 * length(bb_max - bb_min); vec3 f0 = vec3(dot(_41.uFrustum[0], vec4(center, 1.0)), dot(_41.uFrustum[1], vec4(center, 1.0)), dot(_41.uFrustum[2], vec4(center, 1.0))); vec3 f1 = vec3(dot(_41.uFrustum[3], vec4(center, 1.0)), dot(_41.uFrustum[4], vec4(center, 1.0)), dot(_41.uFrustum[5], vec4(center, 1.0))); - vec3 _199 = f0; - float _200 = radius; - bool _205 = any(lessThanEqual(_199, vec3(-_200))); + bool _205 = any(lessThanEqual(f0, vec3(-radius))); bool _215; if (!_205) { diff --git a/reference/shaders/tese/load-array-of-array.tese b/reference/shaders/tese/load-array-of-array.tese new file mode 100644 index 00000000000..7fab08ef66e --- /dev/null +++ b/reference/shaders/tese/load-array-of-array.tese @@ -0,0 +1,16 @@ +#version 450 
+layout(quads, ccw, equal_spacing) in; + +layout(location = 0) in vec4 vTexCoord[][1]; + +void main() +{ + vec4 _17_unrolled[32][1]; + for (int i = 0; i < int(32); i++) + { + _17_unrolled[i] = vTexCoord[i]; + } + vec4 tmp[32][1] = _17_unrolled; + gl_Position = (tmp[0][0] + tmp[2][0]) + tmp[3][0]; +} + diff --git a/reference/shaders/tese/patch-input-array.tese b/reference/shaders/tese/patch-input-array.tese new file mode 100644 index 00000000000..413d8b391fb --- /dev/null +++ b/reference/shaders/tese/patch-input-array.tese @@ -0,0 +1,10 @@ +#version 450 +layout(quads, ccw, equal_spacing) in; + +layout(location = 0) patch in float P[4]; + +void main() +{ + gl_Position = vec4(P[0], P[1], P[2], P[3]); +} + diff --git a/reference/shaders/tese/water_tess.tese b/reference/shaders/tese/water_tess.tese index e743ed3e9c0..a0cf42d513b 100644 --- a/reference/shaders/tese/water_tess.tese +++ b/reference/shaders/tese/water_tess.tese @@ -52,7 +52,7 @@ void main() vGradNormalTex = vec4(tex + (_31.uInvHeightmapSize * 0.5), tex * _31.uScale.zw); vec2 param_2 = tex; vec2 param_3 = off; - vec2 param_4 = lod; + mediump vec2 param_4 = lod; vec3 height_displacement = sample_height_displacement(param_2, param_3, param_4); pos += height_displacement.yz; vWorld = vec3(pos.x, height_displacement.x, pos.y); diff --git a/reference/shaders/vert/ground.vert b/reference/shaders/vert/ground.vert index 69f92534ccb..73923c447f6 100644 --- a/reference/shaders/vert/ground.vert +++ b/reference/shaders/vert/ground.vert @@ -1,4 +1,7 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct PatchData { @@ -44,7 +47,11 @@ layout(binding = 1) uniform mediump sampler2D TexLOD; layout(binding = 0) uniform mediump sampler2D TexHeightmap; layout(location = 1) in vec4 LODWeights; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) in 
vec2 Position; layout(location = 1) out vec3 EyeVec; layout(location = 0) out vec2 TexCoord; @@ -67,7 +74,6 @@ vec2 warp_position() { _110 = 0u; } - uint _116 = _110; uint _120; if (uPosition.y < 32u) { @@ -77,7 +83,7 @@ vec2 warp_position() { _120 = 0u; } - uvec2 rounding = uvec2(_116, _120); + uvec2 rounding = uvec2(_110, _120); vec4 lower_upper_snapped = vec4((uPosition + rounding).xyxy & (~mask).xxyy); return mix(lower_upper_snapped.xy, lower_upper_snapped.zw, vec2(fract_lod)); } diff --git a/reference/shaders/vert/no-contraction.vert b/reference/shaders/vert/no-contraction.vert new file mode 100644 index 00000000000..83e392c6cc2 --- /dev/null +++ b/reference/shaders/vert/no-contraction.vert @@ -0,0 +1,24 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 _15 = vA * vB; + vec4 mul = _15; + precise vec4 _19 = vA + vB; + vec4 add = _19; + precise vec4 _23 = vA - vB; + vec4 sub = _23; + precise vec4 _27 = vA * vB; + precise vec4 _30 = _27 + vC; + vec4 mad = _30; + precise vec4 _34 = mul + add; + precise vec4 _36 = _34 + sub; + precise vec4 _38 = _36 + mad; + vec4 summed = _38; + gl_Position = summed; +} + diff --git a/reference/shaders/vert/ocean.vert b/reference/shaders/vert/ocean.vert index 720bd7d0de7..60fa80ebdb2 100644 --- a/reference/shaders/vert/ocean.vert +++ b/reference/shaders/vert/ocean.vert @@ -1,4 +1,7 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif struct PatchData { @@ -45,7 +48,11 @@ layout(binding = 1) uniform mediump sampler2D TexLOD; layout(binding = 0) uniform mediump sampler2D TexDisplacement; layout(location = 1) in vec4 LODWeights; +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif layout(location = 0) in vec4 Position; layout(location = 0) out vec3 EyeVec; layout(location = 1) out 
vec4 TexCoord; diff --git a/reference/shaders/vert/read-from-row-major-array.vert b/reference/shaders/vert/read-from-row-major-array.vert index 1c950f3fa41..8b5ec967388 100644 --- a/reference/shaders/vert/read-from-row-major-array.vert +++ b/reference/shaders/vert/read-from-row-major-array.vert @@ -8,6 +8,9 @@ layout(binding = 0, std140) uniform Block layout(location = 0) in vec4 a_position; layout(location = 0) out mediump float v_vtxResult; +highp mat2x3 spvWorkaroundRowMajor(highp mat2x3 wrap) { return wrap; } +mediump mat2x3 spvWorkaroundRowMajorMP(mediump mat2x3 wrap) { return wrap; } + mediump float compare_float(float a, float b) { return float(abs(a - b) < 0.0500000007450580596923828125); @@ -37,7 +40,7 @@ void main() { gl_Position = a_position; mediump float result = 1.0; - mat2x3 param = _104.var[0][0]; + mat2x3 param = spvWorkaroundRowMajor(_104.var[0][0]); mat2x3 param_1 = mat2x3(vec3(2.0, 6.0, -6.0), vec3(0.0, 5.0, 5.0)); result *= compare_mat2x3(param, param_1); v_vtxResult = result; diff --git a/reference/shaders/vert/row-major-workaround.vert b/reference/shaders/vert/row-major-workaround.vert new file mode 100644 index 00000000000..4fe6885d101 --- /dev/null +++ b/reference/shaders/vert/row-major-workaround.vert @@ -0,0 +1,30 @@ +#version 310 es + +layout(binding = 0, std140) uniform Buffer +{ + layout(row_major) mat4 HP; + layout(row_major) mediump mat4 MP; +} _21; + +layout(binding = 1, std140) uniform Buffer2 +{ + layout(row_major) mediump mat4 MP2; +} _39; + +layout(location = 0) out vec4 H; +layout(location = 0) in vec4 Hin; +layout(location = 1) out mediump vec4 M; +layout(location = 1) in mediump vec4 Min; +layout(location = 2) out mediump vec4 M2; + +highp mat4 spvWorkaroundRowMajor(highp mat4 wrap) { return wrap; } +mediump mat4 spvWorkaroundRowMajorMP(mediump mat4 wrap) { return wrap; } + +void main() +{ + gl_Position = vec4(1.0); + H = spvWorkaroundRowMajor(_21.HP) * Hin; + M = spvWorkaroundRowMajor(_21.MP) * Min; + M2 = 
spvWorkaroundRowMajorMP(_39.MP2) * Min; +} + diff --git a/reference/shaders/vert/texture_buffer.vert b/reference/shaders/vert/texture_buffer.vert index e9442ce1196..217804dfce9 100644 --- a/reference/shaders/vert/texture_buffer.vert +++ b/reference/shaders/vert/texture_buffer.vert @@ -1,5 +1,5 @@ #version 310 es -#extension GL_OES_texture_buffer : require +#extension GL_EXT_texture_buffer : require layout(binding = 4) uniform highp samplerBuffer uSamp; layout(binding = 5, rgba32f) uniform readonly highp imageBuffer uSampo; diff --git a/reference/shaders/vert/transform-feedback-decorations.vert b/reference/shaders/vert/transform-feedback-decorations.vert new file mode 100644 index 00000000000..23e7cf3c19d --- /dev/null +++ b/reference/shaders/vert/transform-feedback-decorations.vert @@ -0,0 +1,22 @@ +#version 450 + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(location = 0, xfb_buffer = 2, xfb_stride = 32, xfb_offset = 16) out vec4 vFoo; +layout(xfb_buffer = 3, xfb_stride = 16) out VertOut +{ + layout(location = 1, xfb_offset = 0) vec4 vBar; +} _22; + + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + _22.vBar = vec4(5.0); +} + diff --git a/reference/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk index 82ebb960856..771d0496447 100644 --- a/reference/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk +++ b/reference/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp.vk @@ -3,7 +3,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer Block; -layout(buffer_reference, std430) buffer Block +layout(buffer_reference, buffer_reference_align = 4, std430) buffer Block { float v; }; diff --git a/reference/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk 
b/reference/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk new file mode 100644 index 00000000000..f5907d3e2c0 --- /dev/null +++ b/reference/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp.vk @@ -0,0 +1,28 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Bar; +layout(buffer_reference) buffer Foo; +layout(buffer_reference, buffer_reference_align = 8, std430) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(buffer_reference, std430) buffer Foo +{ + uint v; +}; + +layout(push_constant, std430) uniform Push +{ + Bar bar; +} _13; + +void main() +{ + uint _24 = atomicAdd(_13.bar.b, 1u); +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk new file mode 100644 index 00000000000..9cd3d3e5bbe --- /dev/null +++ b/reference/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp.vk @@ -0,0 +1,29 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer Bar; +layout(buffer_reference) buffer Foo; +layout(buffer_reference, buffer_reference_align = 8, std430) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(buffer_reference, std430) buffer Foo +{ + uint v; +}; + +layout(push_constant, std430) uniform Push +{ + Bar bar; +} _15; + +void main() +{ + uint v = _15.bar.b; + uint _31 = atomicAdd(_15.bar.a, v); +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk new file mode 100644 index 00000000000..20a4f1b4239 --- /dev/null +++ 
b/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp.vk @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer PtrInt; +layout(buffer_reference, buffer_reference_align = 4, std430) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0, std430) buffer Buf +{ + uvec2 ptr; + PtrInt ptrint; +} _13; + +void main() +{ + _13.ptr = uvec2(_13.ptrint); +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk new file mode 100644 index 00000000000..5cf6e2df36d --- /dev/null +++ b/reference/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp.vk @@ -0,0 +1,21 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer PtrInt; +layout(buffer_reference, buffer_reference_align = 16, std430) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0, std430) buffer Buf +{ + uvec2 ptr; +} _10; + +void main() +{ + PtrInt(_10.ptr).value = 10; +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk index 5752f81b268..8923d21d780 100644 --- a/reference/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk +++ b/reference/shaders/vulkan/comp/buffer-reference-bitcast.nocompat.vk.comp.vk @@ -4,12 +4,12 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer PtrUint; layout(buffer_reference) buffer PtrInt; -layout(buffer_reference, std430) buffer PtrUint +layout(buffer_reference, buffer_reference_align = 4, 
std430) buffer PtrUint { uint value; }; -layout(buffer_reference, std430) buffer PtrInt +layout(buffer_reference, buffer_reference_align = 16, std430) buffer PtrInt { int value; }; diff --git a/reference/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk new file mode 100644 index 00000000000..b7e88062a04 --- /dev/null +++ b/reference/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer RO; +layout(buffer_reference) buffer RW; +layout(buffer_reference) buffer WO; +layout(buffer_reference, buffer_reference_align = 16, std430) readonly buffer RO +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer RW +{ + vec4 v[]; +}; + +layout(buffer_reference, buffer_reference_align = 16, std430) coherent writeonly buffer WO +{ + vec4 v[]; +}; + +layout(push_constant, std430) uniform Registers +{ + RO ro; + RW rw; + WO wo; +} registers; + +void main() +{ + registers.rw.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; + registers.wo.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; +} + diff --git a/reference/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk index e22974114bd..241483ede4c 100644 --- a/reference/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk +++ b/reference/shaders/vulkan/comp/buffer-reference.nocompat.vk.comp.vk @@ -1,10 +1,14 @@ #version 450 +#if defined(GL_ARB_gpu_shader_int64) #extension GL_ARB_gpu_shader_int64 : require +#else +#error No extension available for 64-bit integers. 
+#endif #extension GL_EXT_buffer_reference : require layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(buffer_reference) buffer Node; -layout(buffer_reference, std430) buffer Node +layout(buffer_reference, buffer_reference_align = 16, std430) buffer Node { layout(offset = 0) int value; layout(offset = 16) Node next; diff --git a/reference/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk b/reference/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk new file mode 100644 index 00000000000..db022309be1 --- /dev/null +++ b/reference/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp.vk @@ -0,0 +1,23 @@ +#version 450 +#extension GL_EXT_shader_atomic_float : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std430) buffer SSBO +{ + float v; +} _18; + +layout(set = 0, binding = 0, r32f) uniform image2D uImage; + +shared float shared_v; + +void main() +{ + float _15 = atomicAdd(shared_v, 2.0); + float value = _15; + float _24 = atomicAdd(_18.v, value); + float _39 = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), value); + float _45 = imageAtomicExchange(uImage, ivec2(gl_GlobalInvocationID.xy), value); + value = _45; +} + diff --git a/reference/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk b/reference/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk new file mode 100644 index 00000000000..fbe5e3d9640 --- /dev/null +++ b/reference/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp.vk @@ -0,0 +1,72 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_flags_primitive_culling : require +#extension GL_EXT_ray_tracing : require +layout(primitive_culling); +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std140) uniform Params +{ + uint ray_flags; + uint cull_mask; + vec3 origin; + float tmin; + vec3 dir; + float tmax; + float thit; + uvec2 bda; +} _19; + +layout(set = 0, binding = 
0) uniform accelerationStructureEXT AS; + +rayQueryEXT q; +rayQueryEXT q2[2]; + +void main() +{ + rayQueryInitializeEXT(q, AS, _19.ray_flags, _19.cull_mask, _19.origin, _19.tmin, _19.dir, _19.tmax); + rayQueryInitializeEXT(q2[1], accelerationStructureEXT(_19.bda), _19.ray_flags, _19.cull_mask, _19.origin, _19.tmin, _19.dir, _19.tmax); + bool _67 = rayQueryProceedEXT(q); + bool res = _67; + rayQueryTerminateEXT(q2[0]); + rayQueryGenerateIntersectionEXT(q, _19.thit); + rayQueryConfirmIntersectionEXT(q2[1]); + float _75 = rayQueryGetRayTMinEXT(q); + float fval = _75; + uint _79 = rayQueryGetRayFlagsEXT(q2[0]); + uint type = _79; + vec3 _82 = rayQueryGetWorldRayDirectionEXT(q); + vec3 fvals = _82; + vec3 _83 = rayQueryGetWorldRayOriginEXT(q); + fvals = _83; + uint _86 = rayQueryGetIntersectionTypeEXT(q2[1], bool(1)); + type = _86; + bool _88 = rayQueryGetIntersectionCandidateAABBOpaqueEXT(q2[1]); + res = _88; + float _91 = rayQueryGetIntersectionTEXT(q2[1], bool(0)); + fval = _91; + int _94 = rayQueryGetIntersectionInstanceCustomIndexEXT(q, bool(1)); + int ival = _94; + int _96 = rayQueryGetIntersectionInstanceIdEXT(q2[0], bool(0)); + ival = _96; + uint _97 = rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(q, bool(1)); + type = _97; + int _99 = rayQueryGetIntersectionGeometryIndexEXT(q2[1], bool(0)); + ival = _99; + int _100 = rayQueryGetIntersectionPrimitiveIndexEXT(q, bool(1)); + ival = _100; + vec2 _103 = rayQueryGetIntersectionBarycentricsEXT(q2[0], bool(0)); + fvals.x = _103.x; + fvals.y = _103.y; + bool _110 = rayQueryGetIntersectionFrontFaceEXT(q, bool(1)); + res = _110; + vec3 _111 = rayQueryGetIntersectionObjectRayDirectionEXT(q, bool(0)); + fvals = _111; + vec3 _113 = rayQueryGetIntersectionObjectRayOriginEXT(q2[0], bool(1)); + fvals = _113; + mat4x3 _117 = rayQueryGetIntersectionObjectToWorldEXT(q, bool(0)); + mat4x3 matrices = _117; + mat4x3 _119 = rayQueryGetIntersectionWorldToObjectEXT(q2[1], bool(1)); + matrices = _119; +} + diff --git 
a/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag b/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag index af64fb87aa1..716e283baf8 100644 --- a/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag +++ b/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag @@ -9,7 +9,7 @@ layout(location = 0) out float FragColor; float samp2(mediump sampler2DShadow SPIRV_Cross_Combinedts) { - return texture(SPIRV_Cross_Combinedts, vec3(vec3(1.0).xy, vec3(1.0).z)); + return texture(SPIRV_Cross_Combinedts, vec3(vec3(1.0).xy, 1.0)); } float samp3(mediump sampler2D SPIRV_Cross_Combinedts) diff --git a/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag.vk b/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag.vk index f475ae53a9a..5ba3bdf35c2 100644 --- a/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag.vk +++ b/reference/shaders/vulkan/frag/combined-texture-sampler-shadow.vk.frag.vk @@ -10,7 +10,7 @@ layout(location = 0) out float FragColor; float samp2(mediump texture2D t, mediump samplerShadow s) { - return texture(sampler2DShadow(t, s), vec3(vec3(1.0).xy, vec3(1.0).z)); + return texture(sampler2DShadow(t, s), vec3(vec3(1.0).xy, 1.0)); } float samp3(mediump texture2D t, mediump sampler s) diff --git a/reference/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk b/reference/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk new file mode 100644 index 00000000000..153164920f0 --- /dev/null +++ b/reference/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag.vk @@ -0,0 +1,15 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out vec4 FragColor; + +void main() +{ + bool _15 = helperInvocationEXT(); + demote; + if (!_15) + { + FragColor = vec4(1.0, 0.0, 0.0, 1.0); + } +} + diff --git a/reference/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk 
b/reference/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk new file mode 100644 index 00000000000..302f8354021 --- /dev/null +++ b/reference/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag.vk @@ -0,0 +1,10 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; + bool _9 = helperInvocationEXT(); + bool helper = _9; +} + diff --git a/reference/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk b/reference/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk index 6e98ca0bff1..9a7862a4637 100644 --- a/reference/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk +++ b/reference/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag.vk @@ -1,19 +1,24 @@ #version 450 #extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_samplerless_texture_functions : require layout(set = 0, binding = 2, std140) uniform UBO { vec4 v[64]; } ubos[]; -layout(set = 0, binding = 3, std430) readonly buffer SSBO +layout(set = 0, binding = 3, std430) buffer SSBO { + uint counter; vec4 v[]; } ssbos[]; layout(set = 0, binding = 0) uniform texture2D uSamplers[]; layout(set = 0, binding = 1) uniform sampler uSamps[]; layout(set = 0, binding = 4) uniform sampler2D uCombinedSamplers[]; +layout(set = 0, binding = 0) uniform texture2DMS uSamplersMS[]; +layout(set = 0, binding = 5, r32f) uniform image2D uImages[]; +layout(set = 0, binding = 5, r32ui) uniform uimage2D uImagesU32[]; layout(location = 0) flat in int vIndex; layout(location = 0) out vec4 FragColor; @@ -22,9 +27,48 @@ layout(location = 1) in vec2 vUV; void main() { int i = vIndex; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(i + 10)], uSamps[nonuniformEXT(i + 40)]), vUV); - FragColor = texture(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); - FragColor += ubos[nonuniformEXT(i + 20)].v[i + 40]; - FragColor += ssbos[nonuniformEXT(i + 50)].v[i + 60]; + FragColor = texture(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 
40])), vUV); + int _49 = i + 10; + FragColor = texture(uCombinedSamplers[nonuniformEXT(_49)], vUV); + int _65 = i + 20; + int _69 = i + 40; + FragColor += ubos[nonuniformEXT(_65)].v[_69]; + int _83 = i + 50; + int _88 = i + 60; + FragColor += ssbos[nonuniformEXT(_83)].v[_88]; + int _96 = i + 60; + int _100 = i + 70; + ssbos[nonuniformEXT(_96)].v[_100] = vec4(20.0); + int _106 = i + 10; + FragColor = texelFetch(uSamplers[nonuniformEXT(_106)], ivec2(vUV), 0); + int _116 = i + 100; + uint _122 = atomicAdd(ssbos[_116].counter, 100u); + vec2 queried = textureQueryLod(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); + int _139 = i + 10; + queried += textureQueryLod(uCombinedSamplers[nonuniformEXT(_139)], vUV); + vec4 _147 = FragColor; + vec2 _149 = _147.xy + queried; + FragColor.x = _149.x; + FragColor.y = _149.y; + int _157 = i + 20; + FragColor.x += float(textureQueryLevels(uSamplers[nonuniformEXT(_157)])); + int _172 = i + 20; + FragColor.y += float(textureSamples(uSamplersMS[nonuniformEXT(_172)])); + int _184 = i + 20; + vec4 _189 = FragColor; + vec2 _191 = _189.xy + vec2(textureSize(uSamplers[nonuniformEXT(_184)], 0)); + FragColor.x = _191.x; + FragColor.y = _191.y; + int _202 = i + 50; + FragColor += imageLoad(uImages[nonuniformEXT(_202)], ivec2(vUV)); + int _213 = i + 20; + vec4 _218 = FragColor; + vec2 _220 = _218.xy + vec2(imageSize(uImages[nonuniformEXT(_213)])); + FragColor.x = _220.x; + FragColor.y = _220.y; + int _227 = i + 60; + imageStore(uImages[nonuniformEXT(_227)], ivec2(vUV), vec4(50.0)); + int _240 = i + 70; + uint _248 = imageAtomicAdd(uImagesU32[nonuniformEXT(_240)], ivec2(vUV), 40u); } diff --git a/reference/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk b/reference/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk deleted file mode 100644 index 434cb3d3d9f..00000000000 --- a/reference/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag.vk +++ 
/dev/null @@ -1,24 +0,0 @@ -#version 450 -#extension GL_EXT_scalar_block_layout : require - -layout(set = 0, binding = 0, std430) uniform UBO -{ - float a[1024]; - vec3 b[2]; -} _17; - -layout(set = 0, binding = 1, std430) uniform UBOEnhancedLayout -{ - layout(offset = 0) float c[1024]; - layout(offset = 4096) vec3 d[2]; - layout(offset = 10000) float e; -} _30; - -layout(location = 0) out float FragColor; -layout(location = 0) flat in int vIndex; - -void main() -{ - FragColor = (_17.a[vIndex] + _30.c[vIndex]) + _30.e; -} - diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag index 43393f4e770..8ca3085a10f 100644 --- a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag +++ b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag @@ -30,11 +30,11 @@ void main() { vec2 off = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[1], 0)); vec2 off2 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler[2], 1)); - highp vec2 param = (vTex + off) + off2; + vec2 param = (vTex + off) + off2; vec4 c0 = sample_func(param, SPIRV_Cross_CombineduTextureuSampler); - highp vec2 param_1 = (vTex + off) + off2; + vec2 param_1 = (vTex + off) + off2; vec4 c1 = sample_func_dual(param_1, SPIRV_Cross_CombineduTextureuSampler[1]); - highp vec2 param_2 = (vTex + off) + off2; + vec2 param_2 = (vTex + off) + off2; vec4 c2 = sample_func_dual_array(param_2, SPIRV_Cross_CombineduTextureuSampler); vec4 c3 = texture(SPIRV_Cross_CombineduTextureArrayuSampler[3], vTex3); vec4 c4 = texture(SPIRV_Cross_CombineduTextureCubeuSampler[1], vTex3); diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk index 495874ecc23..0afa489c588 100644 --- a/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk +++ 
b/reference/shaders/vulkan/frag/separate-sampler-texture-array.vk.frag.vk @@ -31,11 +31,11 @@ void main() { vec2 off = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[1], uSampler), 0)); vec2 off2 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture[2], uSampler), 1)); - highp vec2 param = (vTex + off) + off2; + vec2 param = (vTex + off) + off2; vec4 c0 = sample_func(uSampler, param); - highp vec2 param_1 = (vTex + off) + off2; + vec2 param_1 = (vTex + off) + off2; vec4 c1 = sample_func_dual(uSampler, uTexture[1], param_1); - highp vec2 param_2 = (vTex + off) + off2; + vec2 param_2 = (vTex + off) + off2; vec4 c2 = sample_func_dual_array(uSampler, uTexture, param_2); vec4 c3 = texture(sampler2DArray(uTextureArray[3], uSampler), vTex3); vec4 c4 = texture(samplerCube(uTextureCube[1], uSampler), vTex3); diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag index 78477cfbae1..c2530d5d8e7 100644 --- a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag +++ b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag @@ -25,9 +25,9 @@ void main() { vec2 off = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 0)); vec2 off2 = vec2(1.0) / vec2(textureSize(SPIRV_Cross_CombineduTextureuSampler, 1)); - highp vec2 param = (vTex + off) + off2; + vec2 param = (vTex + off) + off2; vec4 c0 = sample_func(param, SPIRV_Cross_CombineduTextureuSampler); - highp vec2 param_1 = (vTex + off) + off2; + vec2 param_1 = (vTex + off) + off2; vec4 c1 = sample_func_dual(param_1, SPIRV_Cross_CombineduTextureuSampler); vec4 c2 = texture(SPIRV_Cross_CombineduTextureArrayuSampler, vTex3); vec4 c3 = texture(SPIRV_Cross_CombineduTextureCubeuSampler, vTex3); diff --git a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk index cfa2f39616c..105379d770c 100644 --- 
a/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk +++ b/reference/shaders/vulkan/frag/separate-sampler-texture.vk.frag.vk @@ -26,9 +26,9 @@ void main() { vec2 off = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 0)); vec2 off2 = vec2(1.0) / vec2(textureSize(sampler2D(uTexture, uSampler), 1)); - highp vec2 param = (vTex + off) + off2; + vec2 param = (vTex + off) + off2; vec4 c0 = sample_func(uSampler, param); - highp vec2 param_1 = (vTex + off) + off2; + vec2 param_1 = (vTex + off) + off2; vec4 c1 = sample_func_dual(uSampler, uTexture, param_1); vec4 c2 = texture(sampler2DArray(uTextureArray, uSampler), vTex3); vec4 c3 = texture(samplerCube(uTextureCube, uSampler), vTex3); diff --git a/reference/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk b/reference/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk index 34bfea02604..04c4062a6c9 100644 --- a/reference/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk +++ b/reference/shaders/vulkan/frag/spec-constant-ternary.vk.frag.vk @@ -8,6 +8,7 @@ layout(location = 0) out float FragColor; void main() { - FragColor = float(f); + float _17 = float(f); + FragColor = _17; } diff --git a/reference/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk b/reference/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk new file mode 100644 index 00000000000..4cb6bf8d8fa --- /dev/null +++ b/reference/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit.vk @@ -0,0 +1,22 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void in_func() +{ + if (payload > 0.0) + { + ignoreIntersectionEXT; + } + else + { + terminateRayEXT; + } +} + +void main() +{ + in_func(); +} + diff --git a/reference/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk b/reference/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk new file mode 100644 index 00000000000..cee09bc0ea5 --- /dev/null +++ 
b/reference/shaders/vulkan/rahit/terminators.nocompat.vk.rahit.vk @@ -0,0 +1,22 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +rayPayloadInNV float payload; + +void in_func() +{ + if (payload > 0.0) + { + ignoreIntersectionNV(); + } + else + { + terminateRayNV(); + } +} + +void main() +{ + in_func(); +} + diff --git a/reference/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk b/reference/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk new file mode 100644 index 00000000000..5adfac164fa --- /dev/null +++ b/reference/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) callableDataInEXT float c; + +void main() +{ + executeCallableEXT(10u, 0); +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..b6c1876d313 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo2 hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..614a04d95e7 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit.vk @@ -0,0 +1,24 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo2 hit; 
+ +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..56e8ff4aa34 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,29 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo2 hit; + +void in_function() +{ + payload.a = hit.a; + payload.b = hit.b; +} + +void main() +{ + in_function(); +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e747bb29d05 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit.vk @@ -0,0 +1,29 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +struct Foo2 +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo2 hit; + +void in_function() +{ + payload.a = hit.a; + payload.b = hit.b; +} + +void main() +{ + in_function(); +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..eeccd3bb092 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec2 payload; +hitAttributeEXT vec2 hit; + +void main() +{ + payload = hit; +} + diff 
--git a/reference/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..908d96344f3 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit.vk @@ -0,0 +1,11 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec2 payload; +hitAttributeNV vec2 hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a51e6b088f3 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..133bdfc1d90 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo +{ + float a; + float b; +}; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo hit; + +void main() +{ + payload = hit; +} + diff --git a/reference/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e4e0103ddb5 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) 
rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_HitKindEXT; +} + diff --git a/reference/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..64f79a8dce0 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_HitKindNV; +} + diff --git a/reference/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e94e3323c98 --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} + diff --git a/reference/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..9004a00c40e --- /dev/null +++ b/reference/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_HitTNV; +} + diff --git a/reference/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a013baa11d5 --- /dev/null +++ b/reference/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsEXT; +} + diff --git 
a/reference/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..d17ab8ce76c --- /dev/null +++ b/reference/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsNV; +} + diff --git a/reference/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e28af5d2527 --- /dev/null +++ b/reference/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_InstanceCustomIndexEXT); +} + diff --git a/reference/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..531a1fc2845 --- /dev/null +++ b/reference/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_InstanceCustomIndexNV); +} + diff --git a/reference/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0413e0d234a --- /dev/null +++ b/reference/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_InstanceID); +} + diff --git 
a/reference/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..ff551db7c9d --- /dev/null +++ b/reference/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_InstanceID); +} + diff --git a/reference/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..237d4790e55 --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionEXT; +} + diff --git a/reference/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..01afa0e067a --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionNV; +} + diff --git a/reference/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..5739ac09ff5 --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginEXT; +} + diff --git 
a/reference/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a49e17a1738 --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginNV; +} + diff --git a/reference/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..7922e1efbf4 --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldEXT * vec4(payload, 1.0); +} + diff --git a/reference/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..fc2c5ed0c2c --- /dev/null +++ b/reference/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldNV * vec4(payload, 1.0); +} + diff --git a/reference/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e896816ec30 --- /dev/null +++ b/reference/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,20 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInEXT Payload payload; + +void write_incoming_payload_in_function() +{ + 
payload.a = vec4(10.0); +} + +void main() +{ + write_incoming_payload_in_function(); +} + diff --git a/reference/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a3ddd56b176 --- /dev/null +++ b/reference/shaders/vulkan/rchit/payloads.nocompat.vk.rchit.vk @@ -0,0 +1,20 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInNV Payload payload; + +void write_incoming_payload_in_function() +{ + payload.a = vec4(10.0); +} + +void main() +{ + write_incoming_payload_in_function(); +} + diff --git a/reference/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..667c015e8d6 --- /dev/null +++ b/reference/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = uint(gl_PrimitiveID); +} + diff --git a/reference/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..d3b0ef19429 --- /dev/null +++ b/reference/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = uint(gl_PrimitiveID); +} + diff --git a/reference/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..e94e3323c98 --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + 
+layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} + diff --git a/reference/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..769c96ad6b7 --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTmaxNV; +} + diff --git a/reference/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..04b89549508 --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTminEXT; +} + diff --git a/reference/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..2709899a13a --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTminNV; +} + diff --git a/reference/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..05af948b379 --- /dev/null +++ b/reference/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 1.0 + float(gl_InstanceID); +} + diff --git 
a/reference/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk index 547b9cd51a5..103fd66b801 100644 --- a/reference/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk +++ b/reference/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit.vk @@ -5,6 +5,6 @@ layout(location = 0) rayPayloadInNV float payload; void main() { - payload = 1.0; + payload = 1.0 + float(gl_InstanceID); } diff --git a/reference/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..68ba2bafa54 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionEXT; +} + diff --git a/reference/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..4acf03e0649 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionNV; +} + diff --git a/reference/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..a5c6766e055 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginEXT; +} + diff --git 
a/reference/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..70241f23620 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginNV; +} + diff --git a/reference/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..309ca4c6f17 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldToObjectEXT * vec4(payload, 1.0); +} + diff --git a/reference/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk b/reference/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk new file mode 100644 index 00000000000..0b93e38acd1 --- /dev/null +++ b/reference/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldToObjectNV * vec4(payload, 1.0); +} + diff --git a/reference/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk b/reference/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk new file mode 100644 index 00000000000..d131b0aa800 --- /dev/null +++ b/reference/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(push_constant, std430) uniform Registers +{ + uvec2 ptr; +} _19; + +layout(location = 0) rayPayloadEXT vec4 payload; + +void main() +{ + 
vec3 origin = vec3(0.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(accelerationStructureEXT(_19.ptr), 1u, 255u, 0u, 0u, 0u, origin, 0.0, direction, 100.0, 0); +} + diff --git a/reference/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk b/reference/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk new file mode 100644 index 00000000000..8bb3d0070b3 --- /dev/null +++ b/reference/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT vec4 payload; +layout(location = 1) callableDataEXT float blend; +layout(set = 0, binding = 1, rgba32f) uniform writeonly image2D image; + +void main() +{ + vec3 origin = vec3(0.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 1u, 255u, 0u, 0u, 0u, origin, 0.0, direction, 100.0, 0); + executeCallableEXT(0u, 1); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), payload + vec4(blend)); +} + diff --git a/reference/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..1614c49626e --- /dev/null +++ b/reference/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDEXT.xy), vec4(1.0)); +} + diff --git a/reference/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..f907e6fd606 --- /dev/null +++ b/reference/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + 
+void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDNV.xy), vec4(1.0)); +} + diff --git a/reference/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..240e93daa48 --- /dev/null +++ b/reference/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeEXT.xy) - ivec2(1), vec4(1.0)); +} + diff --git a/reference/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..08992c63194 --- /dev/null +++ b/reference/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeNV.xy) - ivec2(1), vec4(1.0)); +} + diff --git a/reference/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..7885f4f3d34 --- /dev/null +++ b/reference/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,47 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + float a; + float b; +}; + +struct Block +{ + float a; + float b; + Payload c; + Payload d; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT Payload payload2; +layout(location = 1) rayPayloadEXT float payload1; +layout(location = 2) rayPayloadEXT Block _71; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +vec4 trace_in_function() +{ + vec4 result = vec4(0.0); + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 
direction = vec3(0.0, 1.0, 0.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + result += vec4(payload2.a); + result += vec4(payload2.b); + return result; +} + +void main() +{ + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 1); + vec4 result = vec4(payload1); + vec4 _62 = trace_in_function(); + result += _62; + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 2); + result += vec4(((((_71.a + _71.b) + _71.c.a) + _71.c.b) + _71.d.a) + _71.d.b); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), result); +} + diff --git a/reference/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..5d6b24c6018 --- /dev/null +++ b/reference/shaders/vulkan/rgen/payloads.nocompat.vk.rgen.vk @@ -0,0 +1,47 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + float a; + float b; +}; + +struct Block +{ + float a; + float b; + Payload c; + Payload d; +}; + +layout(set = 0, binding = 1) uniform accelerationStructureNV as; +layout(location = 1) rayPayloadNV Payload payload2; +layout(location = 0) rayPayloadNV float payload1; +layout(location = 2) rayPayloadNV Block _71; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +vec4 trace_in_function() +{ + vec4 result = vec4(0.0); + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 1); + result += vec4(payload2.a); + result += vec4(payload2.b); + return result; +} + +void main() +{ + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + vec4 result = vec4(payload1); + vec4 _62 = trace_in_function(); + result += _62; + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 
1000.0, 2); + result += vec4(((((_71.a + _71.b) + _71.c.a) + _71.c.b) + _71.d.a) + _71.d.b); + imageStore(image, ivec2(gl_LaunchIDNV.xy), result); +} + diff --git a/reference/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..71e04d670b9 --- /dev/null +++ b/reference/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,21 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +float pure_call(vec2 launchID, vec2 launchSize) +{ + vec3 origin = vec3(launchID.x / launchSize.x, launchID.y / launchSize.y, 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + return 0.0; +} + +void main() +{ + vec2 param = vec2(gl_LaunchIDEXT.xy); + vec2 param_1 = vec2(gl_LaunchSizeEXT.xy); + float _64 = pure_call(param, param_1); +} + diff --git a/reference/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..67deac226d1 --- /dev/null +++ b/reference/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; +layout(set = 0, binding = 0, rgba8) uniform writeonly image2D image; + +void main() +{ + vec4 col = vec4(0.0, 0.0, 0.0, 1.0); + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + col.y = payload; + imageStore(image, ivec2(gl_LaunchIDEXT.xy), 
col); +} + diff --git a/reference/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk b/reference/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk new file mode 100644 index 00000000000..3056e8ad281 --- /dev/null +++ b/reference/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen.vk @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(shaderRecordEXT, std430) buffer sbt +{ + vec3 direction; + float tmax; +} _20; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(0.0), 0.0, _20.direction, _20.tmax, 0); +} + diff --git a/reference/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk b/reference/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk new file mode 100644 index 00000000000..761609a8128 --- /dev/null +++ b/reference/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint.vk @@ -0,0 +1,13 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +void in_func() +{ + bool _13 = reportIntersectionEXT(0.5, 10u); +} + +void main() +{ + in_func(); +} + diff --git a/reference/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk b/reference/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk new file mode 100644 index 00000000000..c99b78495ae --- /dev/null +++ b/reference/shaders/vulkan/rint/report-intersection.nocompat.vk.rint.vk @@ -0,0 +1,13 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +void in_func() +{ + bool _13 = reportIntersectionNV(0.5, 10u); +} + +void main() +{ + in_func(); +} + diff --git a/reference/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk b/reference/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk new file mode 100644 index 00000000000..c055a268144 --- /dev/null +++ 
b/reference/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss.vk @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 0.0; +} + diff --git a/reference/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk b/reference/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk new file mode 100644 index 00000000000..696c998c279 --- /dev/null +++ b/reference/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss.vk @@ -0,0 +1,13 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadInEXT float p; + +void main() +{ + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); +} + diff --git a/reference/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk b/reference/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk new file mode 100644 index 00000000000..9cadcdb6dce --- /dev/null +++ b/reference/shaders/vulkan/vert/device-group.nocompat.vk.vert.vk @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_device_group : require + +void main() +{ + gl_Position = vec4(float(gl_DeviceIndex)); +} + diff --git a/reference/shaders/vulkan/vert/small-storage.vk.vert b/reference/shaders/vulkan/vert/small-storage.vk.vert index b3aafc8d8c1..2c4beb71e5f 100644 --- a/reference/shaders/vulkan/vert/small-storage.vk.vert +++ b/reference/shaders/vulkan/vert/small-storage.vk.vert @@ -1,10 +1,20 @@ #version 450 -#if defined(GL_AMD_gpu_shader_int16) +#if defined(GL_EXT_shader_explicit_arithmetic_types_int16) +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#elif defined(GL_AMD_gpu_shader_int16) #extension 
GL_AMD_gpu_shader_int16 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require #else #error No extension available for Int16. #endif +#if defined(GL_EXT_shader_explicit_arithmetic_types_int8) #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for Int8. +#endif #if defined(GL_AMD_gpu_shader_half_float) #extension GL_AMD_gpu_shader_half_float : require #elif defined(GL_NV_gpu_shader5) diff --git a/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert b/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert index 60ba1882f82..d939aa625c5 100644 --- a/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert +++ b/reference/shaders/vulkan/vert/vulkan-vertex.vk.vert @@ -1,6 +1,13 @@ #version 310 es +#ifdef GL_ARB_shader_draw_parameters +#extension GL_ARB_shader_draw_parameters : enable +#endif +#ifdef GL_ARB_shader_draw_parameters +#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB +#else uniform int SPIRV_Cross_BaseInstance; +#endif void main() { diff --git a/samples/cpp/Makefile b/samples/cpp/Makefile index 225bb3d57d1..e5b66d4b88f 100644 --- a/samples/cpp/Makefile +++ b/samples/cpp/Makefile @@ -1,3 +1,6 @@ +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + SOURCES := $(wildcard *.comp) SPIRV := $(SOURCES:.comp=.spv) CPP_INTERFACE := $(SOURCES:.comp=.spv.cpp) diff --git a/samples/cpp/atomics.comp b/samples/cpp/atomics.comp index 0bf6d2ad011..f315124743b 100644 --- a/samples/cpp/atomics.comp +++ b/samples/cpp/atomics.comp @@ -1,3 +1,6 @@ +// Copyright 2016-2021 The Khronos Group Inc. 
+// SPDX-License-Identifier: Apache-2.0 + #version 310 es layout(local_size_x = 64) in; diff --git a/samples/cpp/atomics.cpp b/samples/cpp/atomics.cpp index 89351a5ae5f..36d8fc15c06 100644 --- a/samples/cpp/atomics.cpp +++ b/samples/cpp/atomics.cpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/samples/cpp/multiply.comp b/samples/cpp/multiply.comp index 1ac7869ad05..678415aed65 100644 --- a/samples/cpp/multiply.comp +++ b/samples/cpp/multiply.comp @@ -1,3 +1,6 @@ +// Copyright 2016-2021 The Khronos Group Inc. +// SPDX-License-Identifier: Apache-2.0 + #version 310 es layout(local_size_x = 64) in; diff --git a/samples/cpp/multiply.cpp b/samples/cpp/multiply.cpp index daa1fc6477d..4ff61843198 100644 --- a/samples/cpp/multiply.cpp +++ b/samples/cpp/multiply.cpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/samples/cpp/shared.comp b/samples/cpp/shared.comp index 7d59060aa98..a9d55d2c5c6 100644 --- a/samples/cpp/shared.comp +++ b/samples/cpp/shared.comp @@ -1,3 +1,6 @@ +// Copyright 2016-2021 The Khronos Group Inc. +// SPDX-License-Identifier: Apache-2.0 + #version 310 es layout(local_size_x = 64) in; diff --git a/samples/cpp/shared.cpp b/samples/cpp/shared.cpp index 5be62d681fc..b997704bf81 100644 --- a/samples/cpp/shared.cpp +++ b/samples/cpp/shared.cpp @@ -1,5 +1,6 @@ /* * Copyright 2015-2017 ARM Limited + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp b/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp new file mode 100644 index 00000000000..3371e3af21b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp @@ -0,0 +1,118 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 437 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Baz "Baz" + OpMemberName %Baz 0 "c" + OpName %Bar "Bar" + OpMemberName %Bar 0 "d" + OpMemberName %Bar 1 "baz" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpMemberName %Foo 1 "b" + OpMemberName %Foo 2 "c" + OpName %Baz_0 "Baz" + OpMemberName %Baz_0 0 "c" + OpName %Bar_0 "Bar" + OpMemberName %Bar_0 0 "d" + OpMemberName %Bar_0 1 "baz" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "a" + OpMemberName %Foo_0 1 "b" + OpMemberName %Foo_0 2 "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "foo" + OpMemberName %SSBO 1 "foo2" + OpName %_ "" + OpDecorate %_arr_float_uint_4_0 ArrayStride 4 + OpDecorate %_arr__arr_float_uint_4_0_uint_2 ArrayStride 16 + OpMemberDecorate %Baz_0 0 Offset 0 + OpDecorate %_arr_Baz_0_uint_2 ArrayStride 4 + OpMemberDecorate %Bar_0 0 Offset 0 + OpMemberDecorate %Bar_0 1 Offset 32 + OpDecorate %_arr_Bar_0_uint_5 ArrayStride 40 + OpMemberDecorate %Foo_0 0 RowMajor + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Foo_0 0 MatrixStride 8 + OpMemberDecorate %Foo_0 1 Offset 16 + OpMemberDecorate %Foo_0 2 Offset 24 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 224 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = 
OpTypeVector %float 2 +%mat2v2float = OpTypeMatrix %v2float 2 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 + %uint_2 = OpConstant %uint 2 +%_arr__arr_float_uint_4_uint_2 = OpTypeArray %_arr_float_uint_4 %uint_2 + %Baz = OpTypeStruct %float +%_arr_Baz_uint_2 = OpTypeArray %Baz %uint_2 + %Bar = OpTypeStruct %_arr__arr_float_uint_4_uint_2 %_arr_Baz_uint_2 + %uint_5 = OpConstant %uint 5 +%_arr_Bar_uint_5 = OpTypeArray %Bar %uint_5 + %Foo = OpTypeStruct %mat2v2float %v2float %_arr_Bar_uint_5 +%_ptr_Function_Foo = OpTypePointer Function %Foo +%_arr_float_uint_4_0 = OpTypeArray %float %uint_4 +%_arr__arr_float_uint_4_0_uint_2 = OpTypeArray %_arr_float_uint_4_0 %uint_2 + %Baz_0 = OpTypeStruct %float +%_arr_Baz_0_uint_2 = OpTypeArray %Baz_0 %uint_2 + %Bar_0 = OpTypeStruct %_arr__arr_float_uint_4_0_uint_2 %_arr_Baz_0_uint_2 +%_arr_Bar_0_uint_5 = OpTypeArray %Bar_0 %uint_5 + %Foo_0 = OpTypeStruct %mat2v2float %v2float %_arr_Bar_0_uint_5 + %SSBO = OpTypeStruct %Foo_0 %Foo_0 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_mat2v2float = OpTypePointer Function %mat2v2float + %int_1 = OpConstant %int 1 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %int_2 = OpConstant %int 2 +%_ptr_Function__arr_Bar_uint_5 = OpTypePointer Function %_arr_Bar_uint_5 +%_ptr_Function_Bar = OpTypePointer Function %Bar +%_ptr_Function__arr__arr_float_uint_4_uint_2 = OpTypePointer Function %_arr__arr_float_uint_4_uint_2 +%_ptr_Function__arr_float_uint_4 = OpTypePointer Function %_arr_float_uint_4 +%_ptr_Function_float = OpTypePointer Function %float + %int_3 = OpConstant %int 3 +%_ptr_Function__arr_Baz_uint_2 = OpTypePointer Function %_arr_Baz_uint_2 +%_ptr_Function_Baz = OpTypePointer Function %Baz + %int_4 = OpConstant %int 4 + %float_1 = OpConstant %float 1 + 
%float_2 = OpConstant %float 2 + %float_5 = OpConstant %float 5 +%_ptr_Uniform_mat2v2float = OpTypePointer Uniform %mat2v2float +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform__arr_Bar_0_uint_5 = OpTypePointer Uniform %_arr_Bar_0_uint_5 +%_ptr_Uniform_Bar_0 = OpTypePointer Uniform %Bar_0 +%_ptr_Uniform__arr__arr_float_uint_4_0_uint_2 = OpTypePointer Uniform %_arr__arr_float_uint_4_0_uint_2 +%_ptr_Uniform__arr_float_uint_4_0 = OpTypePointer Uniform %_arr_float_uint_4_0 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform__arr_Baz_0_uint_2 = OpTypePointer Uniform %_arr_Baz_0_uint_2 +%_ptr_Uniform_Baz_0 = OpTypePointer Uniform %Baz_0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %ptr_load = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 + %ptr_store = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1 + %loaded = OpLoad %Foo_0 %ptr_load + OpStore %ptr_store %loaded + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..87aee2db54f --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %T "T" + OpMemberName %T 0 "a" + OpName %v "v" + OpName %T_0 "T" + OpMemberName %T_0 0 "b" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "foo" + OpName %_ "" + OpName %T_1 "T" + OpMemberName %T_1 0 "c" + OpName %SSBO2 "SSBO2" + OpMemberName %SSBO2 0 "bar" + OpName %__0 
"" + OpMemberDecorate %T_0 0 Offset 0 + OpDecorate %_runtimearr_T_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %T_1 0 Offset 0 + OpDecorate %_runtimearr_T_1 ArrayStride 16 + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %T = OpTypeStruct %float +%_ptr_Function_T = OpTypePointer Function %T + %float_40 = OpConstant %float 40 + %11 = OpConstantComposite %T %float_40 + %T_0 = OpTypeStruct %float +%_runtimearr_T_0 = OpTypeRuntimeArray %T_0 + %SSBO1 = OpTypeStruct %_runtimearr_T_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_T_0 = OpTypePointer Uniform %T_0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %T_1 = OpTypeStruct %float +%_runtimearr_T_1 = OpTypeRuntimeArray %T_1 + %SSBO2 = OpTypeStruct %_runtimearr_T_1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %__0 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int_30 = OpConstant %int 30 +%_ptr_Uniform_T_1 = OpTypePointer Uniform %T_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_T Function + OpStore %v %11 + %20 = OpLoad %T %v + %22 = OpAccessChain %_ptr_Uniform_T_0 %_ %int_0 %int_10 + %23 = OpCompositeExtract %float %20 0 + %25 = OpAccessChain %_ptr_Uniform_float %22 %int_0 + OpStore %25 %23 + %32 = OpLoad %T %v + %34 = OpAccessChain %_ptr_Uniform_T_1 %__0 %int_0 %int_30 + %35 = OpCompositeExtract %float %32 0 + %36 = OpAccessChain %_ptr_Uniform_float %34 %int_0 + OpStore %36 %35 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp b/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 
100644 index 00000000000..3f2d141a1f5 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..435fa322215 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,97 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang 
Reference Front End; 7 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "ints" + OpMemberName %SSBO 1 "uints" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + + %int_1 = OpConstant %int 1 + %uint_11 = OpConstant %uint 11 + + %SSBO = OpTypeStruct %v4int %v4uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %ints_ptr = OpAccessChain %_ptr_Uniform_v4int %_ %int_0 + %uints_ptr = OpAccessChain %_ptr_Uniform_v4uint %_ %int_1 + %ints = OpLoad %v4int %ints_ptr + %uints = OpLoad %v4uint %uints_ptr + + %ints_alt = OpVectorShuffle %v4int %ints %ints 3 2 1 0 + %uints_alt = OpVectorShuffle %v4uint %uints %uints 3 2 1 0 + + %int_to_int_popcount = OpBitCount %v4int %ints + %int_to_uint_popcount = OpBitCount %v4uint %ints + %uint_to_int_popcount = OpBitCount %v4int %uints + %uint_to_uint_popcount = OpBitCount %v4uint %uints + + ; BitReverse must have matching types w.r.t. sign, yay. + %int_to_int_reverse = OpBitReverse %v4int %ints + ;%int_to_uint_reverse = OpBitReverse %v4uint %ints + ;%uint_to_int_reverse = OpBitReverse %v4int %uints + %uint_to_uint_reverse = OpBitReverse %v4uint %uints + + ; Base and Result must match. 
+ %int_to_int_sbit = OpBitFieldSExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_sbit = OpBitFieldSExtract %v4uint %ints %offset %count + ;%uint_to_int_sbit = OpBitFieldSExtract %v4int %uints %offset %count + %uint_to_uint_sbit = OpBitFieldSExtract %v4uint %uints %uint_11 %int_1 + + ; Base and Result must match. + %int_to_int_ubit = OpBitFieldUExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_ubit = OpBitFieldUExtract %v4uint %ints %offset %count + ;%uint_to_int_ubit = OpBitFieldUExtract %v4int %uints %offset %count + %uint_to_uint_ubit = OpBitFieldUExtract %v4uint %uints %uint_11 %int_1 + + %int_to_int_insert = OpBitFieldInsert %v4int %ints %ints_alt %int_1 %uint_11 + %uint_to_uint_insert = OpBitFieldInsert %v4uint %uints %uints_alt %uint_11 %int_1 + + OpStore %ints_ptr %int_to_int_popcount + OpStore %uints_ptr %int_to_uint_popcount + OpStore %ints_ptr %uint_to_int_popcount + OpStore %uints_ptr %uint_to_uint_popcount + + OpStore %ints_ptr %int_to_int_reverse + ;OpStore %uints_ptr %int_to_uint_reverse + ;OpStore %ints_ptr %uint_to_int_reverse + OpStore %uints_ptr %uint_to_uint_reverse + + OpStore %ints_ptr %int_to_int_sbit + ;OpStore %uints_ptr %int_to_uint_sbit + ;OpStore %ints_ptr %uint_to_int_sbit + OpStore %uints_ptr %uint_to_uint_sbit + + OpStore %ints_ptr %int_to_int_ubit + ;OpStore %uints_ptr %int_to_uint_ubit + ;OpStore %ints_ptr %uint_to_int_ubit + OpStore %uints_ptr %uint_to_uint_ubit + + OpStore %ints_ptr %int_to_int_insert + OpStore %uints_ptr %uint_to_uint_insert + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp b/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..e3b785cd52b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,72 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint 
GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "u" + OpMemberName %SSBO 1 "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + %SSBO = OpTypeStruct %uvec4 %ivec4 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uvec4 = OpTypePointer Uniform %uvec4 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_ivec4 = OpTypePointer Uniform %ivec4 + %main = OpFunction %void None %3 + %5 = OpLabel + %uptr = OpAccessChain %_ptr_Uniform_uvec4 %_ %int_0 + %iptr = OpAccessChain %_ptr_Uniform_ivec4 %_ %int_1 + %uvalue = OpLoad %uvec4 %uptr + %ivalue = OpLoad %ivec4 %iptr + + %lsb_uint_to_uint = OpExtInst %uvec4 %1 FindILsb %uvalue + %lsb_uint_to_int = OpExtInst %ivec4 %1 FindILsb %uvalue + %lsb_int_to_uint = OpExtInst %uvec4 %1 FindILsb %ivalue + %lsb_int_to_int = OpExtInst %ivec4 %1 FindILsb %ivalue + + %umsb_uint_to_uint = OpExtInst %uvec4 %1 FindUMsb %uvalue + %umsb_uint_to_int = OpExtInst %ivec4 %1 FindUMsb %uvalue + %umsb_int_to_uint = OpExtInst %uvec4 %1 FindUMsb %ivalue + %umsb_int_to_int = OpExtInst %ivec4 %1 FindUMsb %ivalue + + %smsb_uint_to_uint = OpExtInst %uvec4 %1 FindSMsb %uvalue + %smsb_uint_to_int = OpExtInst %ivec4 %1 FindSMsb %uvalue + %smsb_int_to_uint = OpExtInst %uvec4 %1 FindSMsb %ivalue + %smsb_int_to_int = OpExtInst %ivec4 %1 FindSMsb %ivalue + + OpStore %uptr %lsb_uint_to_uint + OpStore %iptr %lsb_uint_to_int + OpStore %uptr %lsb_int_to_uint + OpStore %iptr %lsb_int_to_int + + OpStore %uptr %umsb_uint_to_uint + OpStore %iptr %umsb_uint_to_int + OpStore %uptr %umsb_int_to_uint + OpStore %iptr 
%umsb_int_to_int + + OpStore %uptr %smsb_uint_to_uint + OpStore %iptr %smsb_uint_to_int + OpStore %uptr %smsb_int_to_uint + OpStore %iptr %smsb_int_to_int + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp b/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp new file mode 100644 index 00000000000..132f38bf72d --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/buffer-atomic-nonuniform.asm.sm51.nonuniformresource.comp @@ -0,0 +1,53 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageBufferArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbos "ssbos" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 0 + OpDecorate %ssbos Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %22 NonUniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %SSBO = OpTypeStruct %uint +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_uint = OpTypePointer Input %uint + %int = OpTypeInt 32 1 + %int_0 = 
OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %17 = OpLoad %uint %16 + %18 = OpCopyObject %uint %17 + %22 = OpAccessChain %_ptr_Uniform_uint %ssbos %18 %int_0 + %25 = OpAtomicIAdd %uint %22 %uint_1 %uint_0 %uint_1 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..e1dcb0ef8e2 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 49 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %gl_LocalInvocationID + OpExecutionMode %main LocalSize 4 4 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %25 "indexable" + OpName %38 "indexable" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO 
Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_4 = OpConstant %uint 4 +%_arr_int_uint_4 = OpTypeArray %int %uint_4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %25 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_2 %int_3 +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4 +%_ptr_Function_int = OpTypePointer Function %int + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %38 = OpConstantComposite %_arr_int_uint_4 %int_4 %int_5 %int_6 %int_7 + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_4 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_int_uint_4 Function +%indexable_0 = OpVariable %_ptr_Function__arr_int_uint_4 Function + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %27 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %28 = OpLoad %uint %27 + OpStore %indexable %25 + %32 = OpAccessChain %_ptr_Function_int %indexable %28 + %33 = OpLoad %int %32 + %40 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_1 + %41 = OpLoad %uint %40 + OpStore %indexable_0 %38 + %43 = OpAccessChain %_ptr_Function_int %indexable_0 %41 + %44 = OpLoad %int %43 + %45 = OpIAdd %int %33 %44 + %47 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %47 %45 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp 
b/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..73f3ceee1ad --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 26 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + ;OpEntryPoint GLCompute %main "main" %Samp %ubo %ssbo %v %w + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Samp "Samp" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %ubo "ubo" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbo "ssbo" + OpName %v "v" + OpName %w "w" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %Samp DescriptorSet 0 + OpDecorate %Samp Binding 0 + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %ubo DescriptorSet 0 + OpDecorate %ubo Binding 1 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %float = OpTypeFloat 32 + %12 = OpTypeImage %float 2D 0 0 0 1 Unknown + %13 = OpTypeSampledImage %12 +%_ptr_UniformConstant_13 = OpTypePointer UniformConstant %13 + %Samp = OpVariable %_ptr_UniformConstant_13 UniformConstant + %UBO = OpTypeStruct %float +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %ubo = OpVariable %_ptr_Uniform_UBO Uniform + %SSBO = OpTypeStruct %float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer +%_ptr_Private_float = OpTypePointer 
Private %float + %v = OpVariable %_ptr_Private_float Private +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %w = OpVariable %_ptr_Workgroup_float Workgroup + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp b/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp new file mode 100644 index 00000000000..30db11d45bc --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.fxconly.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %ResTypeMod = OpTypeStruct %float %float +%_ptr_Function_ResTypeMod = OpTypePointer Function %ResTypeMod + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_20 = OpConstant %float 20 + %int_1 = OpConstant %int 1 +%_ptr_Function_float = OpTypePointer Function %float +%ResTypeFrexp = OpTypeStruct %float %int +%_ptr_Function_ResTypeFrexp = OpTypePointer Function %ResTypeFrexp + %float_40 = OpConstant %float 40 +%_ptr_Function_int = OpTypePointer Function %int + %SSBO = OpTypeStruct %float %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_int = OpTypePointer Uniform %int + %main = OpFunction %void None %3 + %5 = OpLabel + %modres = OpExtInst %ResTypeMod %1 ModfStruct %float_20 + %frexpres = OpExtInst %ResTypeFrexp %1 FrexpStruct %float_40 + + %modres_f = 
OpCompositeExtract %float %modres 0 + %modres_i = OpCompositeExtract %float %modres 1 + %frexpres_f = OpCompositeExtract %float %frexpres 0 + %frexpres_i = OpCompositeExtract %int %frexpres 1 + + %float_ptr = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %int_ptr = OpAccessChain %_ptr_Uniform_int %_ %int_1 + + OpStore %float_ptr %modres_f + OpStore %float_ptr %modres_i + OpStore %float_ptr %frexpres_f + OpStore %int_ptr %frexpres_i + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp b/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp new file mode 100644 index 00000000000..5dad9dd5ed8 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/image-atomic-nonuniform.asm.sm51.nonuniformresource.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 32 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageImageArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %uImage "uImage" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %uImage DescriptorSet 0 + OpDecorate %uImage Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %30 NonUniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %7 = OpTypeImage %uint 2D 0 0 0 2 R32ui +%_runtimearr_7 = OpTypeRuntimeArray %7 +%_ptr_UniformConstant__runtimearr_7 = OpTypePointer UniformConstant %_runtimearr_7 + %uImage = OpVariable %_ptr_UniformConstant__runtimearr_7 UniformConstant + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = 
OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 + %v2uint = OpTypeVector %uint 2 + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%_ptr_Image_uint = OpTypePointer Image %uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %17 = OpLoad %uint %16 + %18 = OpCopyObject %uint %17 + %20 = OpAccessChain %_ptr_UniformConstant_7 %uImage %18 + %22 = OpLoad %v3uint %gl_GlobalInvocationID + %23 = OpVectorShuffle %v2uint %22 %22 0 1 + %26 = OpBitcast %v2int %23 + %30 = OpImageTexelPointer %_ptr_Image_uint %20 %26 %uint_0 + %31 = OpAtomicIAdd %uint %30 %uint_1 %uint_0 %uint_1 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp b/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 00000000000..2eaef4bdbee --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,60 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void 
= OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + %spec_1 = OpSpecConstant %uint 11 + %spec_2 = OpSpecConstant %uint 12 + %spec_3 = OpSpecConstant %uint 13 + %spec_4 = OpSpecConstant %uint 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %uint_3 %spec_1 %spec_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %27 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp b/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp new file mode 100644 index 00000000000..3031f4bb8af --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/local-size-id.asm.comp @@ -0,0 +1,76 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + 
OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + ; Test that we can declare the spec constant as signed. + ; Needs implicit bitcast since WorkGroupSize is uint. + %spec_1 = OpSpecConstant %int 11 + %spec_2 = OpSpecConstant %int 12 + %spec_3 = OpSpecConstant %int 13 + %spec_4 = OpSpecConstant %int 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + ; Test that we can build spec constant composites out of local size id values. + ; Needs special case handling. 
+ %spec_3_op = OpSpecConstantOp %uint IAdd %spec_3 %uint_3 +%WorkGroupSize = OpSpecConstantComposite %v3uint %spec_3_op %spec_4 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %wg_f = OpConvertUToF %v3float %WorkGroupSize + %wg_f4 = OpVectorShuffle %v4float %wg_f %wg_f 0 1 2 2 + ; Test that we can use the spec constants directly which needs to translate to gl_WorkGroupSize.elem. + ; Needs special case handling. + %res = OpFAdd %v4float %27 %wg_f4 + %f0 = OpConvertSToF %float %spec_3 + %f1 = OpConvertSToF %float %spec_4 + %f2 = OpConvertSToF %float %uint_2 + %res1 = OpVectorTimesScalar %v4float %res %f0 + %res2 = OpVectorTimesScalar %v4float %res1 %f1 + %res3 = OpVectorTimesScalar %v4float %res2 %f2 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %res3 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp b/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp new file mode 100644 index 00000000000..e820da5fc45 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/num-workgroups.spv14.asm.comp @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %_ %gl_NumWorkGroups %__0 + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %_ "" + OpName %gl_NumWorkGroups "gl_NumWorkGroups" + OpName %UBO "UBO" + OpMemberName %UBO 0 "w" + OpName %__0 "" + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ 
DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %SSBO = OpTypeStruct %v3uint +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input + %UBO = OpTypeStruct %v3uint +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %__0 = OpVariable %_ptr_Uniform_UBO Uniform +%_ptr_Uniform_v3uint = OpTypePointer Uniform %v3uint +%_ptr_StorageBuffer_v3uint = OpTypePointer StorageBuffer %v3uint + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %15 = OpLoad %v3uint %gl_NumWorkGroups + %20 = OpAccessChain %_ptr_Uniform_v3uint %__0 %int_0 + %21 = OpLoad %v3uint %20 + %22 = OpIAdd %v3uint %15 %21 + %24 = OpAccessChain %_ptr_StorageBuffer_v3uint %_ %int_0 + OpStore %24 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 00000000000..b4e622baced --- /dev/null +++ b/shaders-hlsl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,78 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO 
"SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %A "A" + OpName %B "A" + OpName %C "A" + OpName %D "A" + OpName %E "A" + OpName %F "A" + OpName %G "A" + OpName %H "A" + OpName %I "A" + OpName %J "A" + OpName %K "A" + OpName %L "A" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %A SpecId 0 + OpDecorate %B SpecId 1 + OpDecorate %C SpecId 2 + OpDecorate %D SpecId 3 + OpDecorate %E SpecId 4 + OpDecorate %F SpecId 5 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %A = OpSpecConstant %int 0 + %B = OpSpecConstant %int 1 + %C = OpSpecConstant %int 2 + %D = OpSpecConstant %int 3 + %E = OpSpecConstant %int 4 + %F = OpSpecConstant %int 5 + %G = OpSpecConstantOp %int ISub %A %B + %H = OpSpecConstantOp %int ISub %G %C + %I = OpSpecConstantOp %int ISub %H %D + %J = OpSpecConstantOp %int ISub %I %E + %K = OpSpecConstantOp %int ISub %J %F + %L = OpSpecConstantOp %int IAdd %K %F +%_ptr_Uniform_int = OpTypePointer Uniform %int + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %32 = OpAccessChain 
%_ptr_Uniform_int %_ %int_0 %19 + OpStore %32 %L + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp b/shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp similarity index 95% rename from shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp rename to shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp index edb1a05e549..db9a8490df6 100644 --- a/shaders-hlsl/asm/comp/storage-buffer-basic.invalid.nofxc.asm.comp +++ b/shaders-hlsl-no-opt/asm/comp/storage-buffer-basic.nofxc.asm.comp @@ -4,9 +4,9 @@ ; Bound: 31 ; Schema: 0 OpCapability Shader - OpCapability VariablePointers + ;OpCapability VariablePointers OpExtension "SPV_KHR_storage_buffer_storage_class" - OpExtension "SPV_KHR_variable_pointers" + ;OpExtension "SPV_KHR_variable_pointers" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %22 "main" %gl_WorkGroupID OpSource OpenCL_C 120 diff --git a/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..6782b124730 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,83 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_ + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpMemberName %AA 0 "foo" + OpMemberName %AB 0 "foo" + OpMemberName %A 0 "_aa" + OpMemberName %A 1 "ab" + OpMemberName %BA 0 "foo" + OpMemberName %BB 0 "foo" + OpMemberName %B 0 "_ba" + OpMemberName %B 1 "bb" + OpName %VertexData "VertexData" + OpMemberName %VertexData 0 "_a" + OpMemberName %VertexData 1 "b" + OpName %_ "" + OpMemberName %CA 0 "foo" + OpMemberName %C 0 "_ca" + OpMemberName %DA 0 "foo" + OpMemberName %D 0 "da" + 
OpName %UBO "UBO" + OpMemberName %UBO 0 "_c" + OpMemberName %UBO 1 "d" + OpName %__0 "" + OpMemberName %E 0 "a" + OpName %SSBO "SSBO" + ;OpMemberName %SSBO 0 "e" Test that we don't try to assign bogus aliases. + OpMemberName %SSBO 1 "_e" + OpMemberName %SSBO 2 "f" + OpName %__1 "" + OpDecorate %VertexData Block + OpDecorate %_ Location 0 + OpMemberDecorate %CA 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %DA 0 Offset 0 + OpMemberDecorate %D 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpMemberDecorate %E 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpDecorate %SSBO BufferBlock + OpDecorate %__1 DescriptorSet 0 + OpDecorate %__1 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %AA = OpTypeStruct %int + %AB = OpTypeStruct %int + %A = OpTypeStruct %AA %AB + %BA = OpTypeStruct %int + %BB = OpTypeStruct %int + %B = OpTypeStruct %BA %BB + %VertexData = OpTypeStruct %A %B +%_ptr_Input_VertexData = OpTypePointer Input %VertexData + %_ = OpVariable %_ptr_Input_VertexData Input + %CA = OpTypeStruct %int + %C = OpTypeStruct %CA + %DA = OpTypeStruct %int + %D = OpTypeStruct %DA + %UBO = OpTypeStruct %C %D +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %__0 = OpVariable %_ptr_Uniform_UBO Uniform + %E = OpTypeStruct %int + %SSBO = OpTypeStruct %E %E %E +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %__1 = OpVariable %_ptr_Uniform_SSBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..9408e69ac09 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,127 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vInput %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %vInput "vInput" + OpName %FragColor "FragColor" + OpName %phi "PHI" + OpDecorate %vInput RelaxedPrecision + OpDecorate %vInput Location 0 + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %b0 RelaxedPrecision + OpDecorate %b1 RelaxedPrecision + OpDecorate %b2 RelaxedPrecision + OpDecorate %b3 RelaxedPrecision + OpDecorate %c1 RelaxedPrecision + OpDecorate %c3 RelaxedPrecision + OpDecorate %d4_mp RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vInput = OpVariable %_ptr_Input_v4float Input + %float_1 = OpConstant %float 1 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_2 = OpConstant %float 2 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 + %uint_2 = OpConstant %uint 2 + %float_4 = OpConstant %float 4 + %uint_3 = OpConstant %uint 3 + %v4float_arr2 = OpTypeArray %v4float %uint_2 + %v44float = OpTypeMatrix %v4float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v4undef = OpUndef %v4float + %v4const = OpConstantNull %v4float + %v4arrconst = OpConstantNull %v4float_arr2 + %v44const = OpConstantNull %v44float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + + %loaded0 = OpLoad %v4float %vInput + + ; Basic case (highp). 
+ %a0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %a1 = OpCompositeInsert %v4float %float_2 %a0 1 + %a2 = OpCompositeInsert %v4float %float_3 %a1 2 + %a3 = OpCompositeInsert %v4float %float_4 %a2 3 + OpStore %FragColor %a3 + + ; Basic case (mediump). + %b0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %b1 = OpCompositeInsert %v4float %float_2 %b0 1 + %b2 = OpCompositeInsert %v4float %float_3 %b1 2 + %b3 = OpCompositeInsert %v4float %float_4 %b2 3 + OpStore %FragColor %b3 + + ; Mix relaxed precision. + %c0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %c1 = OpCompositeInsert %v4float %float_2 %c0 1 + %c2 = OpCompositeInsert %v4float %float_3 %c1 2 + %c3 = OpCompositeInsert %v4float %float_4 %c2 3 + OpStore %FragColor %c3 + + ; SSA use after insert + %d0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %d1 = OpCompositeInsert %v4float %float_2 %d0 1 + %d2 = OpCompositeInsert %v4float %float_3 %d1 2 + %d3 = OpCompositeInsert %v4float %float_4 %d2 3 + %d4 = OpFAdd %v4float %d3 %d0 + OpStore %FragColor %d4 + %d4_mp = OpFAdd %v4float %d3 %d1 + OpStore %FragColor %d4_mp + + ; Verify Insert behavior on Undef. + %e0 = OpCompositeInsert %v4float %float_1 %v4undef 0 + %e1 = OpCompositeInsert %v4float %float_2 %e0 1 + %e2 = OpCompositeInsert %v4float %float_3 %e1 2 + %e3 = OpCompositeInsert %v4float %float_4 %e2 3 + OpStore %FragColor %e3 + + ; Verify Insert behavior on Constant. + %f0 = OpCompositeInsert %v4float %float_1 %v4const 0 + OpStore %FragColor %f0 + + ; Verify Insert behavior on Array. + %g0 = OpCompositeInsert %v4float_arr2 %float_1 %v4arrconst 1 2 + %g1 = OpCompositeInsert %v4float_arr2 %float_2 %g0 0 3 + %g2 = OpCompositeExtract %v4float %g1 0 + OpStore %FragColor %g2 + %g3 = OpCompositeExtract %v4float %g1 1 + OpStore %FragColor %g3 + + ; Verify Insert behavior on Matrix. 
+ %h0 = OpCompositeInsert %v44float %float_1 %v44const 1 2 + %h1 = OpCompositeInsert %v44float %float_2 %h0 2 3 + %h2 = OpCompositeExtract %v4float %h1 0 + OpStore %FragColor %h2 + %h3 = OpCompositeExtract %v4float %h1 1 + OpStore %FragColor %h3 + %h4 = OpCompositeExtract %v4float %h1 2 + OpStore %FragColor %h4 + %h5 = OpCompositeExtract %v4float %h1 3 + OpStore %FragColor %h5 + + ; Verify that we cannot RMW PHI variables. + OpBranch %next + %next = OpLabel + %phi = OpPhi %v4float %d2 %5 + %i0 = OpCompositeInsert %v4float %float_4 %phi 3 + OpStore %FragColor %i0 + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..a9650ddbb6b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,61 @@ +; SPIR-V +; Version: 1.2 +; Generator: Khronos; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %EntryPoint_Main "main" + OpExecutionMode %EntryPoint_Main OriginUpperLeft + OpSource Unknown 100 + OpName %EmptyStructTest "EmptyStructTest" + OpName %EmptyStruct2Test "EmptyStruct2Test" + OpName %GetValue "GetValue" + OpName %GetValue2 "GetValue" + OpName %self "self" + OpName %self2 "self" + OpName %emptyStruct "emptyStruct" + OpName %value "value" + OpName %EntryPoint_Main "EntryPoint_Main" + +%EmptyStructTest = OpTypeStruct +%EmptyStruct2Test = OpTypeStruct %EmptyStructTest +%_ptr_Function_EmptyStruct2Test = OpTypePointer Function %EmptyStruct2Test + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %5 = OpTypeFunction %float %_ptr_Function_EmptyStruct2Test + %6 = OpTypeFunction %float %EmptyStruct2Test + %void = OpTypeVoid +%_ptr_Function_void = OpTypePointer Function %void + %8 = OpTypeFunction %void %_ptr_Function_EmptyStruct2Test + %9 = OpTypeFunction %void + %float_0 = OpConstant %float 0 + %value4 = 
OpConstantNull %EmptyStruct2Test + + %GetValue = OpFunction %float None %5 + %self = OpFunctionParameter %_ptr_Function_EmptyStruct2Test + %13 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + + %GetValue2 = OpFunction %float None %6 + %self2 = OpFunctionParameter %EmptyStruct2Test + %14 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + +%EntryPoint_Main = OpFunction %void None %9 + %37 = OpLabel + %emptyStruct = OpVariable %_ptr_Function_EmptyStruct2Test Function + %18 = OpVariable %_ptr_Function_EmptyStruct2Test Function + %value = OpVariable %_ptr_Function_float Function + %value2 = OpCompositeConstruct %EmptyStructTest + %value3 = OpCompositeConstruct %EmptyStruct2Test %value2 + %22 = OpFunctionCall %float %GetValue %emptyStruct + %23 = OpFunctionCall %float %GetValue2 %value3 + %24 = OpFunctionCall %float %GetValue2 %value4 + OpStore %value %22 + OpStore %value %23 + OpStore %value %24 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag b/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag new file mode 100644 index 00000000000..72f6d9d86e9 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/nonuniform-bracket-handling-2.nonuniformresource.sm51.asm.frag @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 53 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability SampledImageArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamplers "uSamplers" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "indices" + 
OpName %_ "" + OpName %vUV "vUV" + OpName %uSampler "uSampler" + OpName %gl_FragCoord "gl_FragCoord" + OpDecorate %FragColor Location 0 + OpDecorate %uSamplers DescriptorSet 0 + OpDecorate %uSamplers Binding 0 + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 2 + OpDecorate %_ Binding 0 + OpDecorate %26 NonUniform + OpDecorate %28 NonUniform + OpDecorate %29 NonUniform + OpDecorate %vUV Location 0 + OpDecorate %uSampler DescriptorSet 1 + OpDecorate %uSampler Binding 1 + OpDecorate %38 NonUniform + OpDecorate %gl_FragCoord BuiltIn FragCoord + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_runtimearr_11 = OpTypeRuntimeArray %11 +%_ptr_UniformConstant__runtimearr_11 = OpTypePointer UniformConstant %_runtimearr_11 + %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_11 UniformConstant + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %float_0 = OpConstant %float 0 + %uSampler = OpVariable %_ptr_UniformConstant_11 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_1 = OpConstant %uint 1 +%_ptr_Input_float = OpTypePointer Input %float 
+ %main = OpFunction %void None %3 + %5 = OpLabel + %24 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %int_10 + %26 = OpLoad %uint %24 + %28 = OpAccessChain %_ptr_UniformConstant_11 %uSamplers %26 + %29 = OpLoad %11 %28 + %33 = OpLoad %v2float %vUV + %35 = OpImageSampleExplicitLod %v4float %29 %33 Lod %float_0 + OpStore %FragColor %35 + %37 = OpLoad %11 %uSampler + %38 = OpCopyObject %11 %37 + %39 = OpLoad %v2float %vUV + %44 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_1 + %45 = OpLoad %float %44 + %46 = OpConvertFToS %int %45 + %47 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %46 + %48 = OpLoad %uint %47 + %49 = OpConvertUToF %float %48 + %50 = OpImageSampleExplicitLod %v4float %38 %39 Lod %49 + %51 = OpLoad %v4float %FragColor + %52 = OpFAdd %v4float %51 %50 + OpStore %FragColor %52 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag b/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag new file mode 100644 index 00000000000..c5428a8bb9b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/nonuniform-ssbo.sm51.nonuniformresource.asm.frag @@ -0,0 +1,99 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageBufferArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vIndex %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpSourceExtension "GL_EXT_samplerless_texture_functions" + OpName %main "main" + OpName %i "i" + OpName %vIndex "vIndex" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "counter" + OpMemberName %SSBO 1 "v" + OpName %ssbos "ssbos" + OpName %FragColor "FragColor" + OpDecorate %vIndex Flat + OpDecorate 
%vIndex Location 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 0 + OpDecorate %ssbos Binding 3 + OpDecorate %32 NonUniform + OpDecorate %39 NonUniform + OpDecorate %49 NonUniform + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Input_int = OpTypePointer Input %int + %vIndex = OpVariable %_ptr_Input_int Input + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %uint %_runtimearr_v4float +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %int_60 = OpConstant %int 60 + %int_1 = OpConstant %int 1 + %int_70 = OpConstant %int 70 + %float_20 = OpConstant %float 20 + %30 = OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_100 = OpConstant %int 100 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_100 = OpConstant %uint 100 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %uint_2 = OpConstant %uint 2 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %11 = OpLoad %int %vIndex + OpStore %i %11 + %20 = OpLoad %int %i + %22 = OpIAdd %int %20 %int_60 + %23 = OpCopyObject %int %22 + %25 = OpLoad %int %i + %27 = OpIAdd %int %25 %int_70 + %28 = OpCopyObject %int %27 + %32 = OpAccessChain %_ptr_Uniform_v4float 
%ssbos %23 %int_1 %28 + OpStore %32 %30 + %33 = OpLoad %int %i + %35 = OpIAdd %int %33 %int_100 + %36 = OpCopyObject %int %35 + %39 = OpAccessChain %_ptr_Uniform_uint %ssbos %36 %int_0 + %43 = OpAtomicIAdd %uint %39 %uint_1 %uint_0 %uint_100 + %46 = OpLoad %int %i + %47 = OpCopyObject %int %46 + %49 = OpAccessChain %_ptr_Uniform_SSBO %ssbos %47 + %50 = OpArrayLength %uint %49 1 + %51 = OpBitcast %int %50 + %52 = OpConvertSToF %float %51 + %55 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + %56 = OpLoad %float %55 + %57 = OpFAdd %float %56 %52 + %58 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %58 %57 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..17aab1d8f77 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,25 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 10 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragDepth + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main DepthReplacing + OpSource GLSL 450 + OpName %main "main" + OpName %gl_FragDepth "gl_FragDepth" + OpDecorate %gl_FragDepth BuiltIn FragDepth + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %float_0_5 = OpConstant %float 0.5 +%gl_FragDepth = OpVariable %_ptr_Output_float Output %float_0_5 + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag b/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..3696660d36d --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,69 @@ 
+; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 40 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %vColor "vColor" + OpName %uninit_function_int "uninit_function_int" + OpName %FragColor "FragColor" + OpName %uninit_int "uninit_int" + OpName %uninit_vector "uninit_vector" + OpName %uninit_matrix "uninit_matrix" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpName %uninit_foo "uninit_foo" + OpDecorate %vColor Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_10 = OpConstant %float 10 + %bool = OpTypeBool + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_20 = OpConstant %int 20 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Private_int = OpTypePointer Private %int + %uninit_int = OpUndef %int + %v4int = OpTypeVector %int 4 +%_ptr_Private_v4int = OpTypePointer Private %v4int +%uninit_vector = OpUndef %v4int +%mat4v4float = OpTypeMatrix %v4float 4 +%_ptr_Private_mat4v4float = OpTypePointer Private %mat4v4float +%uninit_matrix = OpUndef %mat4v4float + %Foo = OpTypeStruct %int +%_ptr_Private_Foo = OpTypePointer Private %Foo + %uninit_foo = OpUndef %Foo + %main = OpFunction %void None %3 + %5 = OpLabel +%uninit_function_int = OpVariable %_ptr_Function_int Function + %13 = OpAccessChain %_ptr_Input_float %vColor %uint_0 + %14 = OpLoad %float %13 + %17 = OpFOrdGreaterThan %bool %14 
%float_10 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %24 + %18 = OpLabel + OpBranch %19 + %24 = OpLabel + OpBranch %19 + %19 = OpLabel + %27 = OpPhi %int %int_10 %18 %int_20 %24 + %28 = OpLoad %v4float %vColor + OpStore %FragColor %28 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..ebd8d6bab75 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + 
%_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..69b8f911204 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos 
Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 
+%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git 
a/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag new file mode 100644 index 00000000000..7c0fe9a2b24 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + 
%uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag b/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..a5a16f2873b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; 
Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %ACOS_f1_ "mat3" + OpName %a "a" + OpName %ACOS_i1_ "gl_Foo" + OpName %a_0 "a" + OpName %FragColor "FragColor" + OpName %param "param" + OpName %param_0 "param" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %8 = OpTypeFunction %float %_ptr_Function_float + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %14 = OpTypeFunction %float %_ptr_Function_int + %float_1 = OpConstant %float 1 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %float_2 = OpConstant %float 2 + %int_4 = OpConstant %int 4 + %main = OpFunction %void None %3 + %5 = OpLabel + %param = OpVariable %_ptr_Function_float Function + %param_0 = OpVariable %_ptr_Function_int Function + OpStore %param %float_2 + %32 = OpFunctionCall %float %ACOS_f1_ %param + OpStore %param_0 %int_4 + %35 = OpFunctionCall %float %ACOS_i1_ %param_0 + %36 = OpFAdd %float %32 %35 + OpStore %FragColor %36 + OpReturn + OpFunctionEnd + %ACOS_f1_ = OpFunction %float None %8 + %a = OpFunctionParameter %_ptr_Function_float + %11 = OpLabel + %18 = OpLoad %float %a + %20 = OpFAdd %float %18 %float_1 + OpReturnValue %20 + OpFunctionEnd + %ACOS_i1_ = OpFunction %float None %14 + %a_0 = OpFunctionParameter %_ptr_Function_int + %17 = OpLabel + %23 = OpLoad %int %a_0 + %24 = OpConvertSToF %float %23 + %25 = OpFAdd %float %24 %float_1 + OpReturnValue %25 + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag b/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..07450ee80b6 --- /dev/null +++ 
b/shaders-hlsl-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,62 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool + %false = OpConstantFalse %bool + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %true = OpConstantTrue %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %s = OpTypeStruct %float + %arr = OpTypeArray %float %uint_2 +%_ptr_Function_s = OpTypePointer Function %s +%_ptr_Function_arr = OpTypePointer Function %arr + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_0 %float_1 + %18 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %19 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %20 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %s0 = OpConstantComposite %s %float_0 + %s1 = OpConstantComposite %s %float_1 + %v4bool = OpTypeVector %bool 4 + %b4 = OpConstantComposite %v4bool %false %true %false %true + %arr1 = OpConstantComposite %arr %float_0 %float_1 + %arr2 = OpConstantComposite %arr %float_1 %float_0 + %main = OpFunction %void None %3 + %5 = OpLabel + %ss = OpVariable %_ptr_Function_s Function + %arrvar = OpVariable %_ptr_Function_arr Function + ; Not trivial + %21 = OpSelect %v4float %false %17 %18 + OpStore %FragColor %21 + ; Trivial + %22 = OpSelect %v4float %false %19 %20 + OpStore %FragColor %22 + ; Vector not trivial + %23 = OpSelect %v4float %b4 %17 %18 + OpStore %FragColor %23 + ; Vector trivial + %24 = OpSelect %v4float %b4 %19 
%20 + OpStore %FragColor %24 + ; Struct selection + %sout = OpSelect %s %false %s0 %s1 + OpStore %ss %sout + ; Array selection + %arrout = OpSelect %arr %true %arr1 %arr2 + OpStore %arrvar %arrout + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag b/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag new file mode 100644 index 00000000000..421e4660932 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/struct-packing-last-element-array-matrix-rule.invalid.asm.frag @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 33 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %Foo "Foo" + OpMemberName %Foo 0 "m" + OpMemberName %Foo 1 "v" + OpName %FooUBO "FooUBO" + OpMemberName %FooUBO 0 "foo" + OpName %_ "" + OpName %Bar "Bar" + OpMemberName %Bar 0 "m" + OpMemberName %Bar 1 "v" + OpName %BarUBO "BarUBO" + OpMemberName %BarUBO 0 "bar" + OpName %__0 "" + OpDecorate %FragColor Location 0 + OpDecorate %_arr_mat3v3float_uint_2 ArrayStride 48 + OpMemberDecorate %Foo 0 ColMajor + OpMemberDecorate %Foo 0 Offset 0 + OpMemberDecorate %Foo 0 MatrixStride 16 + OpMemberDecorate %Foo 1 Offset 92 + OpMemberDecorate %FooUBO 0 Offset 0 + OpDecorate %FooUBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %Bar 0 ColMajor + OpMemberDecorate %Bar 0 Offset 0 + OpMemberDecorate %Bar 0 MatrixStride 16 + OpMemberDecorate %Bar 1 Offset 44 + OpMemberDecorate %BarUBO 0 Offset 0 + OpDecorate %BarUBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 
+%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_mat3v3float_uint_2 = OpTypeArray %mat3v3float %uint_2 + %Foo = OpTypeStruct %_arr_mat3v3float_uint_2 %float + %FooUBO = OpTypeStruct %Foo +%_ptr_Uniform_FooUBO = OpTypePointer Uniform %FooUBO + %_ = OpVariable %_ptr_Uniform_FooUBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Bar = OpTypeStruct %mat3v3float %float + %BarUBO = OpTypeStruct %Bar +%_ptr_Uniform_BarUBO = OpTypePointer Uniform %BarUBO + %__0 = OpVariable %_ptr_Uniform_BarUBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %23 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_1 + %24 = OpLoad %float %23 + %29 = OpAccessChain %_ptr_Uniform_float %__0 %int_0 %int_1 + %30 = OpLoad %float %29 + %31 = OpFAdd %float %24 %30 + %32 = OpCompositeConstruct %v4float %31 %31 %31 %31 + OpStore %FragColor %32 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag b/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..a9b34893c83 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/subgroup-arithmetic-cast.invalid.nofxc.sm60.asm.frag @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 78 +; Schema: 0 + OpCapability Shader + OpCapability GroupNonUniform + OpCapability GroupNonUniformArithmetic + OpCapability GroupNonUniformClustered + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %index %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_KHR_shader_subgroup_arithmetic" + 
OpSourceExtension "GL_KHR_shader_subgroup_basic" + OpSourceExtension "GL_KHR_shader_subgroup_clustered" + OpName %main "main" + OpName %index "index" + OpName %FragColor "FragColor" + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 +%_ptr_Output_uint = OpTypePointer Output %uint + %FragColor = OpVariable %_ptr_Output_uint Output + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpLoad %int %index + %u = OpBitcast %uint %i + %res0 = OpGroupNonUniformSMin %uint %uint_3 Reduce %i + %res1 = OpGroupNonUniformSMax %uint %uint_3 Reduce %u + %res2 = OpGroupNonUniformUMin %uint %uint_3 Reduce %i + %res3 = OpGroupNonUniformUMax %uint %uint_3 Reduce %u + ;%res4 = OpGroupNonUniformSMax %uint %uint_3 InclusiveScan %i + ;%res5 = OpGroupNonUniformSMin %uint %uint_3 InclusiveScan %u + ;%res6 = OpGroupNonUniformUMax %uint %uint_3 ExclusiveScan %i + ;%res7 = OpGroupNonUniformUMin %uint %uint_3 ExclusiveScan %u + ;%res8 = OpGroupNonUniformSMin %uint %uint_3 ClusteredReduce %i %uint_4 + ;%res9 = OpGroupNonUniformSMax %uint %uint_3 ClusteredReduce %u %uint_4 + ;%res10 = OpGroupNonUniformUMin %uint %uint_3 ClusteredReduce %i %uint_4 + ;%res11 = OpGroupNonUniformUMax %uint %uint_3 ClusteredReduce %u %uint_4 + OpStore %FragColor %res0 + OpStore %FragColor %res1 + OpStore %FragColor %res2 + OpStore %FragColor %res3 + ;OpStore %FragColor %res4 + ;OpStore %FragColor %res5 + ;OpStore %FragColor %res6 + ;OpStore %FragColor %res7 + ;OpStore %FragColor %res8 + ;OpStore %FragColor %res9 + ;OpStore %FragColor %res10 + ;OpStore %FragColor %res11 + OpReturn + OpFunctionEnd diff --git 
a/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000000..2e5e030922b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite 
%v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %ordered = OpFOrdNotEqual %v4bool %37 %38 + OpStore %neq %ordered + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad 
%v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad 
%bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000000..2e5e030922b --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool 
= OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %ordered = OpFOrdNotEqual %v4bool %37 %38 + OpStore %neq %ordered + %41 = OpLoad %v4float %A + %42 = OpLoad 
%v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain 
%_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000000..7b38001d8d4 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,150 @@ +; SPIR-V +; Version: 1.4 +; Generator: Unknown(30017); 21022 +; Bound: 89 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpCapability ShaderViewportIndexLayerEXT + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + OpExtension "SPV_EXT_shader_viewport_index_layer" + OpMemoryModel Logical GLSL450 + OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38 + OpExecutionMode %main OutputVertices 24 + OpExecutionMode %main OutputPrimitivesNV 8 + OpExecutionMode %main OutputTrianglesNV + 
OpExecutionMode %main LocalSize 2 3 4 + OpName %main "main" + OpName %SV_Position "SV_Position" + OpName %B "B" + OpName %SV_CullPrimitive "SV_CullPrimitive" + OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex" + OpName %SV_PrimitiveID "SV_PrimitiveID" + OpName %C "C" + OpName %indices "indices" + OpName %_ "" + OpDecorate %SV_Position BuiltIn Position + OpDecorate %B Location 1 + OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT + OpDecorate %SV_CullPrimitive PerPrimitiveNV + OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer + OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV + OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId + OpDecorate %SV_PrimitiveID PerPrimitiveNV + OpDecorate %C Location 3 + OpDecorate %C PerPrimitiveNV + OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + %void = OpTypeVoid + %2 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24 +%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24 +%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %bool = OpTypeBool + %uint_8 = OpConstant %uint 8 +%_arr_bool_uint_8 = OpTypeArray %bool %uint_8 +%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8 +%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output +%_arr_uint_uint_8 = OpTypeArray %uint %uint_8 +%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8 +%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8 +%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8 + %C = OpVariable %_ptr_Output__arr_v4float_uint_8 
Output + %v3uint = OpTypeVector %uint 3 +%_arr_v3uint_uint_8 = OpTypeArray %v3uint %uint_8 +%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8 + %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output + %uint_64 = OpConstant %uint 64 +%_arr_float_uint_64 = OpTypeArray %float %uint_64 +%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64 + %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %_ = OpTypeStruct %float +%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_ + %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 +%_ptr_Output_float = OpTypePointer Output %float + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float +%_ptr_Output_v3uint = OpTypePointer Output %v3uint +%_ptr_Output_bool = OpTypePointer Output %bool +%_ptr_Output_uint = OpTypePointer Output %uint + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %85 + %85 = OpLabel + %35 = OpLoad %uint %gl_LocalInvocationIndex + %39 = OpConvertUToF %float %35 + %41 = OpAccessChain %_ptr_Workgroup_float %32 %35 + OpStore %41 %39 + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpSetMeshOutputsEXT %uint_24 %uint_8 + %44 = OpLoad %float %41 + %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0 + OpStore %46 %44 + %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1 + OpStore %48 %44 + %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2 + OpStore %50 %44 + %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3 + OpStore %51 %44 + %53 = OpBitwiseXor %uint %35 %uint_1 + %54 = OpAccessChain 
%_ptr_Workgroup_float %32 %53 + %55 = OpLoad %float %54 + %57 = OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0 + %58 = OpLoad %float %57 + %59 = OpFAdd %float %58 %55 + %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1 + OpStore %61 %59 + %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2 + OpStore %62 %59 + %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3 + OpStore %63 %59 + %64 = OpULessThan %bool %35 %uint_8 + OpSelectionMerge %87 None + OpBranchConditional %64 %86 %87 + %86 = OpLabel + %65 = OpIMul %uint %35 %uint_3 + %66 = OpIAdd %uint %65 %uint_1 + %67 = OpIAdd %uint %65 %uint_2 + %68 = OpCompositeConstruct %v3uint %65 %66 %67 + %70 = OpAccessChain %_ptr_Output_v3uint %indices %35 + OpStore %70 %68 + %71 = OpBitwiseAnd %uint %35 %uint_1 + %72 = OpINotEqual %bool %71 %uint_0 + %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35 + OpStore %76 %35 + %77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35 + OpStore %77 %35 + %78 = OpBitwiseXor %uint %35 %uint_2 + %79 = OpAccessChain %_ptr_Workgroup_float %32 %78 + %80 = OpLoad %float %79 + %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0 + OpStore %81 %80 + %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1 + OpStore %82 %80 + %83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2 + OpStore %83 %80 + %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3 + OpStore %84 %80 + OpBranch %87 + %87 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag b/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..eccff08b331 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,93 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 65 +; 
Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vA %vB + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %vA "vA" + OpName %vB "vB" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %vA RelaxedPrecision + OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %25 RelaxedPrecision + OpDecorate %30 RelaxedPrecision + OpDecorate %vB RelaxedPrecision + OpDecorate %vB Flat + OpDecorate %vB Location 1 + OpDecorate %38 RelaxedPrecision + OpDecorate %40 RelaxedPrecision + OpDecorate %49 RelaxedPrecision + OpDecorate %51 RelaxedPrecision + OpDecorate %53 RelaxedPrecision + OpDecorate %56 RelaxedPrecision + OpDecorate %64 RelaxedPrecision + OpDecorate %58 RelaxedPrecision + OpDecorate %57 RelaxedPrecision + OpDecorate %60 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input + %bool = OpTypeBool + %int_20 = OpConstant %int 20 + %int_50 = OpConstant %int 50 + %vB = OpVariable %_ptr_Input_int Input + %int_40 = OpConstant %int 40 + %int_60 = OpConstant %int 60 + %int_10 = OpConstant %int 10 + %float_1 = OpConstant %float 1 + %63 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %FragColor %11 + OpBranch %17 + %17 = OpLabel + %60 = OpPhi %int %int_0 %5 %58 %20 + %57 = OpPhi %int %int_0 %5 %56 %20 + %25 = OpLoad %int %vA + %27 = OpSLessThan %bool %57 %25 + OpLoopMerge 
%19 %20 None + OpBranchConditional %27 %18 %19 + %18 = OpLabel + %30 = OpIAdd %int %25 %57 + %32 = OpIEqual %bool %30 %int_20 + OpSelectionMerge %34 None + OpBranchConditional %32 %33 %36 + %33 = OpLabel + OpBranch %34 + %36 = OpLabel + %38 = OpLoad %int %vB + %40 = OpIAdd %int %38 %57 + %42 = OpIEqual %bool %40 %int_40 + %64 = OpSelect %int %42 %int_60 %60 + OpBranch %34 + %34 = OpLabel + %58 = OpPhi %int %int_50 %33 %64 %36 + %49 = OpIAdd %int %58 %int_10 + %51 = OpLoad %v4float %FragColor + %53 = OpFAdd %v4float %51 %63 + OpStore %FragColor %53 + OpBranch %20 + %20 = OpLabel + %56 = OpIAdd %int %57 %49 + OpBranch %17 + %19 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert b/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..a431e6a7174 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,37 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 13 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %foo + OpSource GLSL 450 + OpName %main "main" + OpName %Vert "Vert" + OpMemberName %Vert 0 "a" + OpMemberName %Vert 1 "b" + OpName %_ "" + OpName %Foo "Foo" + OpMemberName %Foo 0 "c" + OpMemberName %Foo 1 "d" + OpName %foo "foo" + OpDecorate %Vert Block + OpDecorate %_ Location 0 + OpDecorate %foo Location 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Vert = OpTypeStruct %float %float +%_ptr_Output_Vert = OpTypePointer Output %Vert + %zero_vert = OpConstantNull %Vert + %_ = OpVariable %_ptr_Output_Vert Output %zero_vert + %Foo = OpTypeStruct %float %float +%_ptr_Output_Foo = OpTypePointer Output %Foo +%zero_foo = OpConstantNull %Foo + %foo = OpVariable %_ptr_Output_Foo Output %zero_foo + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + 
OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert b/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..aaa68662e5d --- /dev/null +++ b/shaders-hlsl-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %zero = OpConstantNull %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output %zero + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %19 %17 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert 
b/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert new file mode 100644 index 00000000000..94a883c1ed1 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/vert/complex-link-by-name.asm.vert @@ -0,0 +1,119 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %output_location_0 %output_location_2 %output_location_3 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Struct_vec4" + OpMemberName %Foo 0 "m0" + OpName %c "c" + OpName %Foo_0 "Struct_vec4" + OpMemberName %Foo_0 0 "m0" + OpName %Bar "Struct_vec4" + OpMemberName %Bar 0 "m0" + OpName %UBO "UBO" + OpMemberName %UBO 0 "m0" + OpMemberName %UBO 1 "m1" + OpName %ubo_binding_0 "ubo_binding_0" + OpName %Bar_0 "Struct_vec4" + OpMemberName %Bar_0 0 "m0" + OpName %b "b" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %VertexOut "VertexOut" + OpMemberName %VertexOut 0 "m0" + OpMemberName %VertexOut 1 "m1" + OpName %output_location_0 "output_location_0" + OpName %output_location_2 "output_location_2" + OpName %output_location_3 "output_location_3" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Bar 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %ubo_binding_0 DescriptorSet 0 + OpDecorate %ubo_binding_0 Binding 0 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %VertexOut Block + OpDecorate %output_location_0 Location 0 + OpDecorate 
%output_location_2 Location 2 + OpDecorate %output_location_3 Location 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %Foo = OpTypeStruct %v4float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %v4float + %Bar = OpTypeStruct %v4float + %UBO = OpTypeStruct %Foo_0 %Bar +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO +%ubo_binding_0 = OpVariable %_ptr_Uniform_UBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %Bar_0 = OpTypeStruct %v4float +%_ptr_Function_Bar_0 = OpTypePointer Function %Bar_0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_Bar = OpTypePointer Uniform %Bar + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %VertexOut = OpTypeStruct %Foo %Bar_0 +%_ptr_Output_VertexOut = OpTypePointer Output %VertexOut +%output_location_0 = OpVariable %_ptr_Output_VertexOut Output +%_ptr_Output_Foo = OpTypePointer Output %Foo +%_ptr_Output_Bar_0 = OpTypePointer Output %Bar_0 +%output_location_2 = OpVariable %_ptr_Output_Foo Output +%output_location_3 = OpVariable %_ptr_Output_Bar_0 Output + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_Foo Function + %b = OpVariable %_ptr_Function_Bar_0 Function + %19 = OpAccessChain %_ptr_Uniform_Foo_0 %ubo_binding_0 %int_0 + %20 = OpLoad %Foo_0 %19 + %21 = OpCompositeExtract %v4float %20 0 + %23 = OpAccessChain %_ptr_Function_v4float %c %int_0 + OpStore %23 %21 + %29 = OpAccessChain %_ptr_Uniform_Bar %ubo_binding_0 %int_1 + %30 = OpLoad %Bar %29 + %31 = OpCompositeExtract %v4float %30 0 
+ %32 = OpAccessChain %_ptr_Function_v4float %b %int_0 + OpStore %32 %31 + %39 = OpAccessChain %_ptr_Function_v4float %c %int_0 + %40 = OpLoad %v4float %39 + %41 = OpAccessChain %_ptr_Function_v4float %b %int_0 + %42 = OpLoad %v4float %41 + %43 = OpFAdd %v4float %40 %42 + %45 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %45 %43 + %49 = OpLoad %Foo %c + %51 = OpAccessChain %_ptr_Output_Foo %output_location_0 %int_0 + OpStore %51 %49 + %52 = OpLoad %Bar_0 %b + %54 = OpAccessChain %_ptr_Output_Bar_0 %output_location_0 %int_1 + OpStore %54 %52 + %56 = OpLoad %Foo %c + OpStore %output_location_2 %56 + %58 = OpLoad %Bar_0 %b + OpStore %output_location_3 %58 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp b/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp new file mode 100644 index 00000000000..83b714bc406 --- /dev/null +++ b/shaders-hlsl-no-opt/comp/glsl.std450.fxconly.comp @@ -0,0 +1,130 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float res; + int ires; + uint ures; + + vec4 f32; + ivec4 s32; + uvec4 u32; + + mat2 m2; + mat3 m3; + mat4 m4; +}; + +void main() +{ + float tmp; + vec2 v2; + vec3 v3; + vec4 v4; + int itmp; + + res = round(f32.x); + //res = roundEven(f32.x); + res = trunc(f32.x); + res = abs(f32.x); + ires = abs(s32.x); + res = sign(f32.x); + ires = sign(s32.x); + res = floor(f32.x); + res = ceil(f32.x); + res = fract(f32.x); + res = radians(f32.x); + res = degrees(f32.x); + res = sin(f32.x); + res = cos(f32.x); + res = tan(f32.x); + res = asin(f32.x); + res = acos(f32.x); + res = atan(f32.x); + res = sinh(f32.x); + res = cosh(f32.x); + res = tanh(f32.x); + //res = asinh(f32.x); + //res = acosh(f32.x); + //res = atanh(f32.x); + res = atan(f32.x, f32.y); + res = pow(f32.x, f32.y); + res = exp(f32.x); + res = log(f32.x); + res = exp2(f32.x); + res = log2(f32.x); + res = sqrt(f32.x); + res = inversesqrt(f32.x); + + res = length(f32.x); + res = 
distance(f32.x, f32.y); + res = normalize(f32.x); + res = faceforward(f32.x, f32.y, f32.z); + res = reflect(f32.x, f32.y); + res = refract(f32.x, f32.y, f32.z); + + res = length(f32.xy); + res = distance(f32.xy, f32.zw); + v2 = normalize(f32.xy); + v2 = faceforward(f32.xy, f32.yz, f32.zw); + v2 = reflect(f32.xy, f32.zw); + v2 = refract(f32.xy, f32.yz, f32.w); + + v3 = cross(f32.xyz, f32.yzw); + + res = determinant(m2); + res = determinant(m3); + res = determinant(m4); + m2 = inverse(m2); + m3 = inverse(m3); + m4 = inverse(m4); + + res = modf(f32.x, tmp); + // ModfStruct + + res = min(f32.x, f32.y); + ures = min(u32.x, u32.y); + ires = min(s32.x, s32.y); + res = max(f32.x, f32.y); + ures = max(u32.x, u32.y); + ires = max(s32.x, s32.y); + + res = clamp(f32.x, f32.y, f32.z); + ures = clamp(u32.x, u32.y, u32.z); + ires = clamp(s32.x, s32.y, s32.z); + + res = mix(f32.x, f32.y, f32.z); + res = step(f32.x, f32.y); + res = smoothstep(f32.x, f32.y, f32.z); + res = fma(f32.x, f32.y, f32.z); + + res = frexp(f32.x, itmp); + + // FrexpStruct + res = ldexp(f32.x, itmp); + + ures = packSnorm4x8(f32); + ures = packUnorm4x8(f32); + ures = packSnorm2x16(f32.xy); + ures = packUnorm2x16(f32.xy); + ures = packHalf2x16(f32.xy); + // packDouble2x32 + + v2 = unpackSnorm2x16(u32.x); + v2 = unpackUnorm2x16(u32.x); + v2 = unpackHalf2x16(u32.x); + v4 = unpackSnorm4x8(u32.x); + v4 = unpackUnorm4x8(u32.x); + // unpackDouble2x32 + + s32 = findLSB(s32); + s32 = findLSB(u32); + s32 = findMSB(s32); + s32 = findMSB(u32); + + // interpolateAtSample + // interpolateAtOffset + + // NMin, NMax, NClamp +} diff --git a/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp b/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..f7a8787d3d8 --- /dev/null +++ b/shaders-hlsl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,62 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 31 +; Schema: 0 + OpCapability Shader + %1 = 
OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Foo" + OpMemberName %Foo 0 "abs" + OpName %f "f" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "abs" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "foo" + OpMemberName %SSBO 1 "foo2" + OpName %_ "" + OpName %linear "abs" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Foo = OpTypeStruct %float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %float + %SSBO = OpTypeStruct %Foo_0 %Foo_0 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + %f = OpVariable %_ptr_Function_Foo Function + %linear = OpVariable %_ptr_Function_int Function + %17 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 + %18 = OpLoad %Foo_0 %17 + %19 = OpCompositeExtract %float %18 0 + %21 = OpAccessChain %_ptr_Function_float %f %int_0 + OpStore %21 %19 + OpStore %linear %int_10 + %26 = OpLoad %Foo %f + %27 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1 + %28 = OpCompositeExtract %float %26 0 + %30 = OpAccessChain %_ptr_Uniform_float %27 %int_0 + OpStore %30 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/comp/intmin-literal.comp b/shaders-hlsl-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..ee35cedabb9 --- /dev/null 
+++ b/shaders-hlsl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,18 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + a = intBitsToFloat(floatBitsToInt(b) ^ 0x80000000); +} diff --git a/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp b/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp new file mode 100644 index 00000000000..bc182c52923 --- /dev/null +++ b/shaders-hlsl-no-opt/comp/subgroups-boolean.invalid.nofxc.sm60.comp @@ -0,0 +1,30 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require +layout(local_size_x = 30) in; + +layout(std430, binding = 0) buffer SSBO +{ + uint FragColor[]; +}; + +void main() +{ + bool v = gl_GlobalInvocationID.x != 3; + bvec4 v4; + v4.x = subgroupOr(v); + v4.y = subgroupAnd(v); + v4.z = subgroupXor(v); + v4.w = subgroupAllEqual(v); + + uvec4 w = uvec4(v4); + FragColor[gl_GlobalInvocationID.x] = w.x + w.y + w.z + w.w; +} + + diff --git a/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp b/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp similarity index 85% rename from shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp rename to shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp index 81135e2a93e..bbda0efd5ba 100644 --- a/shaders-hlsl/comp/subgroups.invalid.nofxc.sm60.comp +++ b/shaders-hlsl-no-opt/comp/subgroups.invalid.nofxc.sm60.comp @@ -40,18 +40,18 @@ void main() //bool inverse_ballot_value = subgroupInverseBallot(ballot_value); //bool bit_extracted = 
subgroupBallotBitExtract(uvec4(10u), 8u); uint bit_count = subgroupBallotBitCount(ballot_value); - //uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); - //uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); //uint lsb = subgroupBallotFindLSB(ballot_value); //uint msb = subgroupBallotFindMSB(ballot_value); // shuffle - //uint shuffled = subgroupShuffle(10u, 8u); - //uint shuffled_xor = subgroupShuffleXor(30u, 8u); + uint shuffled = subgroupShuffle(10u, 8u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); // shuffle relative - //uint shuffled_up = subgroupShuffleUp(20u, 4u); - //uint shuffled_down = subgroupShuffleDown(20u, 4u); + uint shuffled_up = subgroupShuffleUp(20u, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); // vote bool has_all = subgroupAll(true); @@ -72,6 +72,9 @@ void main() uvec4 anded = subgroupAnd(ballot_value); uvec4 ored = subgroupOr(ballot_value); uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); + bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); added = subgroupInclusiveAdd(added); iadded = subgroupInclusiveAdd(iadded); @@ -121,6 +124,10 @@ void main() anded = subgroupClusteredAnd(anded, 4u); ored = subgroupClusteredOr(ored, 4u); xored = subgroupClusteredXor(xored, 4u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); #endif // quad diff --git a/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp b/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..c3e0922a166 --- /dev/null +++ 
b/shaders-hlsl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,14 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + vec3 a; + vec3 b; +}; + +void main() +{ + bvec3 c = lessThan(b, vec3(1.0)); + a = mix(vec3(1, 0, 0), vec3(0, 0, 1), c); +} diff --git a/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp b/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..5ffcc3f3a49 --- /dev/null +++ b/shaders-hlsl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + mat3 a; + float b; +}; + +void main() +{ + // Scalar to Matrix + bool c = b < 1.0; + a = c ? mat3(vec3(1), vec3(1), vec3(1)) : mat3(vec3(0), vec3(0), vec3(0)); + a = c ? mat3(1) : mat3(0); +} diff --git a/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag b/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag new file mode 100644 index 00000000000..4f22da56d96 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/cbuffer-packing-straddle.frag @@ -0,0 +1,50 @@ +#version 450 + +layout(set = 0, binding = 0) uniform UBO +{ + vec4 a[2]; // 0 + vec4 b; // 32 + vec4 c; // 48 + mat4x4 d; // 64 + + float e; // 128 + vec2 f; // 136 + + float g; // 144 + vec2 h; // 152 + + float i; // 160 + vec2 j; // 168 + + float k; + vec2 l; + + float m; + float n; + float o; + + vec4 p; + vec4 q; + vec3 r; + vec4 s; + vec4 t; + vec4 u; + float v; + float w; + float x; + float y; + float z; + float aa; + float ab; + float ac; + float ad; + float ae; + vec4 ef; +}; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = a[1]; +} diff --git a/shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag b/shaders-hlsl-no-opt/frag/constant-buffer-array.invalid.sm51.frag similarity index 100% rename from shaders-hlsl/frag/constant-buffer-array.invalid.sm51.frag rename to shaders-hlsl-no-opt/frag/constant-buffer-array.invalid.sm51.frag 
diff --git a/shaders-hlsl/frag/fp16.invalid.desktop.frag b/shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag similarity index 100% rename from shaders-hlsl/frag/fp16.invalid.desktop.frag rename to shaders-hlsl-no-opt/frag/fp16.invalid.desktop.frag diff --git a/shaders-hlsl-no-opt/frag/frag-coord.frag b/shaders-hlsl-no-opt/frag/frag-coord.frag new file mode 100644 index 00000000000..e688659a6b3 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/frag-coord.frag @@ -0,0 +1,8 @@ +#version 450 + +layout(location = 0) out vec3 FragColor; + +void main() +{ + FragColor = gl_FragCoord.xyz / gl_FragCoord.w; +} diff --git a/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag b/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag new file mode 100644 index 00000000000..6f70c772645 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/helper-invocation.fxconly.nofxc.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = float(gl_HelperInvocation); + demote; + FragColor = float(helperInvocationEXT()); +} diff --git a/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag b/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag new file mode 100644 index 00000000000..92e6621fda5 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/native-16bit-types.fxconly.nofxc.sm62.native-16bit.frag @@ -0,0 +1,72 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types : require + +layout(location = 0) out f16vec4 Output; +layout(location = 0) in f16vec4 Input; +layout(location = 1) out i16vec4 OutputI; +layout(location = 1) flat in i16vec4 InputI; +layout(location = 2) out u16vec4 OutputU; +layout(location = 2) flat in u16vec4 InputU; + +layout(set = 0, binding = 0) buffer Buf +{ + float16_t foo0[4]; + int16_t foo1[4]; + uint16_t foo2[4]; + + f16vec4 foo3[4]; + i16vec4 foo4[4]; + u16vec4 foo5[4]; + + f16mat2x3 foo6[4]; 
+ layout(row_major) f16mat2x3 foo7[4]; +}; + +void main() +{ + int index = int(gl_FragCoord.x); + Output = Input + float16_t(20.0); + OutputI = InputI + int16_t(-40); + OutputU = InputU + uint16_t(20); + + // Load 16-bit scalar. + Output += foo0[index]; + OutputI += foo1[index]; + OutputU += foo2[index]; + + // Load 16-bit vector. + Output += foo3[index]; + OutputI += foo4[index]; + OutputU += foo5[index]; + + // Load 16-bit vector from ColMajor matrix. + Output += foo6[index][1].xyzz; + + // Load 16-bit vector from RowMajor matrix. + Output += foo7[index][1].xyzz; + + // Load 16-bit matrix from ColMajor. + f16mat2x3 m0 = foo6[index]; + // Load 16-bit matrix from RowMajor. + f16mat2x3 m1 = foo7[index]; + + // Store 16-bit scalar + foo0[index] = Output.x; + foo1[index] = OutputI.y; + foo2[index] = OutputU.z; + + // Store 16-bit vector + foo3[index] = Output; + foo4[index] = OutputI; + foo5[index] = OutputU; + + // Store 16-bit vector to ColMajor matrix. + foo6[index][1] = Output.xyz; + // Store 16-bit vector to RowMajor matrix. + foo7[index][1] = Output.xyz; + + // Store 16-bit matrix to ColMajor. + foo6[index] = f16mat2x3(Output.xyz, Output.wzy); + // Store 16-bit matrix to RowMajor. 
+ foo7[index] = f16mat2x3(Output.xyz, Output.wzy); +} diff --git a/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag b/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag new file mode 100644 index 00000000000..452aa953a42 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/nonuniform-constructor.sm51.nonuniformresource.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; +layout(location = 1) flat in int vIndex; + +layout(set = 0, binding = 0) uniform texture2D uTex[]; +layout(set = 1, binding = 0) uniform sampler Immut; + +void main() +{ + FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); +} diff --git a/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag new file mode 100644 index 00000000000..59079fe58b4 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag b/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..b4d9509ab49 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 22 +; 
Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamp "uSamp" + OpName %vUV "vUV" + OpDecorate %FragColor Location 0 + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp Binding 0 + OpDecorate %vUV Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %int = OpTypeInt 32 0 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v2float %vUV + %21 = OpImageGather %v4float %14 %18 %int_1 + OpStore %FragColor %21 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag b/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag new file mode 100644 index 00000000000..77760522f94 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/ubo-offset-out-of-order.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform UBO +{ + layout(offset = 16) mat4 m; + layout(offset = 0) vec4 v; +}; + +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = m * vColor + v; +} diff --git a/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag b/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..41da8001f47 --- /dev/null +++ 
b/shaders-hlsl-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +int uninit_int; +ivec4 uninit_vector; +mat4 uninit_matrix; + +struct Foo { int a; }; +Foo uninit_foo; + +void main() +{ + int uninit_function_int; + if (vColor.x > 10.0) + uninit_function_int = 10; + else + uninit_function_int = 20; + FragColor = vColor; +} diff --git a/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag b/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag new file mode 100644 index 00000000000..9a8d9d20b25 --- /dev/null +++ b/shaders-hlsl-no-opt/frag/volatile-helper-invocation.fxconly.nofxc.spv16.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = float(gl_HelperInvocation); + demote; + FragColor = float(gl_HelperInvocation); +} diff --git a/shaders-hlsl-no-opt/vert/base-instance.vert b/shaders-hlsl-no-opt/vert/base-instance.vert new file mode 100644 index 00000000000..20b686cfe50 --- /dev/null +++ b/shaders-hlsl-no-opt/vert/base-instance.vert @@ -0,0 +1,7 @@ +#version 450 +#extension GL_ARB_shader_draw_parameters : require + +void main() +{ + gl_Position = vec4(gl_BaseInstanceARB); +} diff --git a/shaders-hlsl-no-opt/vert/base-vertex.vert b/shaders-hlsl-no-opt/vert/base-vertex.vert new file mode 100644 index 00000000000..ef486c857d0 --- /dev/null +++ b/shaders-hlsl-no-opt/vert/base-vertex.vert @@ -0,0 +1,7 @@ +#version 450 +#extension GL_ARB_shader_draw_parameters : require + +void main() +{ + gl_Position = vec4(gl_BaseVertexARB); +} diff --git a/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert b/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert new file mode 100644 index 00000000000..98c39bd5ef2 --- /dev/null +++ b/shaders-hlsl-no-opt/vert/block-io-auto-location-assignment.vert @@ 
-0,0 +1,31 @@ +#version 450 + +struct Bar +{ + float v[2]; + float w; +}; + +layout(location = 0) out V +{ + float a; + float b[2]; + Bar c[2]; + Bar d; +}; + +void main() +{ + a = 1.0; + b[0] = 2.0; + b[1] = 3.0; + c[0].v[0] = 4.0; + c[0].v[1] = 5.0; + c[0].w = 6.0; + c[1].v[0] = 7.0; + c[1].v[1] = 8.0; + c[1].w = 9.0; + d.v[0] = 10.0; + d.v[1] = 11.0; + d.w = 12.0; +} diff --git a/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert b/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert new file mode 100644 index 00000000000..0b8dbb5a91d --- /dev/null +++ b/shaders-hlsl-no-opt/vert/empty-shader.nofxc.sm30.vert @@ -0,0 +1,5 @@ +#version 450 + +void main() +{ +} diff --git a/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert b/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert new file mode 100644 index 00000000000..b49480617ab --- /dev/null +++ b/shaders-hlsl-no-opt/vert/flatten-matrix-input.flatten-matrix-vertex-input.vert @@ -0,0 +1,13 @@ +#version 450 + +layout(location = 0) in mat4 m4; +layout(location = 4) in mat3 m3; +layout(location = 7) in mat2 m2; +layout(location = 9) in vec4 v; + +void main() +{ + gl_Position = m4 * v; + gl_Position.xyz += m3 * v.xyz; + gl_Position.xy += m2 * v.xy; +} diff --git a/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp b/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..b7b4e0b2e1e --- /dev/null +++ b/shaders-hlsl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 
16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 
%input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ugt + + %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..6c060eedad9 --- /dev/null +++ b/shaders-hlsl/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,203 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 139 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a1" + OpMemberName %SSBO 1 "a2" + OpMemberName %SSBO 2 "a3" + OpMemberName %SSBO 3 "a4" + OpMemberName %SSBO 4 "b1" + OpMemberName %SSBO 5 "b2" + OpMemberName %SSBO 6 "b3" + OpMemberName %SSBO 7 "b4" + OpMemberName %SSBO 8 "c1" + OpMemberName %SSBO 9 "c2" + OpMemberName %SSBO 10 "c3" + OpMemberName %SSBO 11 "c4" + OpName %_ "" + OpName %i "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 8 + OpMemberDecorate %SSBO 2 Offset 16 + OpMemberDecorate %SSBO 3 Offset 32 + OpMemberDecorate %SSBO 4 Offset 48 + OpMemberDecorate %SSBO 5 Offset 56 + 
OpMemberDecorate %SSBO 6 Offset 64 + OpMemberDecorate %SSBO 7 Offset 80 + OpMemberDecorate %SSBO 8 Offset 96 + OpMemberDecorate %SSBO 9 Offset 104 + OpMemberDecorate %SSBO 10 Offset 112 + OpMemberDecorate %SSBO 11 Offset 128 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_8 = OpConstant %int 8 + %int_1 = OpConstant %int 1 + %int_5 = OpConstant %int 5 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_9 = OpConstant %int 9 + %int_2 = OpConstant %int 2 + %int_6 = OpConstant %int 6 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_10 = OpConstant %int 10 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_11 = OpConstant %int 11 +%_ptr_Function_int = OpTypePointer Function %int + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %7 + %35 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %37 = OpLoad %float %36 + %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %39 = OpLoad %float %38 + %40 = OpExtInst %float %1 NMin %37 %39 + %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %41 %40 + %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %43 = OpLoad %v2float %42 + %44 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %45 = OpLoad %v2float %44 + 
%46 = OpExtInst %v2float %1 NMin %43 %45 + %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %47 %46 + %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %49 = OpLoad %v3float %48 + %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %51 = OpLoad %v3float %50 + %52 = OpExtInst %v3float %1 NMin %49 %51 + %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %53 %52 + %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %55 = OpLoad %v4float %54 + %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %57 = OpLoad %v4float %56 + %58 = OpExtInst %v4float %1 NMin %55 %57 + %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %59 %58 + %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %61 = OpLoad %float %60 + %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %63 = OpLoad %float %62 + %64 = OpExtInst %float %1 NMax %61 %63 + %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %65 %64 + %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %67 = OpLoad %v2float %66 + %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %69 = OpLoad %v2float %68 + %70 = OpExtInst %v2float %1 NMax %67 %69 + %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %71 %70 + %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %73 = OpLoad %v3float %72 + %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %75 = OpLoad %v3float %74 + %76 = OpExtInst %v3float %1 NMax %73 %75 + %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %77 %76 + %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %79 = OpLoad %v4float %78 + %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %81 = OpLoad %v4float %80 + %82 = OpExtInst %v4float %1 NMax %79 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %83 %82 + %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %87 = OpLoad %float %86 + %88 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %89 = 
OpLoad %float %88 + %90 = OpExtInst %float %1 NClamp %85 %87 %89 + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %91 %90 + %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + %93 = OpLoad %v2float %92 + %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %95 = OpLoad %v2float %94 + %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %97 = OpLoad %v2float %96 + %98 = OpExtInst %v2float %1 NClamp %93 %95 %97 + %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %99 %98 + %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %101 = OpLoad %v3float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %105 = OpLoad %v3float %104 + %106 = OpExtInst %v3float %1 NClamp %101 %103 %105 + %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %107 %106 + %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + %109 = OpLoad %v4float %108 + %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %111 = OpLoad %v4float %110 + %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %113 = OpLoad %v4float %112 + %114 = OpExtInst %v4float %1 NClamp %109 %111 %113 + %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %115 %114 + OpStore %i %int_0 + OpBranch %116 + %116 = OpLabel + OpLoopMerge %117 %118 None + OpBranch %119 + %119 = OpLabel + %120 = OpLoad %int %i + %121 = OpSLessThan %bool %120 %int_2 + OpBranchConditional %121 %122 %117 + %122 = OpLabel + %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %124 = OpLoad %v2float %123 + %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %126 = OpLoad %v2float %125 + %127 = OpExtInst %v2float %1 NMin %124 %126 + %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %128 %127 + OpBranch %118 + %118 = OpLabel + %129 = OpLoad %int %i + %130 = OpIAdd %int %129 %int_1 + OpStore %i %130 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %132 = OpLoad %float %131 + %133 = 
OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0 + %134 = OpLoad %float %133 + %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1 + %136 = OpLoad %float %135 + %137 = OpExtInst %float %1 NClamp %132 %134 %136 + %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %138 %137 + OpBranch %116 + %117 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag b/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag new file mode 100644 index 00000000000..43d0970e8d5 --- /dev/null +++ b/shaders-hlsl/asm/frag/pack-and-unpack-uint2.fxconly.nofxc.sm60.asm.frag @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 34 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_ARB_gpu_shader_int64" + OpName %main "main" + OpName %packed "packed" + OpName %unpacked "unpacked" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %ulong = OpTypeInt 64 0 +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %uint_18 = OpConstant %uint 18 + %uint_52 = OpConstant %uint 52 + %13 = OpConstantComposite %v2uint %uint_18 %uint_52 +%_ptr_Function_v2uint = OpTypePointer Function %v2uint + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %uint_0 = OpConstant %uint 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_1 = OpConstant %uint 1 + %float_1 = OpConstant %float 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %packed = OpVariable %_ptr_Function_ulong Function + %unpacked = OpVariable 
%_ptr_Function_v2uint Function + %14 = OpBitcast %ulong %13 + OpStore %packed %14 + %17 = OpLoad %ulong %packed + %18 = OpBitcast %v2uint %17 + OpStore %unpacked %18 + %25 = OpAccessChain %_ptr_Function_uint %unpacked %uint_0 + %26 = OpLoad %uint %25 + %27 = OpConvertUToF %float %26 + %29 = OpAccessChain %_ptr_Function_uint %unpacked %uint_1 + %30 = OpLoad %uint %29 + %31 = OpConvertUToF %float %30 + %33 = OpCompositeConstruct %v4float %27 %31 %float_1 %float_1 + OpStore %FragColor %33 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag index ae7a972d7b2..e7e6f37ea27 100644 --- a/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag +++ b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag @@ -5,6 +5,7 @@ ; Schema: 0 OpCapability Shader OpCapability StorageInputOutput16 + OpCapability Float16 OpExtension "SPV_KHR_16bit_storage" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 diff --git a/shaders-hlsl/comp/access-chain-load-composite.comp b/shaders-hlsl/comp/access-chain-load-composite.comp new file mode 100644 index 00000000000..69cc7a13be2 --- /dev/null +++ b/shaders-hlsl/comp/access-chain-load-composite.comp @@ -0,0 +1,35 @@ +#version 450 +layout(local_size_x = 1) in; + +struct Baz +{ + float c; +}; + +struct Bar +{ + float d[2][4]; + Baz baz[2]; +}; + +struct Foo +{ + mat2 a; + vec2 b; + Bar c[5]; +}; + +layout(row_major, std430, set = 0, binding = 0) buffer SSBO +{ + Foo foo; + Foo foo2; +}; + +void main() +{ + Foo f = foo; + f.a += 1.0; + f.b += 2.0; + f.c[3].d[1][1] += 5.0; + foo2 = f; +} diff --git a/shaders-hlsl/comp/access-chains.force-uav.comp b/shaders-hlsl/comp/access-chains.force-uav.comp new file mode 100644 index 00000000000..639f3cac155 --- /dev/null +++ b/shaders-hlsl/comp/access-chains.force-uav.comp @@ -0,0 +1,24 @@ +#version 310 es +layout(local_size_x = 1) in; + +// TODO: Read structs, matrices and arrays. 
+ +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 a[3][2][4]; + float b[3][2][4]; + vec4 unsized[]; +} ro; + +layout(std430, binding = 1) writeonly buffer SSBO1 +{ + vec4 c[3][2][4]; + float d[3][2][4]; + vec4 unsized[]; +} wo; + +void main() +{ + wo.c[2][gl_GlobalInvocationID.x][1] = ro.a[1][gl_GlobalInvocationID.x][2]; + wo.unsized[gl_GlobalInvocationID.x] = ro.unsized[gl_GlobalInvocationID.x]; +} diff --git a/shaders-hlsl/comp/image.nonwritable-uav-texture.comp b/shaders-hlsl/comp/image.nonwritable-uav-texture.comp new file mode 100644 index 00000000000..1d3c8b4c65d --- /dev/null +++ b/shaders-hlsl/comp/image.nonwritable-uav-texture.comp @@ -0,0 +1,77 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(r32f, binding = 0) uniform readonly image2D uImageInF; +layout(r32f, binding = 1) uniform writeonly image2D uImageOutF; +layout(r32i, binding = 2) uniform readonly iimage2D uImageInI; +layout(r32i, binding = 3) uniform writeonly iimage2D uImageOutI; +layout(r32ui, binding = 4) uniform readonly uimage2D uImageInU; +layout(r32ui, binding = 5) uniform writeonly uimage2D uImageOutU; +layout(r32f, binding = 6) uniform readonly imageBuffer uImageInBuffer; +layout(r32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer; + +layout(rg32f, binding = 8) uniform readonly image2D uImageInF2; +layout(rg32f, binding = 9) uniform writeonly image2D uImageOutF2; +layout(rg32i, binding = 10) uniform readonly iimage2D uImageInI2; +layout(rg32i, binding = 11) uniform writeonly iimage2D uImageOutI2; +layout(rg32ui, binding = 12) uniform readonly uimage2D uImageInU2; +layout(rg32ui, binding = 13) uniform writeonly uimage2D uImageOutU2; +layout(rg32f, binding = 14) uniform readonly imageBuffer uImageInBuffer2; +layout(rg32f, binding = 15) uniform writeonly imageBuffer uImageOutBuffer2; + +layout(rgba32f, binding = 16) uniform readonly image2D uImageInF4; +layout(rgba32f, binding = 17) uniform writeonly image2D uImageOutF4; +layout(rgba32i, binding = 18) uniform 
readonly iimage2D uImageInI4; +layout(rgba32i, binding = 19) uniform writeonly iimage2D uImageOutI4; +layout(rgba32ui, binding = 20) uniform readonly uimage2D uImageInU4; +layout(rgba32ui, binding = 21) uniform writeonly uimage2D uImageOutU4; +layout(rgba32f, binding = 22) uniform readonly imageBuffer uImageInBuffer4; +layout(rgba32f, binding = 23) uniform writeonly imageBuffer uImageOutBuffer4; + +layout(binding = 24) uniform writeonly image2D uImageNoFmtF; +layout(binding = 25) uniform writeonly uimage2D uImageNoFmtU; +layout(binding = 26) uniform writeonly iimage2D uImageNoFmtI; + +void main() +{ + vec4 f = imageLoad(uImageInF, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF, ivec2(gl_GlobalInvocationID.xy), f); + + ivec4 i = imageLoad(uImageInI, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI, ivec2(gl_GlobalInvocationID.xy), i); + + uvec4 u = imageLoad(uImageInU, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU, ivec2(gl_GlobalInvocationID.xy), u); + + vec4 b = imageLoad(uImageInBuffer, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer, int(gl_GlobalInvocationID.x), b); + + vec4 f2 = imageLoad(uImageInF2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF2, ivec2(gl_GlobalInvocationID.xy), f2); + + ivec4 i2 = imageLoad(uImageInI2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI2, ivec2(gl_GlobalInvocationID.xy), i2); + + uvec4 u2 = imageLoad(uImageInU2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU2, ivec2(gl_GlobalInvocationID.xy), u2); + + vec4 b2 = imageLoad(uImageInBuffer2, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer2, int(gl_GlobalInvocationID.x), b2); + + vec4 f4 = imageLoad(uImageInF4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF4, ivec2(gl_GlobalInvocationID.xy), f4); + + ivec4 i4 = imageLoad(uImageInI4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI4, ivec2(gl_GlobalInvocationID.xy), i4); + + uvec4 u4 = imageLoad(uImageInU4, 
ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU4, ivec2(gl_GlobalInvocationID.xy), u4); + + vec4 b4 = imageLoad(uImageInBuffer4, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer4, int(gl_GlobalInvocationID.x), b4); + + imageStore(uImageNoFmtF, ivec2(gl_GlobalInvocationID.xy), b2); + imageStore(uImageNoFmtU, ivec2(gl_GlobalInvocationID.xy), u4); + imageStore(uImageNoFmtI, ivec2(gl_GlobalInvocationID.xy), i4); +} + diff --git a/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp b/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp new file mode 100644 index 00000000000..3a2a8d0d2dd --- /dev/null +++ b/shaders-hlsl/comp/rayquery.nofxc.fxconly.comp @@ -0,0 +1,213 @@ +#version 460 +#extension GL_EXT_ray_query : enable +#extension GL_EXT_ray_flags_primitive_culling : enable + +layout(primitive_culling); +struct Ray +{ + vec3 pos; + float tmin; + vec3 dir; + float tmax; +}; + +layout(std430, set = 0, binding = 0) buffer Log +{ + uint x; + uint y; +}; + +layout(binding = 1, set = 0) uniform accelerationStructureEXT rtas; +layout(std430, set = 0, binding = 2) buffer Rays { Ray rays[]; }; + +void doSomething() +{ + x = 0; + y = 0; +} + +Ray makeRayDesc() +{ + Ray ray; + ray.pos= vec3(0,0,0); + ray.dir = vec3(1,0,0); + ray.tmin = 0.0f; + ray.tmax = 9999.0; + return ray; +} + +void main() +{ + Ray ray = makeRayDesc(); + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, rtas, gl_RayFlagsNoneEXT, 0xFF, ray.pos, ray.tmin, ray.dir, ray.tmax); + + mat4x3 _mat4x3; + mat3x4 _mat3x4; + + while (rayQueryProceedEXT(rayQuery)) + { + uint candidateType = rayQueryGetIntersectionTypeEXT(rayQuery, false); + switch(candidateType) + { + case gl_RayQueryCandidateIntersectionTriangleEXT: + + rayQueryTerminateEXT(rayQuery); + _mat4x3 = rayQueryGetIntersectionObjectToWorldEXT(rayQuery, false); + _mat3x4 = transpose(_mat4x3); + rayQueryConfirmIntersectionEXT(rayQuery); + + if (rayQueryGetIntersectionFrontFaceEXT(rayQuery, true)) + { + doSomething(); + } + + if 
(rayQueryGetIntersectionBarycentricsEXT(rayQuery, true).x == 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionInstanceIdEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionObjectRayDirectionEXT(rayQuery, true).x > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionObjectRayOriginEXT(rayQuery, true).x > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if (rayQueryGetIntersectionTEXT(rayQuery, true) > 0.f) + { + doSomething(); + } + + if (rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(rayQuery, true) > 0) + { + doSomething(); + } + break; + + case gl_RayQueryCandidateIntersectionAABBEXT: + { + _mat4x3 = rayQueryGetIntersectionObjectToWorldEXT(rayQuery, false); + _mat3x4 = transpose(_mat4x3); + if (rayQueryGetIntersectionCandidateAABBOpaqueEXT(rayQuery)) + { + doSomething(); + } + + float t = 0.5; + rayQueryGenerateIntersectionEXT(rayQuery, t); + rayQueryTerminateEXT(rayQuery); + break; + } + } + } + + if(_mat3x4[0][0] == _mat4x3[0][0]) + { + doSomething(); + } + + uint committedStatus = rayQueryGetIntersectionTypeEXT(rayQuery, true); + + switch(committedStatus) + { + case gl_RayQueryCommittedIntersectionNoneEXT : + _mat4x3 = rayQueryGetIntersectionWorldToObjectEXT(rayQuery, false); + _mat3x4 = transpose(_mat4x3); + break; + + case gl_RayQueryCommittedIntersectionTriangleEXT : + _mat4x3 = rayQueryGetIntersectionWorldToObjectEXT(rayQuery, true); + _mat3x4 = transpose(_mat4x3); + + if (rayQueryGetIntersectionFrontFaceEXT(rayQuery, true)) + { + doSomething(); + } + + if (rayQueryGetIntersectionBarycentricsEXT(rayQuery, true).y == 0) + { + doSomething(); + } + break; + + case gl_RayQueryCommittedIntersectionGeneratedEXT : + + if(rayQueryGetIntersectionGeometryIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + 
if(rayQueryGetIntersectionInstanceIdEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionObjectRayDirectionEXT(rayQuery, true).z > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionObjectRayOriginEXT(rayQuery, true).x > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true) > 0) + { + doSomething(); + } + + if(rayQueryGetIntersectionTEXT(rayQuery, true) > 0.f) + { + doSomething(); + } + break; + } + + if (_mat3x4[0][0] == _mat4x3[0][0]) + { + doSomething(); + } + + if (rayQueryGetRayFlagsEXT(rayQuery) > gl_RayFlagsSkipTrianglesEXT) + { + doSomething(); + } + + if (rayQueryGetRayTMinEXT(rayQuery) > 0.0) + { + doSomething(); + } + + vec3 o = rayQueryGetWorldRayOriginEXT(rayQuery); + vec3 d = rayQueryGetWorldRayDirectionEXT(rayQuery); + + if (o.x == d.z) + { + doSomething(); + } +} diff --git a/shaders-hlsl/flatten/array.flatten.vert b/shaders-hlsl/flatten/array.flatten.vert new file mode 100644 index 00000000000..fa6da076c96 --- /dev/null +++ b/shaders-hlsl/flatten/array.flatten.vert @@ -0,0 +1,19 @@ +#version 310 es + +layout(std140) uniform UBO +{ + vec4 A4[5][4][2]; + mat4 uMVP; + vec4 A1[2]; + vec4 A2[2][3]; + float A3[3]; + vec4 Offset; +}; +layout(location = 0) in vec4 aVertex; + +void main() +{ + vec4 a4 = A4[2][3][1]; // 2 * (4 * 2) + 3 * 2 + 1 = 16 + 6 + 1 = 23. 
+ vec4 offset = A2[1][1] + A1[1] + A3[2]; + gl_Position = uMVP * aVertex + Offset + offset; +} diff --git a/shaders-hlsl/flatten/basic.flatten.vert b/shaders-hlsl/flatten/basic.flatten.vert new file mode 100644 index 00000000000..e60a9067b14 --- /dev/null +++ b/shaders-hlsl/flatten/basic.flatten.vert @@ -0,0 +1,16 @@ +#version 310 es + +layout(std140) uniform UBO +{ + mat4 uMVP; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; +layout(location = 0) out vec3 vNormal; + +void main() +{ + gl_Position = uMVP * aVertex; + vNormal = aNormal; +} diff --git a/shaders-hlsl/flatten/copy.flatten.vert b/shaders-hlsl/flatten/copy.flatten.vert new file mode 100644 index 00000000000..4f1b8805e74 --- /dev/null +++ b/shaders-hlsl/flatten/copy.flatten.vert @@ -0,0 +1,34 @@ +#version 310 es + +struct Light +{ + vec3 Position; + float Radius; + + vec4 Color; +}; + +layout(std140) uniform UBO +{ + mat4 uMVP; + + Light lights[4]; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; +layout(location = 0) out vec4 vColor; + +void main() +{ + gl_Position = uMVP * aVertex; + + vColor = vec4(0.0); + + for (int i = 0; i < 4; ++i) + { + Light light = lights[i]; + vec3 L = aVertex.xyz - light.Position; + vColor += dot(aNormal, normalize(L)) * (clamp(1.0 - length(L) / light.Radius, 0.0, 1.0) * lights[i].Color); + } +} diff --git a/shaders-hlsl/flatten/dynamic.flatten.vert b/shaders-hlsl/flatten/dynamic.flatten.vert new file mode 100644 index 00000000000..a341d452884 --- /dev/null +++ b/shaders-hlsl/flatten/dynamic.flatten.vert @@ -0,0 +1,33 @@ +#version 310 es + +struct Light +{ + vec3 Position; + float Radius; + + vec4 Color; +}; + +layout(std140) uniform UBO +{ + mat4 uMVP; + + Light lights[4]; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; +layout(location = 0) out vec4 vColor; + +void main() +{ + gl_Position = uMVP * aVertex; + + vColor = vec4(0.0); + + for (int i = 0; i < 4; ++i) + { + 
vec3 L = aVertex.xyz - lights[i].Position; + vColor += dot(aNormal, normalize(L)) * (clamp(1.0 - length(L) / lights[i].Radius, 0.0, 1.0) * lights[i].Color); + } +} diff --git a/shaders-hlsl/flatten/matrix-conversion.flatten.frag b/shaders-hlsl/flatten/matrix-conversion.flatten.frag new file mode 100644 index 00000000000..427825c3402 --- /dev/null +++ b/shaders-hlsl/flatten/matrix-conversion.flatten.frag @@ -0,0 +1,14 @@ +#version 310 es +precision mediump float; +layout(location = 0) out vec3 FragColor; +layout(location = 0) flat in vec3 vNormal; + +layout(binding = 0, std140) uniform UBO +{ + mat4 m; +}; + +void main() +{ + FragColor = mat3(m) * vNormal; +} diff --git a/shaders-hlsl/flatten/matrixindex.flatten.vert b/shaders-hlsl/flatten/matrixindex.flatten.vert new file mode 100644 index 00000000000..0ee78384324 --- /dev/null +++ b/shaders-hlsl/flatten/matrixindex.flatten.vert @@ -0,0 +1,25 @@ +#version 310 es + +layout(std140) uniform UBO +{ + layout(column_major) mat4 M1C; + layout(row_major) mat4 M1R; + layout(column_major) mat2x4 M2C; + layout(row_major) mat2x4 M2R; +}; + +layout(location = 0) out vec4 oA; +layout(location = 1) out vec4 oB; +layout(location = 2) out vec4 oC; +layout(location = 3) out vec4 oD; +layout(location = 4) out vec4 oE; + +void main() +{ + gl_Position = vec4(0.0); + oA = M1C[1]; + oB = M1R[1]; + oC = M2C[1]; + oD = M2R[0]; + oE = vec4(M1C[1][2], M1R[1][2], M2C[1][2], M2R[1][2]); +} diff --git a/shaders-hlsl/flatten/multiindex.flatten.vert b/shaders-hlsl/flatten/multiindex.flatten.vert new file mode 100644 index 00000000000..0b471d86e09 --- /dev/null +++ b/shaders-hlsl/flatten/multiindex.flatten.vert @@ -0,0 +1,13 @@ +#version 310 es + +layout(std140) uniform UBO +{ + vec4 Data[3][5]; +}; + +layout(location = 0) in ivec2 aIndex; + +void main() +{ + gl_Position = Data[aIndex.x][aIndex.y]; +} diff --git a/shaders-hlsl/flatten/push-constant.flatten.vert b/shaders-hlsl/flatten/push-constant.flatten.vert new file mode 100644 index 
00000000000..c7b1b42e1b7 --- /dev/null +++ b/shaders-hlsl/flatten/push-constant.flatten.vert @@ -0,0 +1,17 @@ +#version 310 es + +layout(push_constant, std430) uniform PushMe +{ + mat4 MVP; + mat2 Rot; // The MatrixStride will be 8 here. + float Arr[4]; +} registers; + +layout(location = 0) in vec2 Rot; +layout(location = 1) in vec4 Pos; +layout(location = 0) out vec2 vRot; +void main() +{ + gl_Position = registers.MVP * Pos; + vRot = registers.Rot * Rot + registers.Arr[2]; // Constant access should work even if array stride is just 4 here. +} diff --git a/shaders-hlsl/flatten/rowmajor.flatten.vert b/shaders-hlsl/flatten/rowmajor.flatten.vert new file mode 100644 index 00000000000..88c468c8f25 --- /dev/null +++ b/shaders-hlsl/flatten/rowmajor.flatten.vert @@ -0,0 +1,16 @@ +#version 310 es + +layout(std140) uniform UBO +{ + layout(column_major) mat4 uMVPR; + layout(row_major) mat4 uMVPC; + layout(row_major) mat2x4 uMVP; +}; + +layout(location = 0) in vec4 aVertex; + +void main() +{ + vec2 v = aVertex * uMVP; + gl_Position = uMVPR * aVertex + uMVPC * aVertex; +} diff --git a/shaders-hlsl/flatten/struct.flatten.vert b/shaders-hlsl/flatten/struct.flatten.vert new file mode 100644 index 00000000000..936bb41b852 --- /dev/null +++ b/shaders-hlsl/flatten/struct.flatten.vert @@ -0,0 +1,30 @@ +#version 310 es + +struct Light +{ + vec3 Position; + float Radius; + + vec4 Color; +}; + +layout(std140) uniform UBO +{ + mat4 uMVP; + + Light light; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; +layout(location = 0) out vec4 vColor; + +void main() +{ + gl_Position = uMVP * aVertex; + + vColor = vec4(0.0); + + vec3 L = aVertex.xyz - light.Position; + vColor += dot(aNormal, normalize(L)) * (clamp(1.0 - length(L) / light.Radius, 0.0, 1.0) * light.Color); +} diff --git a/shaders-hlsl/flatten/struct.rowmajor.flatten.vert b/shaders-hlsl/flatten/struct.rowmajor.flatten.vert new file mode 100644 index 00000000000..231389b8f49 --- /dev/null +++ 
b/shaders-hlsl/flatten/struct.rowmajor.flatten.vert @@ -0,0 +1,26 @@ +#version 310 es + +struct Foo +{ + mat3x4 MVP0; + mat3x4 MVP1; +}; + +layout(std140, binding = 0) uniform UBO +{ + layout(row_major) Foo foo; +}; + +layout(location = 0) in vec4 v0; +layout(location = 1) in vec4 v1; +layout(location = 0) out vec3 V0; +layout(location = 1) out vec3 V1; + +void main() +{ + Foo f = foo; + vec3 a = v0 * f.MVP0; + vec3 b = v1 * f.MVP1; + V0 = a; + V1 = b; +} diff --git a/shaders-hlsl/flatten/swizzle.flatten.vert b/shaders-hlsl/flatten/swizzle.flatten.vert new file mode 100644 index 00000000000..fafff7734eb --- /dev/null +++ b/shaders-hlsl/flatten/swizzle.flatten.vert @@ -0,0 +1,47 @@ +#version 310 es + +// comments note the 16b alignment boundaries (see GL spec 7.6.2.2 Standard Uniform Block Layout) +layout(std140, binding = 0) uniform UBO +{ + // 16b boundary + vec4 A; + // 16b boundary + vec2 B0; + vec2 B1; + // 16b boundary + float C0; + // 16b boundary (vec3 is aligned to 16b) + vec3 C1; + // 16b boundary + vec3 D0; + float D1; + // 16b boundary + float E0; + float E1; + float E2; + float E3; + // 16b boundary + float F0; + vec2 F1; + // 16b boundary (vec2 before us is aligned to 8b) + float F2; +}; + +layout(location = 0) out vec4 oA; +layout(location = 1) out vec4 oB; +layout(location = 2) out vec4 oC; +layout(location = 3) out vec4 oD; +layout(location = 4) out vec4 oE; +layout(location = 5) out vec4 oF; + +void main() +{ + gl_Position = vec4(0.0); + + oA = A; + oB = vec4(B0, B1); + oC = vec4(C0, C1); + oD = vec4(D0, D1); + oE = vec4(E0, E1, E2, E3); + oF = vec4(F0, F1, F2); +} diff --git a/shaders-hlsl/flatten/types.flatten.frag b/shaders-hlsl/flatten/types.flatten.frag new file mode 100644 index 00000000000..faab5b7e058 --- /dev/null +++ b/shaders-hlsl/flatten/types.flatten.frag @@ -0,0 +1,27 @@ +#version 310 es +precision mediump float; + +layout(std140, binding = 0) uniform UBO0 +{ + vec4 a; + vec4 b; +}; + +layout(std140, binding = 0) uniform UBO1 +{ + 
ivec4 c; + ivec4 d; +}; + +layout(std140, binding = 0) uniform UBO2 +{ + uvec4 e; + uvec4 f; +}; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(c) + vec4(d) + vec4(e) + vec4(f) + a + b; +} diff --git a/shaders-hlsl/frag/demote-to-helper.frag b/shaders-hlsl/frag/demote-to-helper.frag new file mode 100644 index 00000000000..bdfef6f9b43 --- /dev/null +++ b/shaders-hlsl/frag/demote-to-helper.frag @@ -0,0 +1,7 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; +} diff --git a/shaders-hlsl/frag/image-query-uav.frag b/shaders-hlsl/frag/image-query-uav.frag new file mode 100644 index 00000000000..25103e6e95f --- /dev/null +++ b/shaders-hlsl/frag/image-query-uav.frag @@ -0,0 +1,18 @@ +#version 450 + +layout(rgba32f, binding = 0) uniform writeonly image1D uImage1D; +layout(rg32f, binding = 1) uniform writeonly image2D uImage2D; +layout(r32f, binding = 2) uniform readonly image2DArray uImage2DArray; +layout(rgba8, binding = 3) uniform writeonly image3D uImage3D; +layout(rgba8_snorm, binding = 6) uniform writeonly imageBuffer uImageBuffer; + +// There is no RWTexture2DMS. 
+ +void main() +{ + int a = imageSize(uImage1D); + ivec2 b = imageSize(uImage2D); + ivec3 c = imageSize(uImage2DArray); + ivec3 d = imageSize(uImage3D); + int e = imageSize(uImageBuffer); +} diff --git a/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag b/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag new file mode 100644 index 00000000000..25103e6e95f --- /dev/null +++ b/shaders-hlsl/frag/image-query-uav.nonwritable-uav-texture.frag @@ -0,0 +1,18 @@ +#version 450 + +layout(rgba32f, binding = 0) uniform writeonly image1D uImage1D; +layout(rg32f, binding = 1) uniform writeonly image2D uImage2D; +layout(r32f, binding = 2) uniform readonly image2DArray uImage2DArray; +layout(rgba8, binding = 3) uniform writeonly image3D uImage3D; +layout(rgba8_snorm, binding = 6) uniform writeonly imageBuffer uImageBuffer; + +// There is no RWTexture2DMS. + +void main() +{ + int a = imageSize(uImage1D); + ivec2 b = imageSize(uImage2D); + ivec3 c = imageSize(uImage2DArray); + ivec3 d = imageSize(uImage3D); + int e = imageSize(uImageBuffer); +} diff --git a/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag b/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag new file mode 100644 index 00000000000..30b957b26b4 --- /dev/null +++ b/shaders-hlsl/frag/legacy-tex-modifiers.sm30.frag @@ -0,0 +1,13 @@ +#version 450 + +layout(location = 0) in vec2 vUV; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 0) uniform sampler2D uSampler; + +void main() +{ + FragColor = textureProj(uSampler, vec3(vUV, 5.0)); + FragColor += texture(uSampler, vUV, 3.0); + FragColor += textureLod(uSampler, vUV, 2.0); + FragColor += textureGrad(uSampler, vUV, vec2(4.0), vec2(5.0)); +} diff --git a/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag b/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag index 0aadd14883c..35373279fc1 100644 --- a/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag +++ 
b/shaders-hlsl/frag/nonuniform-qualifier.nonuniformresource.sm51.frag @@ -1,28 +1,52 @@ #version 450 #extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_samplerless_texture_functions : require layout(set = 0, binding = 0) uniform texture2D uSamplers[]; -layout(set = 1, binding = 0) uniform sampler2D uCombinedSamplers[]; -layout(set = 2, binding = 0) uniform sampler uSamps[]; +layout(set = 1, binding = 0) uniform texture2DMS uSamplersMS[]; +layout(set = 2, binding = 4) uniform sampler2D uCombinedSamplers[]; +layout(set = 3, binding = 1) uniform sampler uSamps[]; layout(location = 0) flat in int vIndex; layout(location = 1) in vec2 vUV; layout(location = 0) out vec4 FragColor; -layout(set = 3, binding = 0) uniform UBO +layout(r32f, set = 7, binding = 5) uniform image2D uImages[]; +layout(r32ui, set = 8, binding = 5) uniform uimage2D uImagesU32[]; + +layout(set = 9, binding = 2) uniform UBO { vec4 v[64]; } ubos[]; -layout(set = 4, binding = 0) readonly buffer SSBO +layout(set = 10, binding = 3) buffer SSBO { + uint counter; vec4 v[]; } ssbos[]; void main() { int i = vIndex; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(i + 10)], uSamps[nonuniformEXT(i + 40)]), vUV); + FragColor = texture(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); FragColor = texture(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); FragColor += ubos[nonuniformEXT(i + 20)].v[nonuniformEXT(i + 40)]; FragColor += ssbos[nonuniformEXT(i + 50)].v[nonuniformEXT(i + 60)]; + ssbos[nonuniformEXT(i + 60)].v[nonuniformEXT(i + 70)] = vec4(20.0); + + FragColor = texelFetch(uSamplers[nonuniformEXT(i + 10)], ivec2(vUV), 0); + atomicAdd(ssbos[nonuniformEXT(i + 100)].counter, 100u); + + vec2 queried = textureQueryLod(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); + queried += textureQueryLod(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); + FragColor.xy += queried; + + FragColor.x += float(textureQueryLevels(uSamplers[nonuniformEXT(i + 20)])); + 
FragColor.y += float(textureSamples(uSamplersMS[nonuniformEXT(i + 20)])); + FragColor.xy += vec2(textureSize(uSamplers[nonuniformEXT(i + 20)], 0)); + + FragColor += imageLoad(uImages[nonuniformEXT(i + 50)], ivec2(vUV)); + FragColor.xy += vec2(imageSize(uImages[nonuniformEXT(i + 20)])); + imageStore(uImages[nonuniformEXT(i + 60)], ivec2(vUV), vec4(50.0)); + + imageAtomicAdd(uImagesU32[nonuniformEXT(i + 70)], ivec2(vUV), 40u); } diff --git a/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 00000000000..ceac8cc50e4 --- /dev/null +++ b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. 
+ baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag b/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag new file mode 100644 index 00000000000..cd035467be2 --- /dev/null +++ b/shaders-hlsl/frag/readonly-coherent-ssbo.force-uav.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 0) coherent readonly buffer SSBO +{ + vec4 a; +}; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = a; +} diff --git a/shaders-hlsl/frag/readonly-coherent-ssbo.frag b/shaders-hlsl/frag/readonly-coherent-ssbo.frag new file mode 100644 index 00000000000..cd035467be2 --- /dev/null +++ b/shaders-hlsl/frag/readonly-coherent-ssbo.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 0) coherent readonly buffer SSBO +{ + vec4 a; +}; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = a; +} diff --git a/shaders-hlsl/frag/sample-mask-in-and-out.frag b/shaders-hlsl/frag/sample-mask-in-and-out.frag new file mode 100644 index 00000000000..75ed3cc1675 --- /dev/null +++ b/shaders-hlsl/frag/sample-mask-in-and-out.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = gl_SampleMaskIn[0]; +} diff --git a/shaders-hlsl/frag/sample-mask-in.frag b/shaders-hlsl/frag/sample-mask-in.frag new file mode 100644 index 00000000000..16031a35573 --- /dev/null +++ b/shaders-hlsl/frag/sample-mask-in.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + if ((gl_SampleMaskIn[0] & (1 << gl_SampleID)) != 0) + { + FragColor = vec4(1.0); + } +} diff --git a/shaders-hlsl/frag/sample-mask-out.frag b/shaders-hlsl/frag/sample-mask-out.frag 
new file mode 100644 index 00000000000..c7fb80eba9d --- /dev/null +++ b/shaders-hlsl/frag/sample-mask-out.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = 0; +} diff --git a/shaders-hlsl/frag/switch-unreachable-break.frag b/shaders-hlsl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..b0421e60ef3 --- /dev/null +++ b/shaders-hlsl/frag/switch-unreachable-break.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vInput; + +layout(set = 0, binding = 0) uniform UBO +{ + int cond; + int cond2; +}; + +void main() +{ + bool frog = false; + switch (cond) + { + case 1: + if (cond2 < 50) + break; + else + discard; + + break; + + default: + frog = true; + break; + } + + FragColor = frog ? vec4(10.0) : vec4(20.0); +} + diff --git a/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..4f9500fe177 --- /dev/null +++ b/shaders-hlsl/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,74 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(lines, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main3() +{ + 
gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; +} + +void main2() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 6.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + main3(); + } +} + +void main() +{ + main2(); +} diff --git a/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..4d8e3f64944 --- /dev/null +++ b/shaders-hlsl/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,64 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(triangles, max_vertices = 24, max_primitives = 22) out; + +out 
gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + // gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[0] = 3.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} 
diff --git a/shaders-hlsl/vert/invariant.vert b/shaders-hlsl/vert/invariant.vert new file mode 100644 index 00000000000..239b985da12 --- /dev/null +++ b/shaders-hlsl/vert/invariant.vert @@ -0,0 +1,13 @@ +#version 310 es + +invariant gl_Position; +layout(location = 0) invariant out vec4 vColor; +layout(location = 0) in vec4 vInput0; +layout(location = 1) in vec4 vInput1; +layout(location = 2) in vec4 vInput2; + +void main() +{ + gl_Position = vInput0 + vInput1 * vInput2; + vColor = (vInput0 - vInput1) * vInput2; +} diff --git a/shaders-hlsl/vert/no-contraction.vert b/shaders-hlsl/vert/no-contraction.vert new file mode 100644 index 00000000000..206fbf0de80 --- /dev/null +++ b/shaders-hlsl/vert/no-contraction.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 mul = vA * vB; + precise vec4 add = vA + vB; + precise vec4 sub = vA - vB; + precise vec4 mad = vA * vB + vC; + precise vec4 summed = mul + add + sub + mad; + gl_Position = summed; +} diff --git a/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..87aee2db54f --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %T "T" + OpMemberName %T 0 "a" + OpName %v "v" + OpName %T_0 "T" + OpMemberName %T_0 0 "b" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "foo" + OpName %_ "" + OpName %T_1 "T" + OpMemberName %T_1 0 "c" + OpName %SSBO2 "SSBO2" + OpMemberName %SSBO2 0 "bar" + OpName %__0 "" 
+ OpMemberDecorate %T_0 0 Offset 0 + OpDecorate %_runtimearr_T_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %T_1 0 Offset 0 + OpDecorate %_runtimearr_T_1 ArrayStride 16 + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %T = OpTypeStruct %float +%_ptr_Function_T = OpTypePointer Function %T + %float_40 = OpConstant %float 40 + %11 = OpConstantComposite %T %float_40 + %T_0 = OpTypeStruct %float +%_runtimearr_T_0 = OpTypeRuntimeArray %T_0 + %SSBO1 = OpTypeStruct %_runtimearr_T_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_T_0 = OpTypePointer Uniform %T_0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %T_1 = OpTypeStruct %float +%_runtimearr_T_1 = OpTypeRuntimeArray %T_1 + %SSBO2 = OpTypeStruct %_runtimearr_T_1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %__0 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int_30 = OpConstant %int 30 +%_ptr_Uniform_T_1 = OpTypePointer Uniform %T_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_T Function + OpStore %v %11 + %20 = OpLoad %T %v + %22 = OpAccessChain %_ptr_Uniform_T_0 %_ %int_0 %int_10 + %23 = OpCompositeExtract %float %20 0 + %25 = OpAccessChain %_ptr_Uniform_float %22 %int_0 + OpStore %25 %23 + %32 = OpLoad %T %v + %34 = OpAccessChain %_ptr_Uniform_T_1 %__0 %int_0 %int_30 + %35 = OpCompositeExtract %float %32 0 + %36 = OpAccessChain %_ptr_Uniform_float %34 %int_0 + OpStore %36 %35 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp b/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 
index 00000000000..3f2d141a1f5 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp b/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp new file mode 100644 index 00000000000..832a2735497 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/atomic-min-max-sign.asm.comp @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; 
Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %SSBO = OpTypeStruct %uint %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%uint_4294967295 = OpConstant %uint 4294967295 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int + %int_n3 = OpConstant %int -3 + %int_4 = OpConstant %int 4 + %v3uint = OpTypeVector %uint 3 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %13 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %18 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %22 = OpAccessChain %_ptr_Uniform_int %_ %int_1 + %25 = OpAccessChain %_ptr_Uniform_int %_ %int_1 + %30 = OpAtomicUMax %uint %13 %uint_1 %uint_0 %uint_1 + %31 = OpAtomicSMin %uint %13 %uint_1 %uint_0 %uint_1 + %32 = OpAtomicUMin %uint %18 %uint_1 %uint_0 %uint_4294967295 + %33 = OpAtomicSMax %uint %18 %uint_1 %uint_0 %uint_4294967295 + %34 = OpAtomicSMax %int %22 %uint_1 %uint_0 %int_n3 + %35 = OpAtomicUMin %int %22 %uint_1 %uint_0 %int_n3 + %36 = OpAtomicSMin %int %25 %uint_1 %uint_0 %int_4 + %37 = OpAtomicUMax %int %25 %uint_1 %uint_0 %int_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp 
b/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp new file mode 100644 index 00000000000..3651a4de527 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/bitcast-fp16-fp32.asm.comp @@ -0,0 +1,63 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 33 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpCapability StorageBuffer16BitAccess + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpMemberName %SSBO 2 "c" + OpMemberName %SSBO 3 "d" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpMemberDecorate %SSBO 3 Offset 12 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %float = OpTypeFloat 32 + %SSBO = OpTypeStruct %v2half %float %float %v2half +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %uint = OpTypeInt 32 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_3 = OpConstant %int 3 + %int_2 = OpConstant %int 2 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_v2half %_ %int_0 + %17 = OpLoad %v2half %16 + %20 = OpBitcast %float %17 + %22 = OpAccessChain 
%_ptr_Uniform_float %_ %int_1 + OpStore %22 %20 + %25 = OpAccessChain %_ptr_Uniform_float %_ %int_2 + %26 = OpLoad %float %25 + %28 = OpBitcast %v2half %26 + %29 = OpAccessChain %_ptr_Uniform_v2half %_ %int_3 + OpStore %29 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..435fa322215 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,97 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "ints" + OpMemberName %SSBO 1 "uints" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + + %int_1 = OpConstant %int 1 + %uint_11 = OpConstant %uint 11 + + %SSBO = OpTypeStruct %v4int %v4uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %ints_ptr = OpAccessChain %_ptr_Uniform_v4int %_ %int_0 + %uints_ptr = OpAccessChain %_ptr_Uniform_v4uint %_ %int_1 + %ints = OpLoad %v4int %ints_ptr + %uints = OpLoad %v4uint %uints_ptr + + %ints_alt = OpVectorShuffle %v4int %ints %ints 3 2 1 0 + %uints_alt = OpVectorShuffle %v4uint %uints %uints 3 2 1 0 + + %int_to_int_popcount = OpBitCount %v4int %ints + 
%int_to_uint_popcount = OpBitCount %v4uint %ints + %uint_to_int_popcount = OpBitCount %v4int %uints + %uint_to_uint_popcount = OpBitCount %v4uint %uints + + ; BitReverse must have matching types w.r.t. sign, yay. + %int_to_int_reverse = OpBitReverse %v4int %ints + ;%int_to_uint_reverse = OpBitReverse %v4uint %ints + ;%uint_to_int_reverse = OpBitReverse %v4int %uints + %uint_to_uint_reverse = OpBitReverse %v4uint %uints + + ; Base and Result must match. + %int_to_int_sbit = OpBitFieldSExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_sbit = OpBitFieldSExtract %v4uint %ints %offset %count + ;%uint_to_int_sbit = OpBitFieldSExtract %v4int %uints %offset %count + %uint_to_uint_sbit = OpBitFieldSExtract %v4uint %uints %uint_11 %int_1 + + ; Base and Result must match. + %int_to_int_ubit = OpBitFieldUExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_ubit = OpBitFieldUExtract %v4uint %ints %offset %count + ;%uint_to_int_ubit = OpBitFieldUExtract %v4int %uints %offset %count + %uint_to_uint_ubit = OpBitFieldUExtract %v4uint %uints %uint_11 %int_1 + + %int_to_int_insert = OpBitFieldInsert %v4int %ints %ints_alt %int_1 %uint_11 + %uint_to_uint_insert = OpBitFieldInsert %v4uint %uints %uints_alt %uint_11 %int_1 + + OpStore %ints_ptr %int_to_int_popcount + OpStore %uints_ptr %int_to_uint_popcount + OpStore %ints_ptr %uint_to_int_popcount + OpStore %uints_ptr %uint_to_uint_popcount + + OpStore %ints_ptr %int_to_int_reverse + ;OpStore %uints_ptr %int_to_uint_reverse + ;OpStore %ints_ptr %uint_to_int_reverse + OpStore %uints_ptr %uint_to_uint_reverse + + OpStore %ints_ptr %int_to_int_sbit + ;OpStore %uints_ptr %int_to_uint_sbit + ;OpStore %ints_ptr %uint_to_int_sbit + OpStore %uints_ptr %uint_to_uint_sbit + + OpStore %ints_ptr %int_to_int_ubit + ;OpStore %uints_ptr %int_to_uint_ubit + ;OpStore %ints_ptr %uint_to_int_ubit + OpStore %uints_ptr %uint_to_uint_ubit + + OpStore %ints_ptr %int_to_int_insert + OpStore %uints_ptr %uint_to_uint_insert + + OpReturn + OpFunctionEnd diff 
--git a/shaders-msl-no-opt/asm/comp/bitscan.asm.comp b/shaders-msl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..e3b785cd52b --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,72 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "u" + OpMemberName %SSBO 1 "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + %SSBO = OpTypeStruct %uvec4 %ivec4 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uvec4 = OpTypePointer Uniform %uvec4 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_ivec4 = OpTypePointer Uniform %ivec4 + %main = OpFunction %void None %3 + %5 = OpLabel + %uptr = OpAccessChain %_ptr_Uniform_uvec4 %_ %int_0 + %iptr = OpAccessChain %_ptr_Uniform_ivec4 %_ %int_1 + %uvalue = OpLoad %uvec4 %uptr + %ivalue = OpLoad %ivec4 %iptr + + %lsb_uint_to_uint = OpExtInst %uvec4 %1 FindILsb %uvalue + %lsb_uint_to_int = OpExtInst %ivec4 %1 FindILsb %uvalue + %lsb_int_to_uint = OpExtInst %uvec4 %1 FindILsb %ivalue + %lsb_int_to_int = OpExtInst %ivec4 %1 FindILsb %ivalue + + %umsb_uint_to_uint = OpExtInst %uvec4 %1 FindUMsb %uvalue + %umsb_uint_to_int = OpExtInst %ivec4 %1 FindUMsb %uvalue + %umsb_int_to_uint = OpExtInst %uvec4 %1 FindUMsb %ivalue + %umsb_int_to_int = OpExtInst %ivec4 %1 FindUMsb %ivalue + + %smsb_uint_to_uint = OpExtInst %uvec4 %1 FindSMsb %uvalue 
+ %smsb_uint_to_int = OpExtInst %ivec4 %1 FindSMsb %uvalue + %smsb_int_to_uint = OpExtInst %uvec4 %1 FindSMsb %ivalue + %smsb_int_to_int = OpExtInst %ivec4 %1 FindSMsb %ivalue + + OpStore %uptr %lsb_uint_to_uint + OpStore %iptr %lsb_uint_to_int + OpStore %uptr %lsb_int_to_uint + OpStore %iptr %lsb_int_to_int + + OpStore %uptr %umsb_uint_to_uint + OpStore %iptr %umsb_uint_to_int + OpStore %uptr %umsb_int_to_uint + OpStore %iptr %umsb_int_to_int + + OpStore %uptr %smsb_uint_to_uint + OpStore %iptr %smsb_uint_to_int + OpStore %uptr %smsb_int_to_uint + OpStore %iptr %smsb_int_to_int + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp b/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp new file mode 100644 index 00000000000..37ff035fa6d --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + OpExtension "SPV_GOOGLE_user_type" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %csMainClear "main" %gl_GlobalInvocationID + OpExecutionMode %csMainClear LocalSize 64 1 1 + OpSource HLSL 600 + OpName %type_CommonConstants "type.CommonConstants" + OpMemberName %type_CommonConstants 0 "g_count" + OpMemberName %type_CommonConstants 1 "g_padding4" + OpName %CommonConstants "CommonConstants" + OpName %type_RWStructuredBuffer_MyStruct "type.RWStructuredBuffer.MyStruct" + OpName %MyStruct "MyStruct" + OpMemberName %MyStruct 0 "m_coefficients" + OpName %g_data "g_data" + OpName %csMainClear "csMainClear" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorateString %gl_GlobalInvocationID UserSemantic "SV_DispatchThreadID" + OpDecorate %CommonConstants DescriptorSet 0 + OpDecorate %CommonConstants Binding 0 + OpDecorate %g_data DescriptorSet 0 + OpDecorate %g_data Binding 1 + 
OpMemberDecorate %type_CommonConstants 0 Offset 0 + OpMemberDecorate %type_CommonConstants 1 Offset 4 + OpDecorate %type_CommonConstants Block + OpDecorateString %CommonConstants UserTypeGOOGLE "cbuffer" + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %MyStruct 0 Offset 0 + OpDecorate %_runtimearr_MyStruct ArrayStride 64 + OpMemberDecorate %type_RWStructuredBuffer_MyStruct 0 Offset 0 + OpDecorate %type_RWStructuredBuffer_MyStruct BufferBlock + OpDecorateString %g_data UserTypeGOOGLE "rwstructuredbuffer" + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3uint = OpTypeVector %uint 3 +%type_CommonConstants = OpTypeStruct %uint %v3uint +%_ptr_Uniform_type_CommonConstants = OpTypePointer Uniform %type_CommonConstants +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %MyStruct = OpTypeStruct %_arr_v4float_uint_4 +%_runtimearr_MyStruct = OpTypeRuntimeArray %MyStruct +%type_RWStructuredBuffer_MyStruct = OpTypeStruct %_runtimearr_MyStruct +%_ptr_Uniform_type_RWStructuredBuffer_MyStruct = OpTypePointer Uniform %type_RWStructuredBuffer_MyStruct +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %21 = OpTypeFunction %void +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool +%_ptr_Uniform_MyStruct = OpTypePointer Uniform %MyStruct +%CommonConstants = OpVariable %_ptr_Uniform_type_CommonConstants Uniform + %g_data = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_MyStruct Uniform +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 + %26 = OpConstantNull %v4float + %27 = OpConstantComposite %_arr_v4float_uint_4 %26 %26 %26 %26 + %28 = OpConstantComposite %MyStruct %27 +%csMainClear = OpFunction %void None %21 + %29 = OpLabel + %30 = OpLoad %v3uint %gl_GlobalInvocationID + OpSelectionMerge %31 None + OpSwitch %uint_0 %32 + %32 = OpLabel + %33 = 
OpCompositeExtract %uint %30 0 + %34 = OpAccessChain %_ptr_Uniform_uint %CommonConstants %int_0 + %35 = OpLoad %uint %34 + %36 = OpUGreaterThanEqual %bool %33 %35 + OpSelectionMerge %37 DontFlatten + OpBranchConditional %36 %38 %37 + %38 = OpLabel + OpBranch %31 + %37 = OpLabel + %39 = OpAccessChain %_ptr_Uniform_MyStruct %g_data %int_0 %33 + OpStore %39 %28 + OpBranch %31 + %31 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp b/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp new file mode 100644 index 00000000000..8aaa9500afb --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/block-like-array-type-construct.asm.comp @@ -0,0 +1,80 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 32 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %foo2 "foo2" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_arr_float_uint_4 ArrayStride 4 + OpDecorate %struct_arr ArrayStride 32 + OpMemberDecorate %struct 0 Offset 0 + OpMemberDecorate %struct 1 Offset 16 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Private__arr_float_uint_4 = OpTypePointer Private %_arr_float_uint_4 + %foo = OpVariable %_ptr_Private__arr_float_uint_4 Private + %foo2 = OpVariable %_ptr_Private__arr_float_uint_4 Private + %int = OpTypeInt 32 1 + %int_0 
= OpConstant %int 0 + %float_1 = OpConstant %float 1 + %struct = OpTypeStruct %_arr_float_uint_4 %_arr_float_uint_4 + %struct_arr = OpTypeArray %struct %uint_2 + %ptr_struct = OpTypePointer Function %struct +%_ptr_Private_float = OpTypePointer Private %float + %int_1 = OpConstant %int 1 + %float_2 = OpConstant %float 2 + %int_2 = OpConstant %int 2 + %float_3 = OpConstant %float 3 + %int_3 = OpConstant %int 3 + %float_4 = OpConstant %float 4 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %carr = OpConstantComposite %_arr_float_uint_4 %float_1 %float_2 %float_3 %float_4 + %struct_constant_0 = OpConstantComposite %struct %carr %carr + %struct_constant_1 = OpConstantComposite %struct %carr %carr + %struct_arr_constant = OpConstantComposite %struct_arr %struct_constant_0 %struct_constant_1 + %SSBO = OpTypeStruct %uint %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %struct_var = OpVariable %ptr_struct Function + %16 = OpAccessChain %_ptr_Private_float %foo %int_0 + OpStore %16 %float_1 + OpStore %foo %carr + %19 = OpAccessChain %_ptr_Private_float %foo %int_1 + OpStore %19 %float_2 + %22 = OpAccessChain %_ptr_Private_float %foo %int_2 + OpStore %22 %float_3 + %25 = OpAccessChain %_ptr_Private_float %foo %int_3 + OpStore %25 %float_4 + OpCopyMemory %foo2 %foo + %l0 = OpLoad %_arr_float_uint_4 %foo + %l1 = OpLoad %_arr_float_uint_4 %foo2 + %struct0 = OpCompositeConstruct %struct %l0 %l1 + OpStore %struct_var %struct0 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp b/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp new file mode 100644 index 00000000000..ed8d0ba6f5e --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/buffer-device-address-ptr-casting.msl24.asm.comp @@ -0,0 +1,106 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 62 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_ARB_gpu_shader_int64" + OpSourceExtension "GL_EXT_buffer_reference" + OpSourceExtension "GL_EXT_buffer_reference_uvec2" + OpName %main "main" + OpName %SomeBuffer "SomeBuffer" + OpMemberName %SomeBuffer 0 "v" + OpMemberName %SomeBuffer 1 "a" + OpMemberName %SomeBuffer 2 "b" + OpName %Registers "Registers" + OpMemberName %Registers 0 "address" + OpMemberName %Registers 1 "address2" + OpName %registers "registers" + OpName %a "a" + OpName %b "b" + OpMemberDecorate %SomeBuffer 0 Offset 0 + OpMemberDecorate %SomeBuffer 1 Offset 16 + OpMemberDecorate %SomeBuffer 2 Offset 24 + OpDecorate %SomeBuffer Block + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_SomeBuffer PhysicalStorageBuffer + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %ulong = OpTypeInt 64 0 + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %SomeBuffer = OpTypeStruct %v4float %ulong %v2uint +%_ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer PhysicalStorageBuffer %SomeBuffer +%_ptr_Function__ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer Function %_ptr_PhysicalStorageBuffer_SomeBuffer + %Registers = OpTypeStruct %ulong %v2uint +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_PushConstant_ulong = OpTypePointer PushConstant 
%ulong + %int_1 = OpConstant %int 1 +%_ptr_PushConstant_v2uint = OpTypePointer PushConstant %v2uint + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %35 = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_4 +%_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float + %float_5 = OpConstant %float 5 + %float_6 = OpConstant %float 6 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %43 = OpConstantComposite %v4float %float_5 %float_6 %float_7 %float_8 +%_ptr_Function_ulong = OpTypePointer Function %ulong +%_ptr_Function_v2uint = OpTypePointer Function %v2uint +%_ptr_PhysicalStorageBuffer_ulong = OpTypePointer PhysicalStorageBuffer %ulong + %int_2 = OpConstant %int 2 +%_ptr_PhysicalStorageBuffer_v2uint = OpTypePointer PhysicalStorageBuffer %v2uint + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function_ulong Function + %b = OpVariable %_ptr_Function_v2uint Function + %21 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0 + %27 = OpAccessChain %_ptr_PushConstant_v2uint %registers %int_1 + %uint_ptr0 = OpLoad %ulong %21 + %uint_ptr1 = OpLoad %v2uint %27 + + ; ConvertUToPtr and vice versa do not accept vectors. 
+ %ulong_ptr0 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %ulong_ptr1 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %uvec2_ptr0 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr1 + + %vec4_write0 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr0 %int_0 + %vec4_write1 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr1 %int_0 + %vec4_write2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %uvec2_ptr0 %int_0 + + OpStore %vec4_write0 %35 Aligned 16 + OpStore %vec4_write1 %35 Aligned 16 + OpStore %vec4_write2 %35 Aligned 16 + + %ulong_from_ptr0 = OpConvertPtrToU %ulong %ulong_ptr0 + %ulong_from_ptr1 = OpBitcast %ulong %ulong_ptr1 + %uvec2_from_ptr0 = OpBitcast %v2uint %uvec2_ptr0 + + %ptr0 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr0 %int_1 + %ptr1 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr1 %int_1 + %ptr2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v2uint %uvec2_ptr0 %int_2 + + OpStore %ptr0 %ulong_from_ptr0 Aligned 8 + OpStore %ptr1 %ulong_from_ptr1 Aligned 8 + OpStore %ptr2 %uvec2_from_ptr0 Aligned 8 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp b/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp new file mode 100644 index 00000000000..c7b76a8c064 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/composite-construct-buffer-struct.asm.comp @@ -0,0 +1,54 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 
32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 + %Block_ptr = OpTypePointer StorageBuffer %Block +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; Copy device array to temporary. 
+ %ptr = OpAccessChain %Block_ptr %SSBO_Var %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + %constructed = OpCompositeConstruct %Block %loaded_array %loaded_array + OpStore %ptr %constructed + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..e1dcb0ef8e2 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 49 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %gl_LocalInvocationID + OpExecutionMode %main LocalSize 4 4 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %25 "indexable" + OpName %38 "indexable" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input 
%v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_4 = OpConstant %uint 4 +%_arr_int_uint_4 = OpTypeArray %int %uint_4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %25 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_2 %int_3 +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4 +%_ptr_Function_int = OpTypePointer Function %int + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %38 = OpConstantComposite %_arr_int_uint_4 %int_4 %int_5 %int_6 %int_7 + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_4 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_int_uint_4 Function +%indexable_0 = OpVariable %_ptr_Function__arr_int_uint_4 Function + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %27 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %28 = OpLoad %uint %27 + OpStore %indexable %25 + %32 = OpAccessChain %_ptr_Function_int %indexable %28 + %33 = OpLoad %int %32 + %40 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_1 + %41 = OpLoad %uint %40 + OpStore %indexable_0 %38 + %43 = OpAccessChain %_ptr_Function_int %indexable_0 %41 + %44 = OpLoad %int %43 + %45 = OpIAdd %int %33 %44 + %47 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %47 %45 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp new file mode 100644 index 00000000000..6a7065a6fb8 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp @@ -0,0 +1,81 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 48 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %ssbo + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %B1 "B1" + OpName %A "A" + OpName %C "C" + OpName %B2 "B2" + OpMemberName %A 0 "a" + OpMemberName %A 1 "b1" + OpMemberName %A 2 "b1_array" + OpMemberName %C 0 "c" + OpMemberName %C 1 "b2" + OpMemberName %C 2 "b2_array" + OpMemberName %B1 0 "elem1" + OpMemberName %B2 0 "elem2" + OpMemberName %SSBO 0 "a_block" + OpMemberName %SSBO 1 "c_block" + OpDecorate %B1Array ArrayStride 16 + OpDecorate %B2Array ArrayStride 16 + OpMemberDecorate %B1 0 Offset 0 + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 16 + OpMemberDecorate %A 2 Offset 32 + OpMemberDecorate %A 3 Offset 96 + OpMemberDecorate %B2 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %C 1 Offset 16 + OpMemberDecorate %C 2 Offset 32 + OpMemberDecorate %C 3 Offset 96 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 112 + OpMemberDecorate %A0 0 Offset 0 + OpMemberDecorate %C0 0 Offset 0 + OpMemberDecorate %A0 0 RowMajor + OpMemberDecorate %A0 0 MatrixStride 8 + OpMemberDecorate %C0 0 ColMajor + OpMemberDecorate %C0 0 MatrixStride 16 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %v4float = OpTypeVector %float 4 + %v2float = OpTypeVector %float 2 + %m2float = OpTypeMatrix %v2float 2 + %A0 = OpTypeStruct %m2float + %C0 = OpTypeStruct %m2float + %B2 = OpTypeStruct %v4float + %B2Array = OpTypeArray %B2 %uint_4 + %C = OpTypeStruct %v4float %B2 %B2Array %C0 + %B1 = OpTypeStruct %v4float + %B1Array = OpTypeArray %B1 %uint_4 + %A = OpTypeStruct %v4float %B1 %B1Array %A0 + %SSBO = OpTypeStruct %A %C 
+%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_C = OpTypePointer StorageBuffer %C + %int_0 = OpConstant %int 0 +%_ptr_Uniform_A = OpTypePointer StorageBuffer %A + %main = OpFunction %void None %3 + %5 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1 + %39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0 + %23 = OpLoad %C %22 + %24 = OpCopyLogical %A %23 + OpStore %39 %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp new file mode 100644 index 00000000000..026bd113172 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 24 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" %3 %4 + OpExecutionMode %2 LocalSize 1 1 1 + OpDecorate %5 Block + OpMemberDecorate %5 0 Offset 0 + OpMemberDecorate %5 1 Offset 16 + OpMemberDecorate %5 2 Offset 48 + OpMemberDecorate %5 3 Offset 64 + OpMemberDecorate %5 4 Offset 80 + OpMemberDecorate %5 5 Offset 96 + OpMemberDecorate %5 6 Offset 112 + OpDecorate %6 Block + OpMemberDecorate %6 0 Offset 0 + OpMemberDecorate %6 1 Offset 4 + OpMemberDecorate %6 2 Offset 12 + OpMemberDecorate %6 3 Offset 16 + OpMemberDecorate %6 4 Offset 32 + OpMemberDecorate %6 5 Offset 48 + OpMemberDecorate %6 6 Offset 64 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 0 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 1 + OpDecorate %7 ArrayStride 4 + OpDecorate %8 ArrayStride 16 + OpMemberDecorate %9 0 Offset 4 + OpMemberDecorate %10 0 Offset 8 + %11 = OpTypeVoid + %12 = OpTypeFloat 32 + %13 = OpTypeVector %12 2 + %14 = OpTypeVector %12 3 + %15 = 
OpTypeVector %12 4 + %16 = OpTypeMatrix %15 4 + %17 = OpTypeInt 32 0 + %18 = OpConstant %17 2 + %7 = OpTypeArray %17 %18 + %8 = OpTypeArray %17 %18 + %9 = OpTypeStruct %17 + %10 = OpTypeStruct %17 + %5 = OpTypeStruct %17 %8 %17 %9 %15 %14 %13 + %19 = OpTypePointer StorageBuffer %5 + %6 = OpTypeStruct %17 %7 %17 %10 %15 %14 %13 + %20 = OpTypePointer StorageBuffer %6 + %3 = OpVariable %20 StorageBuffer + %4 = OpVariable %19 StorageBuffer + %21 = OpTypeFunction %11 + %2 = OpFunction %11 None %21 + %1 = OpLabel + %22 = OpLoad %6 %3 + %23 = OpCopyLogical %5 %22 + OpStore %4 %23 + OpReturn + OpFunctionEnd + diff --git a/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 00000000000..20fa0b099b8 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 48 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %ssbo + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %B1 "B1" + OpName %A "A" + OpName %C "C" + OpName %B2 "B2" + OpMemberName %A 0 "a" + OpMemberName %A 1 "b1" + OpMemberName %A 2 "b1_array" + OpMemberName %C 0 "c" + OpMemberName %C 1 "b2" + OpMemberName %C 2 "b2_array" + OpMemberName %B1 0 "elem1" + OpMemberName %B2 0 "elem2" + OpMemberName %SSBO 0 "a_block" + OpMemberName %SSBO 1 "c_block" + OpDecorate %B1Array ArrayStride 16 + OpDecorate %B2Array ArrayStride 16 + OpMemberDecorate %B1 0 Offset 0 + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 16 + OpMemberDecorate %A 2 Offset 32 + OpMemberDecorate %B2 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %C 1 Offset 16 + OpMemberDecorate %C 2 Offset 32 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 96 + OpDecorate %SSBO Block + OpDecorate %ssbo 
DescriptorSet 0 + OpDecorate %ssbo Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %v4float = OpTypeVector %float 4 + %B2 = OpTypeStruct %v4float + %B2Array = OpTypeArray %B2 %uint_4 + %C = OpTypeStruct %v4float %B2 %B2Array + %B1 = OpTypeStruct %v4float + %B1Array = OpTypeArray %B1 %uint_4 + %A = OpTypeStruct %v4float %B1 %B1Array + %SSBO = OpTypeStruct %A %C +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_C = OpTypePointer StorageBuffer %C + %int_0 = OpConstant %int 0 +%_ptr_Uniform_A = OpTypePointer StorageBuffer %A + %main = OpFunction %void None %3 + %5 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1 + %39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0 + %23 = OpLoad %C %22 + %24 = OpCopyLogical %A %23 + OpStore %39 %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp b/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp new file mode 100644 index 00000000000..d59aad3cef1 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/device-array-load-temporary.asm.comp @@ -0,0 +1,53 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 
2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; Copy device array to temporary. + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + OpStore %ptr_arr_0 %loaded_array + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp b/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp new file mode 100644 index 00000000000..d59aad3cef1 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.comp @@ -0,0 +1,53 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + 
OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; Copy device array to temporary. 
+ %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + OpStore %ptr_arr_0 %loaded_array + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp b/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp new file mode 100644 index 00000000000..d9d0d51c39c --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.asm.comp @@ -0,0 +1,81 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform 
%SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; DeviceToDevice + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + + ; ConstantToDevice + %ptr_arr_1_const = OpAccessChain %arr_uvec2_2_ptr_const %UBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array_const = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %ptr_arr_0 %loaded_array_const + + ; StackToDevice + %loaded_array_func = OpLoad %arr_uvec2_2 %func + OpStore %ptr_arr_0 %loaded_array_func + + ; ThreadGroupToDevice + %loaded_array_workgroup = OpLoad %arr_uvec2_2 %wg + OpStore %ptr_arr_0 %loaded_array_workgroup + + ; DeviceToThreadGroup + %loaded_array_2 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %wg %loaded_array_2 + + ; DeviceToStack + %loaded_array_3 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %func %loaded_array_3 + + ; ConstantToThreadGroup + %loaded_array_const_2 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %wg %loaded_array_const_2 + + ; ConstantToStack + %loaded_array_const_3 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %func %loaded_array_const_3 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp b/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp new file mode 100644 index 00000000000..d9d0d51c39c --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.comp @@ -0,0 +1,81 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName 
%UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; DeviceToDevice + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + + ; ConstantToDevice + %ptr_arr_1_const = OpAccessChain %arr_uvec2_2_ptr_const %UBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array_const = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %ptr_arr_0 %loaded_array_const + + ; StackToDevice + %loaded_array_func = OpLoad %arr_uvec2_2 %func + OpStore %ptr_arr_0 %loaded_array_func + + ; ThreadGroupToDevice + 
%loaded_array_workgroup = OpLoad %arr_uvec2_2 %wg + OpStore %ptr_arr_0 %loaded_array_workgroup + + ; DeviceToThreadGroup + %loaded_array_2 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %wg %loaded_array_2 + + ; DeviceToStack + %loaded_array_3 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %func %loaded_array_3 + + ; ConstantToThreadGroup + %loaded_array_const_2 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %wg %loaded_array_const_2 + + ; ConstantToStack + %loaded_array_const_3 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %func %loaded_array_const_3 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..73f3ceee1ad --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 26 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + ;OpEntryPoint GLCompute %main "main" %Samp %ubo %ssbo %v %w + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Samp "Samp" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %ubo "ubo" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbo "ssbo" + OpName %v "v" + OpName %w "w" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %Samp DescriptorSet 0 + OpDecorate %Samp Binding 0 + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %ubo DescriptorSet 0 + OpDecorate %ubo Binding 1 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector 
%uint 3 + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %float = OpTypeFloat 32 + %12 = OpTypeImage %float 2D 0 0 0 1 Unknown + %13 = OpTypeSampledImage %12 +%_ptr_UniformConstant_13 = OpTypePointer UniformConstant %13 + %Samp = OpVariable %_ptr_UniformConstant_13 UniformConstant + %UBO = OpTypeStruct %float +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %ubo = OpVariable %_ptr_Uniform_UBO Uniform + %SSBO = OpTypeStruct %float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer +%_ptr_Private_float = OpTypePointer Private %float + %v = OpVariable %_ptr_Private_float Private +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %w = OpVariable %_ptr_Workgroup_float Workgroup + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp b/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 00000000000..30db11d45bc --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %ResTypeMod = OpTypeStruct %float %float +%_ptr_Function_ResTypeMod = OpTypePointer Function %ResTypeMod + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_20 = OpConstant %float 20 + %int_1 = OpConstant %int 1 
+%_ptr_Function_float = OpTypePointer Function %float +%ResTypeFrexp = OpTypeStruct %float %int +%_ptr_Function_ResTypeFrexp = OpTypePointer Function %ResTypeFrexp + %float_40 = OpConstant %float 40 +%_ptr_Function_int = OpTypePointer Function %int + %SSBO = OpTypeStruct %float %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_int = OpTypePointer Uniform %int + %main = OpFunction %void None %3 + %5 = OpLabel + %modres = OpExtInst %ResTypeMod %1 ModfStruct %float_20 + %frexpres = OpExtInst %ResTypeFrexp %1 FrexpStruct %float_40 + + %modres_f = OpCompositeExtract %float %modres 0 + %modres_i = OpCompositeExtract %float %modres 1 + %frexpres_f = OpCompositeExtract %float %frexpres 0 + %frexpres_i = OpCompositeExtract %int %frexpres 1 + + %float_ptr = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %int_ptr = OpAccessChain %_ptr_Uniform_int %_ %int_1 + + OpStore %float_ptr %modres_f + OpStore %float_ptr %modres_i + OpStore %float_ptr %frexpres_f + OpStore %int_ptr %frexpres_i + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp b/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 00000000000..2eaef4bdbee --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,60 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate 
%gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + %spec_1 = OpSpecConstant %uint 11 + %spec_2 = OpSpecConstant %uint 12 + %spec_3 = OpSpecConstant %uint 13 + %spec_4 = OpSpecConstant %uint 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %uint_3 %spec_1 %spec_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %27 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp b/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp new file mode 100644 index 00000000000..3031f4bb8af --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/local-size-id.asm.comp @@ -0,0 +1,76 @@ + OpCapability Shader + %1 = OpExtInstImport 
"GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + ; Test that we can declare the spec constant as signed. + ; Needs implicit bitcast since WorkGroupSize is uint. + %spec_1 = OpSpecConstant %int 11 + %spec_2 = OpSpecConstant %int 12 + %spec_3 = OpSpecConstant %int 13 + %spec_4 = OpSpecConstant %int 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + ; Test that we can build spec constant composites out of local size id values. + ; Needs special case handling. 
+ %spec_3_op = OpSpecConstantOp %uint IAdd %spec_3 %uint_3 +%WorkGroupSize = OpSpecConstantComposite %v3uint %spec_3_op %spec_4 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %wg_f = OpConvertUToF %v3float %WorkGroupSize + %wg_f4 = OpVectorShuffle %v4float %wg_f %wg_f 0 1 2 2 + ; Test that we can use the spec constants directly which needs to translate to gl_WorkGroupSize.elem. + ; Needs special case handling. + %res = OpFAdd %v4float %27 %wg_f4 + %f0 = OpConvertSToF %float %spec_3 + %f1 = OpConvertSToF %float %spec_4 + %f2 = OpConvertSToF %float %uint_2 + %res1 = OpVectorTimesScalar %v4float %res %f0 + %res2 = OpVectorTimesScalar %v4float %res1 %f1 + %res3 = OpVectorTimesScalar %v4float %res2 %f2 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %res3 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp b/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp new file mode 100644 index 00000000000..126b01e4616 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/modf-storage-class.asm.comp @@ -0,0 +1,116 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 91 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" + OpExecutionMode %2 LocalSize 1 1 1 + OpDecorate %_arr_v2uint_uint_324 ArrayStride 8 + OpMemberDecorate %_struct_6 0 NonWritable + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_struct_6 BufferBlock + OpDecorate %7 DescriptorSet 0 + OpDecorate %7 Binding 0 + OpDecorate %_arr_v2float_uint_648 ArrayStride 8 + OpMemberDecorate %_struct_9 0 Offset 0 + OpDecorate %_struct_9 
BufferBlock + OpDecorate %11 DescriptorSet 0 + OpDecorate %11 Binding 1 + OpDecorate %_arr_v2float_uint_648_0 ArrayStride 8 + OpMemberDecorate %_struct_13 0 Offset 0 + OpDecorate %_struct_13 BufferBlock + OpDecorate %14 DescriptorSet 0 + OpDecorate %14 Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %_struct_19 = OpTypeStruct %v2float %v2float + %10 = OpTypeFunction %_struct_19 %_ptr_Function_v2float +%_ptr_Function__struct_19 = OpTypePointer Function %_struct_19 + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %uint_648 = OpConstant %uint 648 + %bool = OpTypeBool + %v2uint = OpTypeVector %uint 2 +%_ptr_Function_v2uint = OpTypePointer Function %v2uint + %uint_324 = OpConstant %uint 324 +%_arr_v2uint_uint_324 = OpTypeArray %v2uint %uint_324 + %_struct_6 = OpTypeStruct %_arr_v2uint_uint_324 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %7 = OpVariable %_ptr_Uniform__struct_6 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint +%_arr_v2float_uint_648 = OpTypeArray %v2float %uint_648 + %_struct_9 = OpTypeStruct %_arr_v2float_uint_648 +%_ptr_Uniform__struct_9 = OpTypePointer Uniform %_struct_9 + %11 = OpVariable %_ptr_Uniform__struct_9 Uniform + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_arr_v2float_uint_648_0 = OpTypeArray %v2float %uint_648 + %_struct_13 = OpTypeStruct %_arr_v2float_uint_648_0 +%_ptr_Uniform__struct_13 = OpTypePointer Uniform %_struct_13 + %14 = OpVariable %_ptr_Uniform__struct_13 Uniform + %int_1 = OpConstant %int 1 + %2 = OpFunction %void None %3 + %5 = OpLabel + %46 = OpVariable %_ptr_Function_uint Function + %47 = OpVariable %_ptr_Function_v2uint Function + %48 = OpVariable %_ptr_Function_v2float Function 
+ %50 = OpVariable %_ptr_Function__struct_19 Function + OpStore %46 %uint_0 + OpBranch %30 + %30 = OpLabel + OpLoopMerge %32 %33 None + OpBranch %34 + %34 = OpLabel + %35 = OpLoad %uint %46 + %38 = OpULessThan %bool %35 %uint_648 + OpBranchConditional %38 %31 %32 + %31 = OpLabel + %49 = OpLoad %uint %46 + %51 = OpUDiv %uint %49 %uint_2 + %53 = OpAccessChain %_ptr_Uniform_v2uint %7 %int_0 %51 + %54 = OpLoad %v2uint %53 + OpStore %47 %54 + %56 = OpLoad %v2uint %47 + %57 = OpBitcast %v2float %56 + OpStore %48 %57 + %62 = OpLoad %uint %46 + %64 = OpIAdd %uint %62 %uint_1 + %65 = OpLoad %v2float %48 + %66 = OpLoad %uint %46 + %68 = OpAccessChain %_ptr_Uniform_v2float %11 %int_0 %66 + %69 = OpExtInst %v2float %1 Modf %65 %68 + %70 = OpAccessChain %_ptr_Uniform_v2float %11 %int_0 %64 + OpStore %70 %69 + %73 = OpLoad %v2float %48 + %74 = OpExtInst %_struct_19 %1 ModfStruct %73 + OpStore %50 %74 + %79 = OpLoad %uint %46 + %81 = OpAccessChain %_ptr_Function_v2float %50 %int_1 + %82 = OpLoad %v2float %81 + %83 = OpAccessChain %_ptr_Uniform_v2float %14 %int_0 %79 + OpStore %83 %82 + %84 = OpLoad %uint %46 + %85 = OpIAdd %uint %84 %uint_1 + %86 = OpAccessChain %_ptr_Function_v2float %50 %int_0 + %87 = OpLoad %v2float %86 + %88 = OpAccessChain %_ptr_Uniform_v2float %14 %int_0 %85 + OpStore %88 %87 + OpBranch %33 + %33 = OpLabel + %89 = OpLoad %uint %46 + %90 = OpIAdd %uint %89 %uint_2 + OpStore %46 %90 + OpBranch %30 + %32 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp new file mode 100644 index 00000000000..8319dfdb607 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrdiff-basic.spv14.asm.comp @@ -0,0 +1,98 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointersStorageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 %6 
+ OpExecutionMode %1 LocalSize 4 1 1 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpDecorate %8 ArrayStride 16 + OpDecorate %9 Block + OpMemberDecorate %9 0 Offset 0 + OpDecorate %10 ArrayStride 68 + OpDecorate %11 Block + OpMemberDecorate %11 0 Offset 0 + OpDecorate %12 ArrayStride 4 + OpDecorate %13 ArrayStride 4 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + OpDecorate %5 BuiltIn LocalInvocationId + OpDecorate %6 BuiltIn WorkgroupId + %14 = OpTypeVoid + %15 = OpTypeBool + %16 = OpTypeInt 32 1 + %17 = OpConstant %16 0 + %18 = OpConstant %16 1 + %19 = OpConstant %16 4 + %20 = OpConstant %16 16 + %21 = OpConstant %16 17 + %22 = OpTypeVector %16 3 + %23 = OpTypePointer Input %22 + %12 = OpTypeArray %16 %19 + %8 = OpTypeRuntimeArray %12 + %7 = OpTypeStruct %8 + %24 = OpTypePointer StorageBuffer %7 + %25 = OpTypePointer StorageBuffer %12 + %13 = OpTypeArray %16 %21 + %10 = OpTypeRuntimeArray %13 + %9 = OpTypeStruct %10 + %26 = OpTypePointer StorageBuffer %9 + %27 = OpTypePointer StorageBuffer %13 + %28 = OpTypePointer StorageBuffer %16 + %11 = OpTypeStruct %16 + %29 = OpTypePointer Uniform %11 + %30 = OpTypePointer Uniform %16 + %2 = OpVariable %24 StorageBuffer + %3 = OpVariable %26 StorageBuffer + %4 = OpVariable %29 Uniform + %5 = OpVariable %23 Input + %6 = OpVariable %23 Input + %31 = OpTypeFunction %14 + %1 = OpFunction %14 None %31 + %32 = OpLabel + %33 = OpAccessChain %30 %4 %17 + %34 = OpLoad %16 %33 + %35 = OpLoad %22 %6 + %36 = OpCompositeExtract %16 %35 0 + %37 = OpLoad %22 %5 + %38 = OpCompositeExtract %16 %37 0 + %39 = OpAccessChain %25 %2 %17 %17 + %40 = OpAccessChain %25 %2 %17 %36 + %41 = OpSGreaterThanEqual %15 %36 %34 + OpSelectionMerge %42 None + OpBranchConditional %41 %43 %42 + %43 = OpLabel + OpReturn + %42 = OpLabel + %44 = OpIEqual %15 %38 %18 + OpSelectionMerge %45 None + OpBranchConditional %44 %46 %45 
+ %46 = OpLabel + %47 = OpPtrDiff %16 %40 %39 + %48 = OpAccessChain %28 %3 %17 %36 %20 + OpStore %48 %47 + OpBranch %45 + %45 = OpLabel + %49 = OpPhi %16 %17 %42 %17 %46 %50 %45 + %50 = OpIAdd %16 %49 %18 + %51 = OpIEqual %15 %50 %19 + %52 = OpIMul %16 %38 %19 + %53 = OpIAdd %16 %52 %49 + %54 = OpAccessChain %28 %40 %38 + %55 = OpAccessChain %28 %40 %49 + %56 = OpPtrDiff %16 %54 %55 + %57 = OpAccessChain %28 %3 %17 %36 %53 + OpStore %57 %56 + OpLoopMerge %58 %45 None + OpBranchConditional %51 %58 %45 + %58 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp new file mode 100644 index 00000000000..8566491955e --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp @@ -0,0 +1,79 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 46 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointersStorageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %6 ArrayStride 4 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpMemberDecorate %7 1 Offset 4 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %8 ArrayStride 8 + OpDecorate %9 Block + OpMemberDecorate %9 0 Offset 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %10 ArrayStride 4 + %11 = OpTypeVoid + %12 = OpTypeBool + %13 = OpTypeInt 32 1 + %14 = OpConstant %13 -1 + %15 = OpConstant %13 0 + %16 = OpConstant %13 1 + %17 = OpConstant %13 2 + %18 = OpConstant %13 3 + %19 = OpTypeVector %13 2 + %6 = OpTypeRuntimeArray %13 + %7 = OpTypeStruct %13 %6 + %20 = OpTypePointer StorageBuffer %7 + %2 = OpVariable %20 StorageBuffer + %8 = OpTypeRuntimeArray %19 + %9 = OpTypeStruct %8 + %21 = OpTypePointer StorageBuffer %9 + %3 = OpVariable %21 
StorageBuffer + %10 = OpTypePointer StorageBuffer %13 + %22 = OpTypePointer Private %10 + %4 = OpVariable %22 Private + %5 = OpVariable %22 Private + %23 = OpTypePointer StorageBuffer %13 + %24 = OpTypePointer StorageBuffer %19 + %25 = OpTypeFunction %11 + %1 = OpFunction %11 None %25 + %26 = OpLabel + %27 = OpAccessChain %23 %2 %15 + %28 = OpLoad %13 %27 + %29 = OpAccessChain %10 %2 %16 %15 + OpStore %4 %29 + %30 = OpPtrAccessChain %10 %29 %28 + OpStore %5 %30 + %31 = OpSLessThanEqual %12 %28 %15 + OpSelectionMerge %32 None + OpBranchConditional %31 %32 %33 + %33 = OpLabel + %34 = OpPhi %13 %15 %26 %35 %33 + %36 = OpLoad %10 %4 + %37 = OpLoad %10 %5 + %38 = OpPtrAccessChain %10 %36 %16 + %39 = OpPtrAccessChain %10 %37 %14 + %35 = OpIAdd %13 %34 %16 + OpStore %4 %38 + OpStore %5 %39 + %40 = OpPtrDiff %13 %36 %37 + %41 = OpPtrDiff %13 %37 %36 + %42 = OpCompositeConstruct %19 %40 %41 + %43 = OpAccessChain %24 %3 %15 %34 + OpStore %43 %42 + %44 = OpSGreaterThanEqual %12 %34 %28 + OpLoopMerge %45 %33 None + OpBranchConditional %44 %45 %33 + %45 = OpLabel + OpBranch %32 + %32 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp new file mode 100644 index 00000000000..5a97976ce8e --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 64 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %6 ArrayStride 4 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 3 + 
%8 = OpTypeVoid + %9 = OpTypeBool + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 0 + %12 = OpConstant %10 1 + %6 = OpTypeRuntimeArray %10 + %7 = OpTypeStruct %6 + %13 = OpTypePointer StorageBuffer %7 + %14 = OpTypePointer StorageBuffer %6 + %15 = OpTypePointer StorageBuffer %10 + %2 = OpVariable %13 StorageBuffer + %3 = OpVariable %13 StorageBuffer + %4 = OpVariable %13 StorageBuffer + %5 = OpVariable %13 StorageBuffer + %16 = OpTypeFunction %8 + %1 = OpFunction %8 None %16 + %17 = OpLabel + %18 = OpCopyObject %10 %11 + %19 = OpAccessChain %14 %2 %11 + %20 = OpAccessChain %15 %2 %11 %11 + %21 = OpAccessChain %14 %3 %11 + %22 = OpAccessChain %15 %3 %11 %11 + %23 = OpAccessChain %14 %4 %11 + %24 = OpAccessChain %15 %4 %11 %11 + %25 = OpPtrEqual %9 %2 %3 + %26 = OpSelect %10 %25 %12 %11 + %27 = OpAccessChain %15 %5 %11 %18 + %28 = OpIAdd %10 %18 %12 + OpStore %27 %26 + %29 = OpPtrEqual %9 %19 %21 + %30 = OpSelect %10 %29 %12 %11 + %31 = OpAccessChain %15 %5 %11 %28 + %32 = OpIAdd %10 %28 %12 + OpStore %31 %30 + %33 = OpPtrEqual %9 %20 %22 + %34 = OpSelect %10 %33 %12 %11 + %35 = OpAccessChain %15 %5 %11 %32 + %36 = OpIAdd %10 %32 %12 + OpStore %35 %34 + %37 = OpPtrEqual %9 %2 %4 + %38 = OpSelect %10 %37 %12 %11 + %39 = OpAccessChain %15 %5 %11 %36 + %40 = OpIAdd %10 %36 %12 + OpStore %39 %38 + %41 = OpPtrEqual %9 %19 %23 + %42 = OpSelect %10 %41 %12 %11 + %43 = OpAccessChain %15 %5 %11 %40 + %44 = OpIAdd %10 %40 %12 + OpStore %43 %42 + %45 = OpPtrEqual %9 %20 %24 + %46 = OpSelect %10 %45 %12 %11 + %47 = OpAccessChain %15 %5 %11 %44 + %48 = OpIAdd %10 %44 %12 + OpStore %47 %46 + %49 = OpPtrEqual %9 %3 %4 + %50 = OpSelect %10 %49 %12 %11 + %51 = OpAccessChain %15 %5 %11 %48 + %52 = OpIAdd %10 %48 %12 + OpStore %51 %50 + %53 = OpPtrEqual %9 %21 %23 + %54 = OpSelect %10 %53 %12 %11 + %55 = OpAccessChain %15 %5 %11 %52 + %56 = OpIAdd %10 %52 %12 + OpStore %55 %54 + %57 = OpPtrEqual %9 %22 %24 + %58 = OpSelect %10 %57 %12 %11 + %59 = OpAccessChain %15 %5 %11 %56 + %60 = 
OpIAdd %10 %56 %12 + OpStore %59 %58 + %61 = OpPtrEqual %9 %2 %2 + %62 = OpSelect %10 %61 %12 %11 + %63 = OpAccessChain %15 %5 %11 %60 + OpStore %63 %62 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp new file mode 100644 index 00000000000..89813b22654 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp @@ -0,0 +1,98 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 63 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointersStorageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %5 ArrayStride 4 + OpDecorate %6 Block + OpDecorate %7 Block + OpMemberDecorate %6 0 ColMajor + OpMemberDecorate %6 0 Offset 0 + OpMemberDecorate %6 0 MatrixStride 16 + OpMemberDecorate %6 1 RowMajor + OpMemberDecorate %6 1 Offset 64 + OpMemberDecorate %6 1 MatrixStride 16 + OpMemberDecorate %6 2 Offset 128 + OpMemberDecorate %6 3 Offset 132 + OpMemberDecorate %7 0 Offset 0 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + %8 = OpTypeVoid + %9 = OpTypeBool + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 0 + %12 = OpConstant %10 1 + %13 = OpConstant %10 2 + %14 = OpConstant %10 3 + %15 = OpTypeFloat 32 + %5 = OpTypeRuntimeArray %10 + %16 = OpTypeVector %15 4 + %17 = OpTypeMatrix %16 4 + %6 = OpTypeStruct %17 %17 %15 %15 + %7 = OpTypeStruct %5 + %18 = OpTypePointer StorageBuffer %6 + %19 = OpTypePointer StorageBuffer %7 + %20 = OpTypePointer StorageBuffer %17 + %21 = OpTypePointer StorageBuffer %10 + %22 = OpTypePointer StorageBuffer %15 + %23 = OpTypePointer StorageBuffer %16 + %2 = OpVariable %18 StorageBuffer + %3 = OpVariable %18 
StorageBuffer + %4 = OpVariable %19 StorageBuffer + %24 = OpTypeFunction %8 + %1 = OpFunction %8 None %24 + %25 = OpLabel + %26 = OpCopyObject %10 %11 + %27 = OpAccessChain %22 %2 %13 + %28 = OpAccessChain %22 %2 %14 + %29 = OpAccessChain %22 %3 %13 + %30 = OpAccessChain %22 %3 %14 + %31 = OpAccessChain %20 %2 %11 + %32 = OpAccessChain %20 %2 %12 + %33 = OpAccessChain %23 %2 %11 %11 + %34 = OpAccessChain %23 %2 %11 %12 + %35 = OpAccessChain %22 %2 %11 %11 %11 + %36 = OpPtrEqual %9 %27 %28 + %37 = OpSelect %10 %36 %11 %12 + %38 = OpAccessChain %21 %4 %11 %26 + %39 = OpIAdd %10 %26 %12 + OpStore %38 %37 + %40 = OpPtrEqual %9 %27 %29 + %41 = OpSelect %10 %40 %11 %12 + %42 = OpAccessChain %21 %4 %11 %39 + %43 = OpIAdd %10 %39 %12 + OpStore %42 %41 + %44 = OpSelect %22 %40 %27 %28 + %45 = OpSelect %22 %40 %29 %30 + %46 = OpPtrEqual %9 %44 %45 + %47 = OpSelect %10 %46 %11 %12 + %48 = OpAccessChain %21 %4 %11 %43 + %49 = OpIAdd %10 %43 %12 + OpStore %48 %47 + %50 = OpSelect %22 %46 %27 %28 + %51 = OpPtrEqual %9 %50 %35 + %52 = OpSelect %10 %51 %11 %12 + %53 = OpAccessChain %21 %4 %11 %49 + %54 = OpIAdd %10 %49 %12 + OpStore %53 %52 + %55 = OpPtrEqual %9 %31 %32 + %56 = OpSelect %10 %55 %11 %12 + %57 = OpAccessChain %21 %4 %11 %54 + %58 = OpIAdd %10 %54 %12 + OpStore %57 %56 + %59 = OpPtrEqual %9 %33 %34 + %60 = OpSelect %10 %59 %11 %12 + %61 = OpAccessChain %21 %4 %11 %58 + %62 = OpIAdd %10 %58 %12 + OpStore %61 %56 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp b/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp new file mode 100644 index 00000000000..1cbf8045c55 --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 64 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 + 
OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %6 ArrayStride 4 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 3 + %8 = OpTypeVoid + %9 = OpTypeBool + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 0 + %12 = OpConstant %10 1 + %6 = OpTypeRuntimeArray %10 + %7 = OpTypeStruct %6 + %13 = OpTypePointer StorageBuffer %7 + %14 = OpTypePointer StorageBuffer %6 + %15 = OpTypePointer StorageBuffer %10 + %2 = OpVariable %13 StorageBuffer + %3 = OpVariable %13 StorageBuffer + %4 = OpVariable %13 StorageBuffer + %5 = OpVariable %13 StorageBuffer + %16 = OpTypeFunction %8 + %1 = OpFunction %8 None %16 + %17 = OpLabel + %18 = OpCopyObject %10 %11 + %19 = OpAccessChain %14 %2 %11 + %20 = OpAccessChain %15 %2 %11 %11 + %21 = OpAccessChain %14 %3 %11 + %22 = OpAccessChain %15 %3 %11 %11 + %23 = OpAccessChain %14 %4 %11 + %24 = OpAccessChain %15 %4 %11 %11 + %25 = OpPtrNotEqual %9 %2 %3 + %26 = OpSelect %10 %25 %12 %11 + %27 = OpAccessChain %15 %5 %11 %18 + %28 = OpIAdd %10 %18 %12 + OpStore %27 %26 + %29 = OpPtrNotEqual %9 %19 %21 + %30 = OpSelect %10 %29 %12 %11 + %31 = OpAccessChain %15 %5 %11 %28 + %32 = OpIAdd %10 %28 %12 + OpStore %31 %30 + %33 = OpPtrNotEqual %9 %20 %22 + %34 = OpSelect %10 %33 %12 %11 + %35 = OpAccessChain %15 %5 %11 %32 + %36 = OpIAdd %10 %32 %12 + OpStore %35 %34 + %37 = OpPtrNotEqual %9 %2 %4 + %38 = OpSelect %10 %37 %12 %11 + %39 = OpAccessChain %15 %5 %11 %36 + %40 = OpIAdd %10 %36 %12 + OpStore %39 %38 + %41 = OpPtrNotEqual %9 %19 %23 + %42 = OpSelect %10 %41 %12 %11 + %43 = OpAccessChain %15 %5 %11 %40 + %44 = OpIAdd %10 %40 %12 + OpStore %43 %42 + %45 = OpPtrNotEqual %9 %20 %24 + %46 = OpSelect %10 %45 %12 %11 + %47 = OpAccessChain %15 %5 %11 %44 + %48 = OpIAdd %10 %44 %12 + OpStore %47 %46 + %49 = OpPtrNotEqual %9 %3 
%4 + %50 = OpSelect %10 %49 %12 %11 + %51 = OpAccessChain %15 %5 %11 %48 + %52 = OpIAdd %10 %48 %12 + OpStore %51 %50 + %53 = OpPtrNotEqual %9 %21 %23 + %54 = OpSelect %10 %53 %12 %11 + %55 = OpAccessChain %15 %5 %11 %52 + %56 = OpIAdd %10 %52 %12 + OpStore %55 %54 + %57 = OpPtrNotEqual %9 %22 %24 + %58 = OpSelect %10 %57 %12 %11 + %59 = OpAccessChain %15 %5 %11 %56 + %60 = OpIAdd %10 %56 %12 + OpStore %59 %58 + %61 = OpPtrNotEqual %9 %2 %2 + %62 = OpSelect %10 %61 %12 %11 + %63 = OpAccessChain %15 %5 %11 %60 + OpStore %63 %62 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 00000000000..b4e622baced --- /dev/null +++ b/shaders-msl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,78 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %A "A" + OpName %B "A" + OpName %C "A" + OpName %D "A" + OpName %E "A" + OpName %F "A" + OpName %G "A" + OpName %H "A" + OpName %I "A" + OpName %J "A" + OpName %K "A" + OpName %L "A" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %A SpecId 0 + OpDecorate %B SpecId 1 + OpDecorate %C SpecId 2 + OpDecorate %D SpecId 3 + OpDecorate %E SpecId 4 + OpDecorate %F SpecId 5 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction 
%void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %A = OpSpecConstant %int 0 + %B = OpSpecConstant %int 1 + %C = OpSpecConstant %int 2 + %D = OpSpecConstant %int 3 + %E = OpSpecConstant %int 4 + %F = OpSpecConstant %int 5 + %G = OpSpecConstantOp %int ISub %A %B + %H = OpSpecConstantOp %int ISub %G %C + %I = OpSpecConstantOp %int ISub %H %D + %J = OpSpecConstantOp %int ISub %I %E + %K = OpSpecConstantOp %int ISub %J %F + %L = OpSpecConstantOp %int IAdd %K %F +%_ptr_Uniform_int = OpTypePointer Uniform %int + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %32 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %32 %L + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp b/shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp similarity index 100% rename from shaders-msl/asm/comp/storage-buffer-basic.invalid.asm.comp rename to shaders-msl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp diff --git a/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag new file mode 100644 index 00000000000..1a268acb2fa --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.argument.msl2.asm.frag @@ -0,0 +1,50 @@ +; SPIR-V +; Version: 1.0 +; Generator: 
Khronos Glslang Reference Front End; 10 +; Bound: 25 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %UBOs "UBOs" + OpMemberName %UBOs 0 "v" + OpName %ubos "ubos" + OpDecorate %FragColor Location 0 + OpMemberDecorate %UBOs 0 Offset 0 + OpDecorate %UBOs Block + OpDecorate %ubos DescriptorSet 0 + OpDecorate %ubos Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %UBOs = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_UBOs_uint_2 = OpTypeArray %UBOs %uint_2 +%_ptr_Uniform__arr_UBOs_uint_2 = OpTypePointer Uniform %_arr_UBOs_uint_2 +%_ptr_Uniform_UBOs = OpTypePointer Uniform %UBOs + %ubos = OpVariable %_ptr_Uniform__arr_UBOs_uint_2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %ptr0_partial = OpAccessChain %_ptr_Uniform_UBOs %ubos %int_0 + %ptr0 = OpAccessChain %_ptr_Uniform_v4float %ptr0_partial %int_0 + %ptr1_partial = OpAccessChain %_ptr_Uniform_UBOs %ubos %int_1 + %ptr1 = OpAccessChain %_ptr_Uniform_v4float %ptr1_partial %int_0 + %20 = OpLoad %v4float %ptr0 + %23 = OpLoad %v4float %ptr1 + %24 = OpFAdd %v4float %20 %23 + OpStore %FragColor %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag b/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag new file mode 100644 index 00000000000..1a268acb2fa --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/access-chain-array-ubo-partial.asm.frag @@ -0,0 +1,50 @@ +; SPIR-V 
+; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 25 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %UBOs "UBOs" + OpMemberName %UBOs 0 "v" + OpName %ubos "ubos" + OpDecorate %FragColor Location 0 + OpMemberDecorate %UBOs 0 Offset 0 + OpDecorate %UBOs Block + OpDecorate %ubos DescriptorSet 0 + OpDecorate %ubos Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %UBOs = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_UBOs_uint_2 = OpTypeArray %UBOs %uint_2 +%_ptr_Uniform__arr_UBOs_uint_2 = OpTypePointer Uniform %_arr_UBOs_uint_2 +%_ptr_Uniform_UBOs = OpTypePointer Uniform %UBOs + %ubos = OpVariable %_ptr_Uniform__arr_UBOs_uint_2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %ptr0_partial = OpAccessChain %_ptr_Uniform_UBOs %ubos %int_0 + %ptr0 = OpAccessChain %_ptr_Uniform_v4float %ptr0_partial %int_0 + %ptr1_partial = OpAccessChain %_ptr_Uniform_UBOs %ubos %int_1 + %ptr1 = OpAccessChain %_ptr_Uniform_v4float %ptr1_partial %int_0 + %20 = OpLoad %v4float %ptr0 + %23 = OpLoad %v4float %ptr1 + %24 = OpFAdd %v4float %20 %23 + OpStore %FragColor %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..6782b124730 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ 
-0,0 +1,83 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_ + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpMemberName %AA 0 "foo" + OpMemberName %AB 0 "foo" + OpMemberName %A 0 "_aa" + OpMemberName %A 1 "ab" + OpMemberName %BA 0 "foo" + OpMemberName %BB 0 "foo" + OpMemberName %B 0 "_ba" + OpMemberName %B 1 "bb" + OpName %VertexData "VertexData" + OpMemberName %VertexData 0 "_a" + OpMemberName %VertexData 1 "b" + OpName %_ "" + OpMemberName %CA 0 "foo" + OpMemberName %C 0 "_ca" + OpMemberName %DA 0 "foo" + OpMemberName %D 0 "da" + OpName %UBO "UBO" + OpMemberName %UBO 0 "_c" + OpMemberName %UBO 1 "d" + OpName %__0 "" + OpMemberName %E 0 "a" + OpName %SSBO "SSBO" + ;OpMemberName %SSBO 0 "e" Test that we don't try to assign bogus aliases. + OpMemberName %SSBO 1 "_e" + OpMemberName %SSBO 2 "f" + OpName %__1 "" + OpDecorate %VertexData Block + OpDecorate %_ Location 0 + OpMemberDecorate %CA 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %DA 0 Offset 0 + OpMemberDecorate %D 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpMemberDecorate %E 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpDecorate %SSBO BufferBlock + OpDecorate %__1 DescriptorSet 0 + OpDecorate %__1 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %AA = OpTypeStruct %int + %AB = OpTypeStruct %int + %A = OpTypeStruct %AA %AB + %BA = OpTypeStruct %int + %BB = OpTypeStruct %int + %B = OpTypeStruct %BA %BB + %VertexData = OpTypeStruct %A %B +%_ptr_Input_VertexData = OpTypePointer Input %VertexData + %_ = OpVariable %_ptr_Input_VertexData 
Input + %CA = OpTypeStruct %int + %C = OpTypeStruct %CA + %DA = OpTypeStruct %int + %D = OpTypeStruct %DA + %UBO = OpTypeStruct %C %D +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %__0 = OpVariable %_ptr_Uniform_UBO Uniform + %E = OpTypeStruct %int + %SSBO = OpTypeStruct %E %E %E +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %__1 = OpVariable %_ptr_Uniform_SSBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag b/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag new file mode 100644 index 00000000000..fb7cdb07184 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/component-insert-packed-expression.asm.frag @@ -0,0 +1,70 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %frag "main" %gl_FragCoord %out_var_SV_Target + OpExecutionMode %frag OriginUpperLeft + OpSource HLSL 600 + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "_BorderWidths" + OpName %_Globals "$Globals" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %frag "frag" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 0 + OpDecorate %_arr_float_uint_4 ArrayStride 16 + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %float_0 = OpConstant %float 0 + %int_2 = OpConstant %int 2 + %uint = OpTypeInt 32 0 + %float_1 = OpConstant %float 1 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%type__Globals = OpTypeStruct %_arr_float_uint_4 +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals + %v4float = OpTypeVector %float 4 
+%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %21 = OpTypeFunction %void + %v2float = OpTypeVector %float 2 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %bool = OpTypeBool + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target = OpVariable %_ptr_Output_v4float Output + %frag = OpFunction %void None %21 + %25 = OpLabel + %26 = OpLoad %v4float %gl_FragCoord + %27 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_0 + %28 = OpLoad %float %27 + %29 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_1 + %30 = OpLoad %float %29 + %31 = OpCompositeConstruct %v2float %28 %30 + %32 = OpCompositeExtract %float %26 0 + %33 = OpFOrdGreaterThan %bool %32 %float_0 + OpSelectionMerge %34 None + OpBranchConditional %33 %35 %34 + %35 = OpLabel + %36 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %37 = OpLoad %float %36 + %38 = OpCompositeInsert %v2float %37 %31 0 + OpBranch %34 + %34 = OpLabel + %39 = OpPhi %v2float %31 %25 %38 %35 + %40 = OpCompositeExtract %float %39 0 + %41 = OpCompositeExtract %float %39 1 + %42 = OpCompositeConstruct %v4float %40 %41 %float_0 %float_1 + OpStore %out_var_SV_Target %42 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..9408e69ac09 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,127 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vInput %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %vInput "vInput" 
+ OpName %FragColor "FragColor" + OpName %phi "PHI" + OpDecorate %vInput RelaxedPrecision + OpDecorate %vInput Location 0 + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %b0 RelaxedPrecision + OpDecorate %b1 RelaxedPrecision + OpDecorate %b2 RelaxedPrecision + OpDecorate %b3 RelaxedPrecision + OpDecorate %c1 RelaxedPrecision + OpDecorate %c3 RelaxedPrecision + OpDecorate %d4_mp RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vInput = OpVariable %_ptr_Input_v4float Input + %float_1 = OpConstant %float 1 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_2 = OpConstant %float 2 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 + %uint_2 = OpConstant %uint 2 + %float_4 = OpConstant %float 4 + %uint_3 = OpConstant %uint 3 + %v4float_arr2 = OpTypeArray %v4float %uint_2 + %v44float = OpTypeMatrix %v4float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v4undef = OpUndef %v4float + %v4const = OpConstantNull %v4float + %v4arrconst = OpConstantNull %v4float_arr2 + %v44const = OpConstantNull %v44float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + + %loaded0 = OpLoad %v4float %vInput + + ; Basic case (highp). + %a0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %a1 = OpCompositeInsert %v4float %float_2 %a0 1 + %a2 = OpCompositeInsert %v4float %float_3 %a1 2 + %a3 = OpCompositeInsert %v4float %float_4 %a2 3 + OpStore %FragColor %a3 + + ; Basic case (mediump). 
+ %b0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %b1 = OpCompositeInsert %v4float %float_2 %b0 1 + %b2 = OpCompositeInsert %v4float %float_3 %b1 2 + %b3 = OpCompositeInsert %v4float %float_4 %b2 3 + OpStore %FragColor %b3 + + ; Mix relaxed precision. + %c0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %c1 = OpCompositeInsert %v4float %float_2 %c0 1 + %c2 = OpCompositeInsert %v4float %float_3 %c1 2 + %c3 = OpCompositeInsert %v4float %float_4 %c2 3 + OpStore %FragColor %c3 + + ; SSA use after insert + %d0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %d1 = OpCompositeInsert %v4float %float_2 %d0 1 + %d2 = OpCompositeInsert %v4float %float_3 %d1 2 + %d3 = OpCompositeInsert %v4float %float_4 %d2 3 + %d4 = OpFAdd %v4float %d3 %d0 + OpStore %FragColor %d4 + %d4_mp = OpFAdd %v4float %d3 %d1 + OpStore %FragColor %d4_mp + + ; Verify Insert behavior on Undef. + %e0 = OpCompositeInsert %v4float %float_1 %v4undef 0 + %e1 = OpCompositeInsert %v4float %float_2 %e0 1 + %e2 = OpCompositeInsert %v4float %float_3 %e1 2 + %e3 = OpCompositeInsert %v4float %float_4 %e2 3 + OpStore %FragColor %e3 + + ; Verify Insert behavior on Constant. + %f0 = OpCompositeInsert %v4float %float_1 %v4const 0 + OpStore %FragColor %f0 + + ; Verify Insert behavior on Array. + %g0 = OpCompositeInsert %v4float_arr2 %float_1 %v4arrconst 1 2 + %g1 = OpCompositeInsert %v4float_arr2 %float_2 %g0 0 3 + %g2 = OpCompositeExtract %v4float %g1 0 + OpStore %FragColor %g2 + %g3 = OpCompositeExtract %v4float %g1 1 + OpStore %FragColor %g3 + + ; Verify Insert behavior on Matrix. + %h0 = OpCompositeInsert %v44float %float_1 %v44const 1 2 + %h1 = OpCompositeInsert %v44float %float_2 %h0 2 3 + %h2 = OpCompositeExtract %v4float %h1 0 + OpStore %FragColor %h2 + %h3 = OpCompositeExtract %v4float %h1 1 + OpStore %FragColor %h3 + %h4 = OpCompositeExtract %v4float %h1 2 + OpStore %FragColor %h4 + %h5 = OpCompositeExtract %v4float %h1 3 + OpStore %FragColor %h5 + + ; Verify that we cannot RMW PHI variables. 
+ OpBranch %next + %next = OpLabel + %phi = OpPhi %v4float %d2 %5 + %i0 = OpCompositeInsert %v4float %float_4 %phi 3 + OpStore %FragColor %i0 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag b/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag new file mode 100644 index 00000000000..d4bf014bbfa --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/constant-composite-block-no-array-stride-2.asm.frag @@ -0,0 +1,33 @@ +OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %main "main" %frag_out +OpExecutionMode %main OriginUpperLeft +OpDecorate %frag_out Location 0 +OpMemberDecorate %type 1 Offset 0 +%void = OpTypeVoid +%float = OpTypeFloat 32 +%uint = OpTypeInt 32 0 +%uint_2 = OpConstant %uint 2 +%const_1 = OpConstant %float 1.0 +%const_2 = OpConstant %float 2.0 +%const_3 = OpConstant %float 3.0 +%const_4 = OpConstant %float 4.0 +%const_5 = OpConstant %float 5.0 +%const_6 = OpConstant %float 6.0 +%arr_float_2 = OpTypeArray %float %uint_2 +%const_arr0 = OpConstantComposite %arr_float_2 %const_1 %const_2 +%const_arr1 = OpConstantComposite %arr_float_2 %const_3 %const_4 +%const_arr2 = OpConstantComposite %arr_float_2 %const_5 %const_6 +%type = OpTypeStruct %arr_float_2 %arr_float_2 %arr_float_2 +%float_ptr = OpTypePointer Output %float +%const_var = OpConstantComposite %type %const_arr0 %const_arr1 %const_arr2 +%type_ptr = OpTypePointer Function %type +%frag_out = OpVariable %float_ptr Output +%main_func = OpTypeFunction %void +%main = OpFunction %void None %main_func +%label = OpLabel +%var = OpVariable %type_ptr Function +OpStore %var %const_var +OpStore %frag_out %const_1 +OpReturn +OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag b/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag new file mode 100644 index 00000000000..ebab7fd0c97 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/frag/copy-memory-block-like-thread-local.asm.frag @@ -0,0 +1,50 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google rspirv; 0 +; Bound: 43 +; Schema: 0 + OpCapability ImageQuery + OpCapability Int8 + OpCapability RuntimeDescriptorArray + OpCapability StorageImageWriteWithoutFormat + OpCapability Shader + OpCapability VulkanMemoryModel + OpExtension "SPV_EXT_descriptor_indexing" + OpExtension "SPV_KHR_vulkan_memory_model" + OpMemoryModel Logical Vulkan + OpEntryPoint Fragment %1 "main" + OpExecutionMode %1 OriginUpperLeft + OpDecorate %2 ArrayStride 4 + OpMemberDecorate %3 0 Offset 0 + %4 = OpTypeInt 32 0 + %5 = OpTypeFloat 32 + %6 = OpTypePointer Function %5 + %7 = OpTypeVoid + %8 = OpTypeFunction %7 + %9 = OpConstant %4 0 + %10 = OpConstant %4 1 + %11 = OpConstant %4 2 + %12 = OpConstant %4 4 + %13 = OpConstant %4 3 + %14 = OpConstant %5 0 + %2 = OpTypeArray %5 %12 + %15 = OpTypePointer Function %2 + %16 = OpTypeFunction %7 %15 + %3 = OpTypeStruct %2 + %17 = OpTypePointer Function %3 + %1 = OpFunction %7 None %8 + %31 = OpLabel + %33 = OpVariable %17 Function + %34 = OpVariable %15 Function + %39 = OpAccessChain %6 %34 %9 + OpStore %39 %14 + %40 = OpAccessChain %6 %34 %10 + OpStore %40 %14 + %41 = OpAccessChain %6 %34 %11 + OpStore %41 %14 + %42 = OpAccessChain %6 %34 %13 + OpStore %42 %14 + %37 = OpAccessChain %15 %33 %9 + OpCopyMemory %37 %34 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..a9650ddbb6b --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,61 @@ +; SPIR-V +; Version: 1.2 +; Generator: Khronos; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %EntryPoint_Main "main" + OpExecutionMode %EntryPoint_Main OriginUpperLeft + OpSource Unknown 100 + OpName %EmptyStructTest "EmptyStructTest" 
+ OpName %EmptyStruct2Test "EmptyStruct2Test" + OpName %GetValue "GetValue" + OpName %GetValue2 "GetValue" + OpName %self "self" + OpName %self2 "self" + OpName %emptyStruct "emptyStruct" + OpName %value "value" + OpName %EntryPoint_Main "EntryPoint_Main" + +%EmptyStructTest = OpTypeStruct +%EmptyStruct2Test = OpTypeStruct %EmptyStructTest +%_ptr_Function_EmptyStruct2Test = OpTypePointer Function %EmptyStruct2Test + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %5 = OpTypeFunction %float %_ptr_Function_EmptyStruct2Test + %6 = OpTypeFunction %float %EmptyStruct2Test + %void = OpTypeVoid +%_ptr_Function_void = OpTypePointer Function %void + %8 = OpTypeFunction %void %_ptr_Function_EmptyStruct2Test + %9 = OpTypeFunction %void + %float_0 = OpConstant %float 0 + %value4 = OpConstantNull %EmptyStruct2Test + + %GetValue = OpFunction %float None %5 + %self = OpFunctionParameter %_ptr_Function_EmptyStruct2Test + %13 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + + %GetValue2 = OpFunction %float None %6 + %self2 = OpFunctionParameter %EmptyStruct2Test + %14 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + +%EntryPoint_Main = OpFunction %void None %9 + %37 = OpLabel + %emptyStruct = OpVariable %_ptr_Function_EmptyStruct2Test Function + %18 = OpVariable %_ptr_Function_EmptyStruct2Test Function + %value = OpVariable %_ptr_Function_float Function + %value2 = OpCompositeConstruct %EmptyStructTest + %value3 = OpCompositeConstruct %EmptyStruct2Test %value2 + %22 = OpFunctionCall %float %GetValue %emptyStruct + %23 = OpFunctionCall %float %GetValue2 %value3 + %24 = OpFunctionCall %float %GetValue2 %value4 + OpStore %value %22 + OpStore %value %23 + OpStore %value %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag b/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag new file mode 100644 index 00000000000..2f522f44b57 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/frag/fixup-entry-point-identifier.nomain.asm.frag @@ -0,0 +1,27 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 12 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %foo "_5ma@@in" %FragColor + OpExecutionMode %foo OriginUpperLeft + OpSource GLSL 450 + OpName %foo "FOO" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %foo = OpFunction %void None %3 + %5 = OpLabel + OpStore %FragColor %11 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/image-gather.asm.frag b/shaders-msl-no-opt/asm/frag/image-gather.asm.frag new file mode 100644 index 00000000000..f26bb07264e --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/image-gather.asm.frag @@ -0,0 +1,74 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 36 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + OpExtension "SPV_GOOGLE_user_type" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %psMain "main" %gl_FragCoord %in_var_TEXCOORD0 %out_var_SV_Target0 + OpExecutionMode %psMain OriginUpperLeft + OpSource HLSL 500 + OpName %type_2d_image "type.2d.image" + OpName %g_texture "g_texture" + OpName %type_sampler "type.sampler" + OpName %g_sampler "g_sampler" + OpName %g_comp "g_comp" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %psMain "psMain" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + 
OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %g_texture DescriptorSet 0 + OpDecorate %g_texture Binding 0 + OpDecorate %g_sampler DescriptorSet 0 + OpDecorate %g_sampler Binding 0 + OpDecorate %g_comp DescriptorSet 0 + OpDecorate %g_comp Binding 1 + OpDecorateString %g_texture UserTypeGOOGLE "texture2d" + %float = OpTypeFloat 32 + %float_0_5 = OpConstant %float 0.5 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %v2int = OpTypeVector %int 2 + %16 = OpConstantComposite %v2int %int_0 %int_0 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %25 = OpTypeFunction %void +%type_sampled_image = OpTypeSampledImage %type_2d_image +%g_texture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant + %g_sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %g_comp = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %psMain = OpFunction %void None %25 + %26 = OpLabel + %27 = OpLoad %v2float %in_var_TEXCOORD0 + %28 = OpLoad %type_2d_image %g_texture + %29 = OpLoad %type_sampler %g_comp + %30 = OpSampledImage %type_sampled_image %28 %29 + %32 = OpLoad %type_sampler %g_sampler + %33 = OpSampledImage 
%type_sampled_image %28 %32 + %31 = OpImageGather %v4float %33 %27 %int_1 ConstOffset %16 + %34 = OpImageGather %v4float %33 %27 %int_0 ConstOffset %16 + %35 = OpFMul %v4float %34 %31 + OpStore %out_var_SV_Target0 %35 + OpReturn + OpFunctionEnd + diff --git a/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag b/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag new file mode 100644 index 00000000000..1840c9b1370 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/in_block_with_aliased_struct_and_name.asm.frag @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %foos %bars + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpMemberName %Foo 1 "b" + OpName %foos "ALIAS" + OpName %bars "ALIAS" + OpDecorate %FragColor Location 0 + OpDecorate %foos Location 1 + OpDecorate %bars Location 10 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %Foo = OpTypeStruct %float %float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_Foo_uint_4 = OpTypeArray %Foo %uint_4 +%_ptr_Input__arr_Foo_uint_4 = OpTypePointer Input %_arr_Foo_uint_4 + %foos = OpVariable %_ptr_Input__arr_Foo_uint_4 Input + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_0 = OpConstant %uint 0 +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %uint_1 = OpConstant %uint 1 + %int_2 = OpConstant %int 2 + %uint_2 = OpConstant %uint 2 + %bars = OpVariable %_ptr_Input__arr_Foo_uint_4 Input + 
%int_3 = OpConstant %int 3 + %uint_3 = OpConstant %uint 3 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Input_float %foos %int_0 %int_0 + %20 = OpLoad %float %19 + %23 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %23 %20 + %25 = OpAccessChain %_ptr_Input_float %foos %int_1 %int_1 + %26 = OpLoad %float %25 + %28 = OpAccessChain %_ptr_Output_float %FragColor %uint_1 + OpStore %28 %26 + %30 = OpAccessChain %_ptr_Input_float %foos %int_2 %int_0 + %31 = OpLoad %float %30 + %33 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %33 %31 + %36 = OpAccessChain %_ptr_Input_float %bars %int_3 %int_1 + %37 = OpLoad %float %36 + %39 = OpAccessChain %_ptr_Output_float %FragColor %uint_3 + OpStore %39 %37 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag b/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag deleted file mode 100644 index 8b09e5b68f8..00000000000 --- a/shaders-msl-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag +++ /dev/null @@ -1,646 +0,0 @@ -; SPIR-V -; Version: 1.0 -; Generator: Khronos Glslang Reference Front End; 3 -; Bound: 1532 -; Schema: 0 - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Fragment %main "main" %IN_HPosition %IN_Uv_EdgeDistance1 %IN_UvStuds_EdgeDistance2 %IN_Color %IN_LightPosition_Fog %IN_View_Depth %IN_Normal_SpecPower %IN_Tangent %IN_PosLightSpace_Reflectance %IN_studIndex %_entryPointOutput - OpExecutionMode %main OriginUpperLeft - OpSource HLSL 500 - OpName %main "main" - OpName %VertexOutput "VertexOutput" - OpMemberName %VertexOutput 0 "HPosition" - OpMemberName %VertexOutput 1 "Uv_EdgeDistance1" - OpMemberName %VertexOutput 2 "UvStuds_EdgeDistance2" - OpMemberName %VertexOutput 3 "Color" - OpMemberName %VertexOutput 4 "LightPosition_Fog" - OpMemberName %VertexOutput 5 "View_Depth" - OpMemberName %VertexOutput 6 "Normal_SpecPower" - 
OpMemberName %VertexOutput 7 "Tangent" - OpMemberName %VertexOutput 8 "PosLightSpace_Reflectance" - OpMemberName %VertexOutput 9 "studIndex" - OpName %Surface "Surface" - OpMemberName %Surface 0 "albedo" - OpMemberName %Surface 1 "normal" - OpMemberName %Surface 2 "specular" - OpMemberName %Surface 3 "gloss" - OpMemberName %Surface 4 "reflectance" - OpMemberName %Surface 5 "opacity" - OpName %SurfaceInput "SurfaceInput" - OpMemberName %SurfaceInput 0 "Color" - OpMemberName %SurfaceInput 1 "Uv" - OpMemberName %SurfaceInput 2 "UvStuds" - OpName %Globals "Globals" - OpMemberName %Globals 0 "ViewProjection" - OpMemberName %Globals 1 "ViewRight" - OpMemberName %Globals 2 "ViewUp" - OpMemberName %Globals 3 "ViewDir" - OpMemberName %Globals 4 "CameraPosition" - OpMemberName %Globals 5 "AmbientColor" - OpMemberName %Globals 6 "Lamp0Color" - OpMemberName %Globals 7 "Lamp0Dir" - OpMemberName %Globals 8 "Lamp1Color" - OpMemberName %Globals 9 "FogParams" - OpMemberName %Globals 10 "FogColor" - OpMemberName %Globals 11 "LightBorder" - OpMemberName %Globals 12 "LightConfig0" - OpMemberName %Globals 13 "LightConfig1" - OpMemberName %Globals 14 "LightConfig2" - OpMemberName %Globals 15 "LightConfig3" - OpMemberName %Globals 16 "RefractionBias_FadeDistance_GlowFactor" - OpMemberName %Globals 17 "OutlineBrightness_ShadowInfo" - OpMemberName %Globals 18 "ShadowMatrix0" - OpMemberName %Globals 19 "ShadowMatrix1" - OpMemberName %Globals 20 "ShadowMatrix2" - OpName %CB0 "CB0" - OpMemberName %CB0 0 "CB0" - OpName %_ "" - OpName %LightMapTexture "LightMapTexture" - OpName %LightMapSampler "LightMapSampler" - OpName %ShadowMapSampler "ShadowMapSampler" - OpName %ShadowMapTexture "ShadowMapTexture" - OpName %EnvironmentMapTexture "EnvironmentMapTexture" - OpName %EnvironmentMapSampler "EnvironmentMapSampler" - OpName %IN_HPosition "IN.HPosition" - OpName %IN_Uv_EdgeDistance1 "IN.Uv_EdgeDistance1" - OpName %IN_UvStuds_EdgeDistance2 "IN.UvStuds_EdgeDistance2" - OpName %IN_Color "IN.Color" - 
OpName %IN_LightPosition_Fog "IN.LightPosition_Fog" - OpName %IN_View_Depth "IN.View_Depth" - OpName %IN_Normal_SpecPower "IN.Normal_SpecPower" - OpName %IN_Tangent "IN.Tangent" - OpName %IN_PosLightSpace_Reflectance "IN.PosLightSpace_Reflectance" - OpName %IN_studIndex "IN.studIndex" - OpName %_entryPointOutput "@entryPointOutput" - OpName %DiffuseMapSampler "DiffuseMapSampler" - OpName %DiffuseMapTexture "DiffuseMapTexture" - OpName %NormalMapSampler "NormalMapSampler" - OpName %NormalMapTexture "NormalMapTexture" - OpName %NormalDetailMapTexture "NormalDetailMapTexture" - OpName %NormalDetailMapSampler "NormalDetailMapSampler" - OpName %StudsMapTexture "StudsMapTexture" - OpName %StudsMapSampler "StudsMapSampler" - OpName %SpecularMapSampler "SpecularMapSampler" - OpName %SpecularMapTexture "SpecularMapTexture" - OpName %Params "Params" - OpMemberName %Params 0 "LqmatFarTilingFactor" - OpName %CB2 "CB2" - OpMemberName %CB2 0 "CB2" - OpMemberDecorate %Globals 0 ColMajor - OpMemberDecorate %Globals 0 Offset 0 - OpMemberDecorate %Globals 0 MatrixStride 16 - OpMemberDecorate %Globals 1 Offset 64 - OpMemberDecorate %Globals 2 Offset 80 - OpMemberDecorate %Globals 3 Offset 96 - OpMemberDecorate %Globals 4 Offset 112 - OpMemberDecorate %Globals 5 Offset 128 - OpMemberDecorate %Globals 6 Offset 144 - OpMemberDecorate %Globals 7 Offset 160 - OpMemberDecorate %Globals 8 Offset 176 - OpMemberDecorate %Globals 9 Offset 192 - OpMemberDecorate %Globals 10 Offset 208 - OpMemberDecorate %Globals 11 Offset 224 - OpMemberDecorate %Globals 12 Offset 240 - OpMemberDecorate %Globals 13 Offset 256 - OpMemberDecorate %Globals 14 Offset 272 - OpMemberDecorate %Globals 15 Offset 288 - OpMemberDecorate %Globals 16 Offset 304 - OpMemberDecorate %Globals 17 Offset 320 - OpMemberDecorate %Globals 18 Offset 336 - OpMemberDecorate %Globals 19 Offset 352 - OpMemberDecorate %Globals 20 Offset 368 - OpMemberDecorate %CB0 0 Offset 0 - OpDecorate %CB0 Block - OpDecorate %_ DescriptorSet 0 - 
OpDecorate %_ Binding 0 - OpDecorate %LightMapTexture DescriptorSet 1 - OpDecorate %LightMapTexture Binding 6 - OpDecorate %LightMapSampler DescriptorSet 1 - OpDecorate %LightMapSampler Binding 6 - OpDecorate %ShadowMapSampler DescriptorSet 1 - OpDecorate %ShadowMapSampler Binding 1 - OpDecorate %ShadowMapTexture DescriptorSet 1 - OpDecorate %ShadowMapTexture Binding 1 - OpDecorate %EnvironmentMapTexture DescriptorSet 1 - OpDecorate %EnvironmentMapTexture Binding 2 - OpDecorate %EnvironmentMapSampler DescriptorSet 1 - OpDecorate %EnvironmentMapSampler Binding 2 - OpDecorate %IN_HPosition BuiltIn FragCoord - OpDecorate %IN_Uv_EdgeDistance1 Location 0 - OpDecorate %IN_UvStuds_EdgeDistance2 Location 1 - OpDecorate %IN_Color Location 2 - OpDecorate %IN_LightPosition_Fog Location 3 - OpDecorate %IN_View_Depth Location 4 - OpDecorate %IN_Normal_SpecPower Location 5 - OpDecorate %IN_Tangent Location 6 - OpDecorate %IN_PosLightSpace_Reflectance Location 7 - OpDecorate %IN_studIndex Location 8 - OpDecorate %_entryPointOutput Location 0 - OpDecorate %DiffuseMapSampler DescriptorSet 1 - OpDecorate %DiffuseMapSampler Binding 3 - OpDecorate %DiffuseMapTexture DescriptorSet 1 - OpDecorate %DiffuseMapTexture Binding 3 - OpDecorate %NormalMapSampler DescriptorSet 1 - OpDecorate %NormalMapSampler Binding 4 - OpDecorate %NormalMapTexture DescriptorSet 1 - OpDecorate %NormalMapTexture Binding 4 - OpDecorate %NormalDetailMapTexture DescriptorSet 1 - OpDecorate %NormalDetailMapTexture Binding 8 - OpDecorate %NormalDetailMapSampler DescriptorSet 1 - OpDecorate %NormalDetailMapSampler Binding 8 - OpDecorate %StudsMapTexture DescriptorSet 1 - OpDecorate %StudsMapTexture Binding 0 - OpDecorate %StudsMapSampler DescriptorSet 1 - OpDecorate %StudsMapSampler Binding 0 - OpDecorate %SpecularMapSampler DescriptorSet 1 - OpDecorate %SpecularMapSampler Binding 5 - OpDecorate %SpecularMapTexture DescriptorSet 1 - OpDecorate %SpecularMapTexture Binding 5 - OpMemberDecorate %Params 0 Offset 0 - 
OpMemberDecorate %CB2 0 Offset 0 - OpDecorate %CB2 Block - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 -%_ptr_Function_float = OpTypePointer Function %float - %8 = OpTypeFunction %float %_ptr_Function_float - %v4float = OpTypeVector %float 4 -%_ptr_Function_v4float = OpTypePointer Function %v4float - %v3float = OpTypeVector %float 3 - %18 = OpTypeFunction %v3float %_ptr_Function_v4float -%_ptr_Function_v3float = OpTypePointer Function %v3float - %23 = OpTypeFunction %v4float %_ptr_Function_v3float - %27 = OpTypeFunction %float %_ptr_Function_v3float - %31 = OpTypeFunction %float %_ptr_Function_float %_ptr_Function_float - %36 = OpTypeSampler -%_ptr_Function_36 = OpTypePointer Function %36 - %38 = OpTypeImage %float 2D 0 0 0 1 Unknown -%_ptr_Function_38 = OpTypePointer Function %38 - %40 = OpTypeFunction %float %_ptr_Function_36 %_ptr_Function_38 %_ptr_Function_v3float %_ptr_Function_float -%VertexOutput = OpTypeStruct %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v3float %v4float %float -%_ptr_Function_VertexOutput = OpTypePointer Function %VertexOutput - %Surface = OpTypeStruct %v3float %v3float %float %float %float %float - %50 = OpTypeFunction %Surface %_ptr_Function_VertexOutput - %54 = OpTypeFunction %v4float %_ptr_Function_VertexOutput - %v2float = OpTypeVector %float 2 -%_ptr_Function_v2float = OpTypePointer Function %v2float - %60 = OpTypeFunction %v4float %_ptr_Function_36 %_ptr_Function_38 %_ptr_Function_v2float %_ptr_Function_float %_ptr_Function_float -%SurfaceInput = OpTypeStruct %v4float %v2float %v2float -%_ptr_Function_SurfaceInput = OpTypePointer Function %SurfaceInput - %70 = OpTypeFunction %Surface %_ptr_Function_SurfaceInput %_ptr_Function_v2float - %float_0 = OpConstant %float 0 - %float_1 = OpConstant %float 1 - %float_2 = OpConstant %float 2 -%mat4v4float = OpTypeMatrix %v4float 4 - %Globals = OpTypeStruct %mat4v4float %v4float %v4float %v4float %v3float %v3float %v3float %v3float %v3float 
%v4float %v3float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float - %CB0 = OpTypeStruct %Globals -%_ptr_Uniform_CB0 = OpTypePointer Uniform %CB0 - %_ = OpVariable %_ptr_Uniform_CB0 Uniform - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 - %int_15 = OpConstant %int 15 -%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float - %int_14 = OpConstant %int 14 - %128 = OpConstantComposite %v3float %float_1 %float_1 %float_1 - %133 = OpTypeImage %float 3D 0 0 0 1 Unknown -%_ptr_UniformConstant_133 = OpTypePointer UniformConstant %133 -%LightMapTexture = OpVariable %_ptr_UniformConstant_133 UniformConstant -%_ptr_UniformConstant_36 = OpTypePointer UniformConstant %36 -%LightMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant - %140 = OpTypeSampledImage %133 - %int_11 = OpConstant %int 11 - %uint = OpTypeInt 32 0 - %float_9 = OpConstant %float 9 - %float_20 = OpConstant %float 20 - %float_0_5 = OpConstant %float 0.5 - %183 = OpTypeSampledImage %38 - %uint_0 = OpConstant %uint 0 - %uint_1 = OpConstant %uint 1 - %int_17 = OpConstant %int 17 - %uint_3 = OpConstant %uint 3 -%_ptr_Uniform_float = OpTypePointer Uniform %float - %float_0_25 = OpConstant %float 0.25 - %int_5 = OpConstant %int 5 -%float_0_00333333 = OpConstant %float 0.00333333 - %int_16 = OpConstant %int 16 -%_ptr_Function_Surface = OpTypePointer Function %Surface - %int_6 = OpConstant %int 6 - %int_7 = OpConstant %int 7 -%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float - %int_8 = OpConstant %int 8 -%ShadowMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%_ptr_UniformConstant_38 = OpTypePointer UniformConstant %38 -%ShadowMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant - %367 = OpTypeImage %float Cube 0 0 0 1 Unknown -%_ptr_UniformConstant_367 = OpTypePointer UniformConstant %367 -%EnvironmentMapTexture = OpVariable %_ptr_UniformConstant_367 UniformConstant -%EnvironmentMapSampler = OpVariable %_ptr_UniformConstant_36 
UniformConstant - %373 = OpTypeSampledImage %367 - %float_1_5 = OpConstant %float 1.5 - %int_10 = OpConstant %int 10 -%_ptr_Input_v4float = OpTypePointer Input %v4float -%IN_HPosition = OpVariable %_ptr_Input_v4float Input -%IN_Uv_EdgeDistance1 = OpVariable %_ptr_Input_v4float Input -%IN_UvStuds_EdgeDistance2 = OpVariable %_ptr_Input_v4float Input - %IN_Color = OpVariable %_ptr_Input_v4float Input -%IN_LightPosition_Fog = OpVariable %_ptr_Input_v4float Input -%IN_View_Depth = OpVariable %_ptr_Input_v4float Input -%IN_Normal_SpecPower = OpVariable %_ptr_Input_v4float Input -%_ptr_Input_v3float = OpTypePointer Input %v3float - %IN_Tangent = OpVariable %_ptr_Input_v3float Input -%IN_PosLightSpace_Reflectance = OpVariable %_ptr_Input_v4float Input -%_ptr_Input_float = OpTypePointer Input %float -%IN_studIndex = OpVariable %_ptr_Input_float Input -%_ptr_Output_v4float = OpTypePointer Output %v4float -%_entryPointOutput = OpVariable %_ptr_Output_v4float Output - %bool = OpTypeBool -%DiffuseMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%DiffuseMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant -%NormalMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%NormalMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant -%NormalDetailMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant -%NormalDetailMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant - %float_0_3 = OpConstant %float 0.3 -%StudsMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant -%StudsMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%SpecularMapSampler = OpVariable %_ptr_UniformConstant_36 UniformConstant -%SpecularMapTexture = OpVariable %_ptr_UniformConstant_38 UniformConstant - %float_0_75 = OpConstant %float 0.75 - %float_256 = OpConstant %float 256 - %689 = OpConstantComposite %v2float %float_2 %float_256 - %float_0_01 = OpConstant %float 0.01 - %692 = OpConstantComposite %v2float %float_0 
%float_0_01 - %float_0_8 = OpConstant %float 0.8 - %float_120 = OpConstant %float 120 - %697 = OpConstantComposite %v2float %float_0_8 %float_120 - %Params = OpTypeStruct %v4float - %CB2 = OpTypeStruct %Params -%_ptr_Uniform_CB2 = OpTypePointer Uniform %CB2 - %false = OpConstantFalse %bool - %1509 = OpUndef %VertexOutput - %1510 = OpUndef %SurfaceInput - %1511 = OpUndef %v2float - %1512 = OpUndef %v4float - %1531 = OpUndef %Surface - %main = OpFunction %void None %3 - %5 = OpLabel - %501 = OpLoad %v4float %IN_HPosition - %1378 = OpCompositeInsert %VertexOutput %501 %1509 0 - %504 = OpLoad %v4float %IN_Uv_EdgeDistance1 - %1380 = OpCompositeInsert %VertexOutput %504 %1378 1 - %507 = OpLoad %v4float %IN_UvStuds_EdgeDistance2 - %1382 = OpCompositeInsert %VertexOutput %507 %1380 2 - %510 = OpLoad %v4float %IN_Color - %1384 = OpCompositeInsert %VertexOutput %510 %1382 3 - %513 = OpLoad %v4float %IN_LightPosition_Fog - %1386 = OpCompositeInsert %VertexOutput %513 %1384 4 - %516 = OpLoad %v4float %IN_View_Depth - %1388 = OpCompositeInsert %VertexOutput %516 %1386 5 - %519 = OpLoad %v4float %IN_Normal_SpecPower - %1390 = OpCompositeInsert %VertexOutput %519 %1388 6 - %523 = OpLoad %v3float %IN_Tangent - %1392 = OpCompositeInsert %VertexOutput %523 %1390 7 - %526 = OpLoad %v4float %IN_PosLightSpace_Reflectance - %1394 = OpCompositeInsert %VertexOutput %526 %1392 8 - %530 = OpLoad %float %IN_studIndex - %1396 = OpCompositeInsert %VertexOutput %530 %1394 9 - %1400 = OpCompositeInsert %SurfaceInput %510 %1510 0 - %954 = OpVectorShuffle %v2float %504 %504 0 1 - %1404 = OpCompositeInsert %SurfaceInput %954 %1400 1 - %958 = OpVectorShuffle %v2float %507 %507 0 1 - %1408 = OpCompositeInsert %SurfaceInput %958 %1404 2 - %1410 = OpCompositeExtract %float %1408 2 1 - %962 = OpExtInst %float %1 Fract %1410 - %965 = OpFAdd %float %962 %530 - %966 = OpFMul %float %965 %float_0_25 - %1414 = OpCompositeInsert %SurfaceInput %966 %1408 2 1 - %1416 = OpCompositeExtract %float %1396 5 3 - %970 
= OpFMul %float %1416 %float_0_00333333 - %971 = OpFSub %float %float_1 %970 - %987 = OpExtInst %float %1 FClamp %971 %float_0 %float_1 - %976 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_16 %uint_1 - %977 = OpLoad %float %976 - %978 = OpFMul %float %1416 %977 - %979 = OpFSub %float %float_1 %978 - %990 = OpExtInst %float %1 FClamp %979 %float_0 %float_1 - %1024 = OpVectorTimesScalar %v2float %954 %float_1 - %1029 = OpLoad %36 %DiffuseMapSampler - %1030 = OpLoad %38 %DiffuseMapTexture - OpBranch %1119 - %1119 = OpLabel - OpLoopMerge %1120 %1121 None - OpBranch %1122 - %1122 = OpLabel - %1124 = OpFOrdEqual %bool %float_0 %float_0 - OpSelectionMerge %1125 None - OpBranchConditional %1124 %1126 %1127 - %1126 = OpLabel - %1130 = OpSampledImage %183 %1030 %1029 - %1132 = OpImageSampleImplicitLod %v4float %1130 %1024 - OpBranch %1120 - %1127 = OpLabel - %1134 = OpFSub %float %float_1 %float_0 - %1135 = OpFDiv %float %float_1 %1134 - %1138 = OpSampledImage %183 %1030 %1029 - %1140 = OpVectorTimesScalar %v2float %1024 %float_0_25 - %1141 = OpImageSampleImplicitLod %v4float %1138 %1140 - %1144 = OpSampledImage %183 %1030 %1029 - %1146 = OpImageSampleImplicitLod %v4float %1144 %1024 - %1149 = OpFMul %float %987 %1135 - %1152 = OpFMul %float %float_0 %1135 - %1153 = OpFSub %float %1149 %1152 - %1161 = OpExtInst %float %1 FClamp %1153 %float_0 %float_1 - %1155 = OpCompositeConstruct %v4float %1161 %1161 %1161 %1161 - %1156 = OpExtInst %v4float %1 FMix %1141 %1146 %1155 - OpBranch %1120 - %1125 = OpLabel - %1157 = OpUndef %v4float - OpBranch %1120 - %1121 = OpLabel - OpBranchConditional %false %1119 %1120 - %1120 = OpLabel - %1517 = OpPhi %v4float %1132 %1126 %1156 %1127 %1157 %1125 %1512 %1121 - %1035 = OpVectorTimesScalar %v4float %1517 %float_1 - %1036 = OpLoad %36 %NormalMapSampler - %1037 = OpLoad %38 %NormalMapTexture - OpBranch %1165 - %1165 = OpLabel - OpLoopMerge %1166 %1167 None - OpBranch %1168 - %1168 = OpLabel - OpSelectionMerge %1171 None - 
OpBranchConditional %1124 %1172 %1173 - %1172 = OpLabel - %1176 = OpSampledImage %183 %1037 %1036 - %1178 = OpImageSampleImplicitLod %v4float %1176 %1024 - OpBranch %1166 - %1173 = OpLabel - %1180 = OpFSub %float %float_1 %float_0 - %1181 = OpFDiv %float %float_1 %1180 - %1184 = OpSampledImage %183 %1037 %1036 - %1186 = OpVectorTimesScalar %v2float %1024 %float_0_25 - %1187 = OpImageSampleImplicitLod %v4float %1184 %1186 - %1190 = OpSampledImage %183 %1037 %1036 - %1192 = OpImageSampleImplicitLod %v4float %1190 %1024 - %1195 = OpFMul %float %990 %1181 - %1198 = OpFMul %float %float_0 %1181 - %1199 = OpFSub %float %1195 %1198 - %1206 = OpExtInst %float %1 FClamp %1199 %float_0 %float_1 - %1201 = OpCompositeConstruct %v4float %1206 %1206 %1206 %1206 - %1202 = OpExtInst %v4float %1 FMix %1187 %1192 %1201 - OpBranch %1166 - %1171 = OpLabel - %1203 = OpUndef %v4float - OpBranch %1166 - %1167 = OpLabel - OpBranchConditional %false %1165 %1166 - %1166 = OpLabel - %1523 = OpPhi %v4float %1178 %1172 %1202 %1173 %1203 %1171 %1512 %1167 - %1210 = OpVectorShuffle %v2float %1523 %1523 3 1 - %1211 = OpVectorTimesScalar %v2float %1210 %float_2 - %1212 = OpCompositeConstruct %v2float %float_1 %float_1 - %1213 = OpFSub %v2float %1211 %1212 - %1216 = OpFNegate %v2float %1213 - %1218 = OpDot %float %1216 %1213 - %1219 = OpFAdd %float %float_1 %1218 - %1220 = OpExtInst %float %1 FClamp %1219 %float_0 %float_1 - %1221 = OpExtInst %float %1 Sqrt %1220 - %1222 = OpCompositeExtract %float %1213 0 - %1223 = OpCompositeExtract %float %1213 1 - %1224 = OpCompositeConstruct %v3float %1222 %1223 %1221 - %1042 = OpLoad %38 %NormalDetailMapTexture - %1043 = OpLoad %36 %NormalDetailMapSampler - %1044 = OpSampledImage %183 %1042 %1043 - %1046 = OpVectorTimesScalar %v2float %1024 %float_0 - %1047 = OpImageSampleImplicitLod %v4float %1044 %1046 - %1228 = OpVectorShuffle %v2float %1047 %1047 3 1 - %1229 = OpVectorTimesScalar %v2float %1228 %float_2 - %1231 = OpFSub %v2float %1229 %1212 - %1234 = 
OpFNegate %v2float %1231 - %1236 = OpDot %float %1234 %1231 - %1237 = OpFAdd %float %float_1 %1236 - %1238 = OpExtInst %float %1 FClamp %1237 %float_0 %float_1 - %1239 = OpExtInst %float %1 Sqrt %1238 - %1240 = OpCompositeExtract %float %1231 0 - %1241 = OpCompositeExtract %float %1231 1 - %1242 = OpCompositeConstruct %v3float %1240 %1241 %1239 - %1050 = OpVectorShuffle %v2float %1242 %1242 0 1 - %1051 = OpVectorTimesScalar %v2float %1050 %float_0 - %1053 = OpVectorShuffle %v2float %1224 %1224 0 1 - %1054 = OpFAdd %v2float %1053 %1051 - %1056 = OpVectorShuffle %v3float %1224 %1054 3 4 2 - %1059 = OpVectorShuffle %v2float %1056 %1056 0 1 - %1060 = OpVectorTimesScalar %v2float %1059 %990 - %1062 = OpVectorShuffle %v3float %1056 %1060 3 4 2 - %1430 = OpCompositeExtract %float %1062 0 - %1065 = OpFMul %float %1430 %float_0_3 - %1066 = OpFAdd %float %float_1 %1065 - %1069 = OpVectorShuffle %v3float %510 %510 0 1 2 - %1071 = OpVectorShuffle %v3float %1035 %1035 0 1 2 - %1072 = OpFMul %v3float %1069 %1071 - %1074 = OpVectorTimesScalar %v3float %1072 %1066 - %1075 = OpLoad %38 %StudsMapTexture - %1076 = OpLoad %36 %StudsMapSampler - %1077 = OpSampledImage %183 %1075 %1076 - %1434 = OpCompositeExtract %v2float %1414 2 - %1080 = OpImageSampleImplicitLod %v4float %1077 %1434 - %1436 = OpCompositeExtract %float %1080 0 - %1083 = OpFMul %float %1436 %float_2 - %1085 = OpVectorTimesScalar %v3float %1074 %1083 - %1086 = OpLoad %36 %SpecularMapSampler - %1087 = OpLoad %38 %SpecularMapTexture - OpBranch %1246 - %1246 = OpLabel - OpLoopMerge %1247 %1248 None - OpBranch %1249 - %1249 = OpLabel - %1251 = OpFOrdEqual %bool %float_0_75 %float_0 - OpSelectionMerge %1252 None - OpBranchConditional %1251 %1253 %1254 - %1253 = OpLabel - %1257 = OpSampledImage %183 %1087 %1086 - %1259 = OpImageSampleImplicitLod %v4float %1257 %1024 - OpBranch %1247 - %1254 = OpLabel - %1261 = OpFSub %float %float_1 %float_0_75 - %1262 = OpFDiv %float %float_1 %1261 - %1265 = OpSampledImage %183 %1087 %1086 - 
%1267 = OpVectorTimesScalar %v2float %1024 %float_0_25 - %1268 = OpImageSampleImplicitLod %v4float %1265 %1267 - %1271 = OpSampledImage %183 %1087 %1086 - %1273 = OpImageSampleImplicitLod %v4float %1271 %1024 - %1276 = OpFMul %float %990 %1262 - %1279 = OpFMul %float %float_0_75 %1262 - %1280 = OpFSub %float %1276 %1279 - %1287 = OpExtInst %float %1 FClamp %1280 %float_0 %float_1 - %1282 = OpCompositeConstruct %v4float %1287 %1287 %1287 %1287 - %1283 = OpExtInst %v4float %1 FMix %1268 %1273 %1282 - OpBranch %1247 - %1252 = OpLabel - %1284 = OpUndef %v4float - OpBranch %1247 - %1248 = OpLabel - OpBranchConditional %false %1246 %1247 - %1247 = OpLabel - %1530 = OpPhi %v4float %1259 %1253 %1283 %1254 %1284 %1252 %1512 %1248 - %1091 = OpVectorShuffle %v2float %1530 %1530 0 1 - %1093 = OpFMul %v2float %1091 %689 - %1094 = OpFAdd %v2float %1093 %692 - %1097 = OpCompositeConstruct %v2float %990 %990 - %1098 = OpExtInst %v2float %1 FMix %697 %1094 %1097 - %1438 = OpCompositeInsert %Surface %1085 %1531 0 - %1440 = OpCompositeInsert %Surface %1062 %1438 1 - %1442 = OpCompositeExtract %float %1098 0 - %1444 = OpCompositeInsert %Surface %1442 %1440 2 - %1446 = OpCompositeExtract %float %1098 1 - %1448 = OpCompositeInsert %Surface %1446 %1444 3 - %1450 = OpCompositeExtract %float %1091 1 - %1112 = OpFMul %float %1450 %990 - %1113 = OpFMul %float %1112 %float_0 - %1452 = OpCompositeInsert %Surface %1113 %1448 4 - %1456 = OpCompositeExtract %float %1396 3 3 - %764 = OpCompositeExtract %float %1085 0 - %765 = OpCompositeExtract %float %1085 1 - %766 = OpCompositeExtract %float %1085 2 - %767 = OpCompositeConstruct %v4float %764 %765 %766 %1456 - %770 = OpVectorShuffle %v3float %519 %519 0 1 2 - %773 = OpExtInst %v3float %1 Cross %770 %523 - %1462 = OpCompositeExtract %float %1452 1 0 - %778 = OpVectorTimesScalar %v3float %523 %1462 - %1466 = OpCompositeExtract %float %1452 1 1 - %782 = OpVectorTimesScalar %v3float %773 %1466 - %783 = OpFAdd %v3float %778 %782 - %1468 = 
OpCompositeExtract %float %1452 1 2 - %789 = OpVectorTimesScalar %v3float %770 %1468 - %790 = OpFAdd %v3float %783 %789 - %791 = OpExtInst %v3float %1 Normalize %790 - %793 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_7 - %794 = OpLoad %v3float %793 - %795 = OpFNegate %v3float %794 - %796 = OpDot %float %791 %795 - %1290 = OpExtInst %float %1 FClamp %796 %float_0 %float_1 - %799 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_6 - %800 = OpLoad %v3float %799 - %801 = OpVectorTimesScalar %v3float %800 %1290 - %803 = OpFNegate %float %796 - %804 = OpExtInst %float %1 FMax %803 %float_0 - %805 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_8 - %806 = OpLoad %v3float %805 - %807 = OpVectorTimesScalar %v3float %806 %804 - %808 = OpFAdd %v3float %801 %807 - %810 = OpExtInst %float %1 Step %float_0 %796 - %813 = OpFMul %float %810 %1442 - %820 = OpVectorShuffle %v3float %513 %513 0 1 2 - %1296 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %int_15 - %1297 = OpLoad %v4float %1296 - %1298 = OpVectorShuffle %v3float %1297 %1297 0 1 2 - %1300 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %int_14 - %1301 = OpLoad %v4float %1300 - %1302 = OpVectorShuffle %v3float %1301 %1301 0 1 2 - %1303 = OpFSub %v3float %820 %1302 - %1304 = OpExtInst %v3float %1 FAbs %1303 - %1305 = OpExtInst %v3float %1 Step %1298 %1304 - %1307 = OpDot %float %1305 %128 - %1328 = OpExtInst %float %1 FClamp %1307 %float_0 %float_1 - %1309 = OpLoad %133 %LightMapTexture - %1310 = OpLoad %36 %LightMapSampler - %1311 = OpSampledImage %140 %1309 %1310 - %1313 = OpVectorShuffle %v3float %820 %820 1 2 0 - %1317 = OpVectorTimesScalar %v3float %1313 %1328 - %1318 = OpFSub %v3float %1313 %1317 - %1319 = OpImageSampleImplicitLod %v4float %1311 %1318 - %1321 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %int_11 - %1322 = OpLoad %v4float %1321 - %1324 = OpCompositeConstruct %v4float %1328 %1328 %1328 %1328 - %1325 = OpExtInst %v4float %1 FMix %1319 %1322 %1324 - %822 = OpLoad %36 %ShadowMapSampler 
- %823 = OpLoad %38 %ShadowMapTexture - %826 = OpVectorShuffle %v3float %526 %526 0 1 2 - %1482 = OpCompositeExtract %float %1325 3 - %1337 = OpSampledImage %183 %823 %822 - %1339 = OpVectorShuffle %v2float %826 %826 0 1 - %1340 = OpImageSampleImplicitLod %v4float %1337 %1339 - %1341 = OpVectorShuffle %v2float %1340 %1340 0 1 - %1484 = OpCompositeExtract %float %826 2 - %1486 = OpCompositeExtract %float %1341 0 - %1363 = OpExtInst %float %1 Step %1486 %1484 - %1365 = OpFSub %float %1484 %float_0_5 - %1366 = OpExtInst %float %1 FAbs %1365 - %1367 = OpFMul %float %float_20 %1366 - %1368 = OpFSub %float %float_9 %1367 - %1369 = OpExtInst %float %1 FClamp %1368 %float_0 %float_1 - %1370 = OpFMul %float %1363 %1369 - %1488 = OpCompositeExtract %float %1341 1 - %1350 = OpFMul %float %1370 %1488 - %1351 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_17 %uint_3 - %1352 = OpLoad %float %1351 - %1353 = OpFMul %float %1350 %1352 - %1354 = OpFSub %float %float_1 %1353 - %1356 = OpFMul %float %1354 %1482 - %830 = OpLoad %367 %EnvironmentMapTexture - %831 = OpLoad %36 %EnvironmentMapSampler - %832 = OpSampledImage %373 %830 %831 - %835 = OpVectorShuffle %v3float %516 %516 0 1 2 - %836 = OpFNegate %v3float %835 - %838 = OpExtInst %v3float %1 Reflect %836 %791 - %839 = OpImageSampleImplicitLod %v4float %832 %838 - %840 = OpVectorShuffle %v3float %839 %839 0 1 2 - %842 = OpVectorShuffle %v3float %767 %767 0 1 2 - %845 = OpCompositeConstruct %v3float %1113 %1113 %1113 - %846 = OpExtInst %v3float %1 FMix %842 %840 %845 - %848 = OpVectorShuffle %v4float %767 %846 4 5 6 3 - %849 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_5 - %850 = OpLoad %v3float %849 - %853 = OpVectorTimesScalar %v3float %808 %1356 - %854 = OpFAdd %v3float %850 %853 - %856 = OpVectorShuffle %v3float %1325 %1325 0 1 2 - %857 = OpFAdd %v3float %854 %856 - %859 = OpVectorShuffle %v3float %848 %848 0 1 2 - %860 = OpFMul %v3float %857 %859 - %865 = OpFMul %float %813 %1356 - %873 = OpExtInst %v3float %1 
Normalize %835 - %874 = OpFAdd %v3float %795 %873 - %875 = OpExtInst %v3float %1 Normalize %874 - %876 = OpDot %float %791 %875 - %877 = OpExtInst %float %1 FClamp %876 %float_0 %float_1 - %879 = OpExtInst %float %1 Pow %877 %1446 - %880 = OpFMul %float %865 %879 - %881 = OpVectorTimesScalar %v3float %800 %880 - %884 = OpFAdd %v3float %860 %881 - %886 = OpVectorShuffle %v4float %1512 %884 4 5 6 3 - %1494 = OpCompositeExtract %float %848 3 - %1496 = OpCompositeInsert %v4float %1494 %886 3 - %896 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_17 %uint_0 - %897 = OpLoad %float %896 - %898 = OpFMul %float %978 %897 - %899 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_17 %uint_1 - %900 = OpLoad %float %899 - %901 = OpFAdd %float %898 %900 - %1373 = OpExtInst %float %1 FClamp %901 %float_0 %float_1 - %905 = OpVectorShuffle %v2float %504 %504 3 2 - %908 = OpVectorShuffle %v2float %507 %507 3 2 - %909 = OpExtInst %v2float %1 FMin %905 %908 - %1504 = OpCompositeExtract %float %909 0 - %1506 = OpCompositeExtract %float %909 1 - %914 = OpExtInst %float %1 FMin %1504 %1506 - %916 = OpFDiv %float %914 %978 - %919 = OpFSub %float %float_1_5 %916 - %920 = OpFMul %float %1373 %919 - %922 = OpFAdd %float %920 %916 - %1376 = OpExtInst %float %1 FClamp %922 %float_0 %float_1 - %925 = OpVectorShuffle %v3float %1496 %1496 0 1 2 - %926 = OpVectorTimesScalar %v3float %925 %1376 - %928 = OpVectorShuffle %v4float %1496 %926 4 5 6 3 - %1508 = OpCompositeExtract %float %1396 4 3 - %931 = OpExtInst %float %1 FClamp %1508 %float_0 %float_1 - %932 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 %int_10 - %933 = OpLoad %v3float %932 - %935 = OpVectorShuffle %v3float %928 %928 0 1 2 - %937 = OpCompositeConstruct %v3float %931 %931 %931 - %938 = OpExtInst %v3float %1 FMix %933 %935 %937 - %940 = OpVectorShuffle %v4float %928 %938 4 5 6 3 - OpStore %_entryPointOutput %940 - OpReturn - OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag 
b/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag new file mode 100644 index 00000000000..518dbd81e44 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/input-attachment-unused-frag-coord.asm.frag @@ -0,0 +1,74 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 35 +; Schema: 0 + OpCapability Shader + OpCapability InputAttachment + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %load_subpasses_IP1_ "load_subpasses(IP1;" + OpName %uInput "uInput" + OpName %FragColor "FragColor" + OpName %uSubpass0 "uSubpass0" + OpName %uSubpass1 "uSubpass1" + OpName %gl_FragCoord "gl_FragCoord" + OpDecorate %load_subpasses_IP1_ RelaxedPrecision + OpDecorate %uInput RelaxedPrecision + OpDecorate %14 RelaxedPrecision + OpDecorate %19 RelaxedPrecision + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %uSubpass0 RelaxedPrecision + OpDecorate %uSubpass0 DescriptorSet 0 + OpDecorate %uSubpass0 Binding 0 + OpDecorate %uSubpass0 InputAttachmentIndex 0 + OpDecorate %25 RelaxedPrecision + OpDecorate %26 RelaxedPrecision + OpDecorate %uSubpass1 RelaxedPrecision + OpDecorate %uSubpass1 DescriptorSet 0 + OpDecorate %uSubpass1 Binding 1 + OpDecorate %uSubpass1 InputAttachmentIndex 1 + OpDecorate %28 RelaxedPrecision + OpDecorate %29 RelaxedPrecision + OpDecorate %gl_FragCoord BuiltIn FragCoord + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %7 = OpTypeImage %float SubpassData 0 0 0 2 Unknown +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 + %v4float = OpTypeVector %float 4 + %10 = OpTypeFunction %v4float %_ptr_UniformConstant_7 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v2int = OpTypeVector %int 2 + %18 = OpConstantComposite %v2int %int_0 %int_0 +%_ptr_Output_v4float = 
OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %uSubpass0 = OpVariable %_ptr_UniformConstant_7 UniformConstant + %uSubpass1 = OpVariable %_ptr_UniformConstant_7 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %main = OpFunction %void None %3 + %5 = OpLabel + %25 = OpLoad %7 %uSubpass0 + %26 = OpImageRead %v4float %25 %18 + %28 = OpFunctionCall %v4float %load_subpasses_IP1_ %uSubpass1 + %29 = OpFAdd %v4float %26 %28 + ;%32 = OpLoad %v4float %gl_FragCoord + ;%33 = OpVectorShuffle %v4float %32 %32 0 1 0 1 + ;%34 = OpFAdd %v4float %29 %33 + OpStore %FragColor %29 + OpReturn + OpFunctionEnd +%load_subpasses_IP1_ = OpFunction %v4float None %10 + %uInput = OpFunctionParameter %_ptr_UniformConstant_7 + %13 = OpLabel + %14 = OpLoad %7 %uInput + %19 = OpImageRead %v4float %14 %18 + OpReturnValue %19 + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag b/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag new file mode 100644 index 00000000000..707fa550b93 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/modf-frexp-scalar-access-chain-output.asm.frag @@ -0,0 +1,36 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 17 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %col "col" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float +%float_0_150000006 = OpConstant %float 0.150000006 + %v3float = OpTypeVector %float 3 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %v2int = OpTypeVector %int 2 
+%_ptr_Function_v2int = OpTypePointer Function %v2int +%_ptr_Function_int = OpTypePointer Function %int + %main = OpFunction %void None %3 + %5 = OpLabel + %col = OpVariable %_ptr_Function_v3float Function + %icol = OpVariable %_ptr_Function_v2int Function + %ptr_x = OpAccessChain %_ptr_Function_float %col %int_0 + %ptr_y = OpAccessChain %_ptr_Function_int %icol %int_1 + %16 = OpExtInst %float %1 Modf %float_0_150000006 %ptr_x + %17 = OpExtInst %float %1 Frexp %float_0_150000006 %ptr_y + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..17aab1d8f77 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,25 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 10 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragDepth + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main DepthReplacing + OpSource GLSL 450 + OpName %main "main" + OpName %gl_FragDepth "gl_FragDepth" + OpDecorate %gl_FragDepth BuiltIn FragDepth + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %float_0_5 = OpConstant %float 0.5 +%gl_FragDepth = OpVariable %_ptr_Output_float Output %float_0_5 + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag b/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..3696660d36d --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 40 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport 
"GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %vColor "vColor" + OpName %uninit_function_int "uninit_function_int" + OpName %FragColor "FragColor" + OpName %uninit_int "uninit_int" + OpName %uninit_vector "uninit_vector" + OpName %uninit_matrix "uninit_matrix" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpName %uninit_foo "uninit_foo" + OpDecorate %vColor Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_10 = OpConstant %float 10 + %bool = OpTypeBool + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_20 = OpConstant %int 20 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Private_int = OpTypePointer Private %int + %uninit_int = OpUndef %int + %v4int = OpTypeVector %int 4 +%_ptr_Private_v4int = OpTypePointer Private %v4int +%uninit_vector = OpUndef %v4int +%mat4v4float = OpTypeMatrix %v4float 4 +%_ptr_Private_mat4v4float = OpTypePointer Private %mat4v4float +%uninit_matrix = OpUndef %mat4v4float + %Foo = OpTypeStruct %int +%_ptr_Private_Foo = OpTypePointer Private %Foo + %uninit_foo = OpUndef %Foo + %main = OpFunction %void None %3 + %5 = OpLabel +%uninit_function_int = OpVariable %_ptr_Function_int Function + %13 = OpAccessChain %_ptr_Input_float %vColor %uint_0 + %14 = OpLoad %float %13 + %17 = OpFOrdGreaterThan %bool %14 %float_10 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %24 + %18 = OpLabel + OpBranch %19 + %24 = OpLabel + OpBranch %19 + %19 = OpLabel + %27 = 
OpPhi %int %int_10 %18 %int_20 %24 + %28 = OpLoad %v4float %vColor + OpStore %FragColor %28 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag new file mode 100644 index 00000000000..ebd8d6bab75 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = 
OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag new file mode 100644 index 00000000000..69b8f911204 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport 
"GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = 
OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag new file mode 100644 index 00000000000..7c0fe9a2b24 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 
+%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag b/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag new file mode 100644 index 00000000000..49ed96094a5 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/private-initializer-direct-store.asm.frag @@ -0,0 +1,32 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 17 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" 
+ OpName %b "b" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Private_float = OpTypePointer Private %float + %float_10 = OpConstant %float 10 + %float_20 = OpConstant %float 20 + %b = OpVariable %_ptr_Private_float Private %float_10 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %b %float_20 + %15 = OpLoad %float %b + %16 = OpFAdd %float %15 %15 + OpStore %FragColor %16 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag b/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..a5a16f2873b --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %ACOS_f1_ "mat3" + OpName %a "a" + OpName %ACOS_i1_ "gl_Foo" + OpName %a_0 "a" + OpName %FragColor "FragColor" + OpName %param "param" + OpName %param_0 "param" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %8 = OpTypeFunction %float %_ptr_Function_float + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %14 = OpTypeFunction %float %_ptr_Function_int + %float_1 = OpConstant %float 1 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %float_2 = OpConstant %float 2 + %int_4 = OpConstant %int 4 + %main = OpFunction %void None %3 + %5 = 
OpLabel + %param = OpVariable %_ptr_Function_float Function + %param_0 = OpVariable %_ptr_Function_int Function + OpStore %param %float_2 + %32 = OpFunctionCall %float %ACOS_f1_ %param + OpStore %param_0 %int_4 + %35 = OpFunctionCall %float %ACOS_i1_ %param_0 + %36 = OpFAdd %float %32 %35 + OpStore %FragColor %36 + OpReturn + OpFunctionEnd + %ACOS_f1_ = OpFunction %float None %8 + %a = OpFunctionParameter %_ptr_Function_float + %11 = OpLabel + %18 = OpLoad %float %a + %20 = OpFAdd %float %18 %float_1 + OpReturnValue %20 + OpFunctionEnd + %ACOS_i1_ = OpFunction %float None %14 + %a_0 = OpFunctionParameter %_ptr_Function_int + %17 = OpLabel + %23 = OpLoad %int %a_0 + %24 = OpConvertSToF %float %23 + %25 = OpFAdd %float %24 %float_1 + OpReturnValue %25 + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag b/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..07450ee80b6 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,62 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool + %false = OpConstantFalse %bool + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %true = OpConstantTrue %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %s = OpTypeStruct %float + %arr = OpTypeArray %float %uint_2 +%_ptr_Function_s = OpTypePointer Function %s +%_ptr_Function_arr = OpTypePointer Function %arr + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float 
%float_1 %float_1 %float_0 %float_1 + %18 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %19 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %20 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %s0 = OpConstantComposite %s %float_0 + %s1 = OpConstantComposite %s %float_1 + %v4bool = OpTypeVector %bool 4 + %b4 = OpConstantComposite %v4bool %false %true %false %true + %arr1 = OpConstantComposite %arr %float_0 %float_1 + %arr2 = OpConstantComposite %arr %float_1 %float_0 + %main = OpFunction %void None %3 + %5 = OpLabel + %ss = OpVariable %_ptr_Function_s Function + %arrvar = OpVariable %_ptr_Function_arr Function + ; Not trivial + %21 = OpSelect %v4float %false %17 %18 + OpStore %FragColor %21 + ; Trivial + %22 = OpSelect %v4float %false %19 %20 + OpStore %FragColor %22 + ; Vector not trivial + %23 = OpSelect %v4float %b4 %17 %18 + OpStore %FragColor %23 + ; Vector trivial + %24 = OpSelect %v4float %b4 %19 %20 + OpStore %FragColor %24 + ; Struct selection + %sout = OpSelect %s %false %s0 %s1 + OpStore %ss %sout + ; Array selection + %arrout = OpSelect %arr %true %arr1 %arr2 + OpStore %arrvar %arrout + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag b/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag new file mode 100644 index 00000000000..5f0734062d6 --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/subgroup-arithmetic-cast.msl21.asm.frag @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 78 +; Schema: 0 + OpCapability Shader + OpCapability GroupNonUniform + OpCapability GroupNonUniformArithmetic + OpCapability GroupNonUniformClustered + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %index %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_KHR_shader_subgroup_arithmetic" + 
OpSourceExtension "GL_KHR_shader_subgroup_basic" + OpSourceExtension "GL_KHR_shader_subgroup_clustered" + OpName %main "main" + OpName %index "index" + OpName %FragColor "FragColor" + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 +%_ptr_Output_uint = OpTypePointer Output %uint + %FragColor = OpVariable %_ptr_Output_uint Output + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpLoad %int %index + %u = OpBitcast %uint %i + %res0 = OpGroupNonUniformSMin %uint %uint_3 Reduce %i + %res1 = OpGroupNonUniformSMax %uint %uint_3 Reduce %u + %res2 = OpGroupNonUniformUMin %uint %uint_3 Reduce %i + %res3 = OpGroupNonUniformUMax %uint %uint_3 Reduce %u + ;%res4 = OpGroupNonUniformSMax %uint %uint_3 InclusiveScan %i + ;%res5 = OpGroupNonUniformSMin %uint %uint_3 InclusiveScan %u + ;%res6 = OpGroupNonUniformUMax %uint %uint_3 ExclusiveScan %i + ;%res7 = OpGroupNonUniformUMin %uint %uint_3 ExclusiveScan %u + %res8 = OpGroupNonUniformSMin %uint %uint_3 ClusteredReduce %i %uint_4 + %res9 = OpGroupNonUniformSMax %uint %uint_3 ClusteredReduce %u %uint_4 + %res10 = OpGroupNonUniformUMin %uint %uint_3 ClusteredReduce %i %uint_4 + %res11 = OpGroupNonUniformUMax %uint %uint_3 ClusteredReduce %u %uint_4 + OpStore %FragColor %res0 + OpStore %FragColor %res1 + OpStore %FragColor %res2 + OpStore %FragColor %res3 + ;OpStore %FragColor %res4 + ;OpStore %FragColor %res5 + ;OpStore %FragColor %res6 + ;OpStore %FragColor %res7 + OpStore %FragColor %res8 + OpStore %FragColor %res9 + OpStore %FragColor %res10 + OpStore %FragColor %res11 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to shaders-msl-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag b/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag new file mode 100644 index 00000000000..702b826e5bb --- /dev/null +++ b/shaders-msl-no-opt/asm/frag/usage-tracking-modf-io-pointer.asm.frag @@ -0,0 +1,28 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 14 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %_GLF_color "_GLF_color" + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %10 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %13 = OpExtInst %v4float %1 Modf %10 %_GLF_color + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc new file mode 100644 index 00000000000..a3d489941c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.msl2.asm.tesc @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + 
OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 
2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..a3d489941c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-0.multi-patch.msl2.asm.tesc @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + 
OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output 
%_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc new file mode 100644 index 00000000000..a3d489941c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.msl2.asm.tesc @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + 
OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = 
OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..a3d489941c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-location-1.multi-patch.msl2.asm.tesc @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + 
%float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc new file mode 100644 index 00000000000..23424ff7ba3 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.msl2.asm.tesc @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 
1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..23424ff7ba3 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/masking/initializers-block.mask-point-size.multi-patch.msl2.asm.tesc @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 
%float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc new file mode 100644 index 00000000000..23424ff7ba3 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.msl2.asm.tesc @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical 
GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName %p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant 
%uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc new file mode 100644 index 00000000000..23424ff7ba3 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers-block.mask-position.multi-patch.msl2.asm.tesc @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 44 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %c %gl_InvocationID %p %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %C "C" + OpMemberName %C 0 "v" + OpName %c "c" + OpName %gl_InvocationID "gl_InvocationID" + OpName %P "P" + OpMemberName %P 0 "v" + OpName 
%p "p" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %C Block + OpDecorate %c Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %P 0 Patch + OpDecorate %P Block + OpDecorate %p Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %C = OpTypeStruct %v4float + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_C_uint_4 = OpTypeArray %C %uint_4 +%_ptr_Output__arr_C_uint_4 = OpTypePointer Output %_arr_C_uint_4 + %zero_c = OpConstantNull %_arr_C_uint_4 + %c = OpVariable %_ptr_Output__arr_C_uint_4 Output %zero_c + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %20 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %P = OpTypeStruct %v4float +%_ptr_Output_P = OpTypePointer Output %P + %zero_p = OpConstantNull %P + %p = OpVariable %_ptr_Output_P Output %zero_p + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = 
OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %float_3 = OpConstant %float 3 + %37 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %17 = OpLoad %int %gl_InvocationID + %22 = OpAccessChain %_ptr_Output_v4float %c %17 %int_0 + OpStore %22 %20 + %28 = OpAccessChain %_ptr_Output_v4float %p %int_0 + OpStore %28 %27 + %38 = OpAccessChain %_ptr_Output_v4float %gl_out %17 %int_0 + OpStore %38 %37 + %43 = OpAccessChain %_ptr_Output_float %gl_out %17 %int_1 + OpStore %43 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + 
OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float 
%foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-location-0.msl2.multi-patch.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = 
OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch 
= OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-location-1.multi-patch.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + 
OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = 
OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate 
%gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore 
%20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-point-size.msl2.multi-patch.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull 
%_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + 
%foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc b/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc new file mode 100644 index 00000000000..6b616b04163 --- /dev/null +++ b/shaders-msl-no-opt/asm/masking/initializers.mask-position.msl2.multi-patch.asm.tesc @@ -0,0 +1,76 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 40 +; Schema: 0 + OpCapability Tessellation + OpCapability TessellationPointSize + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %foo %gl_InvocationID %foo_patch %gl_out + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + 
OpName %main "main" + OpName %foo "foo" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo_patch "foo_patch" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpDecorate %foo Location 0 + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo_patch Patch + OpDecorate %foo_patch Location 1 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %zero_foo = OpConstantNull %_arr_v4float_uint_4 +%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4 + %foo = OpVariable %_ptr_Output__arr_v4float_uint_4 Output %zero_foo + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %float_1 = OpConstant %float 1 + %18 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %zero_foo_patch = OpConstantNull %v4float + %foo_patch = OpVariable %_ptr_Output_v4float Output %zero_foo_patch + %float_2 = OpConstant %float 2 + %23 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + 
%zero_gl_out = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %zero_gl_out + %int_0 = OpConstant %int 0 + %float_3 = OpConstant %float 3 + %33 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 + %int_1 = OpConstant %int 1 + %float_4 = OpConstant %float 4 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %int %gl_InvocationID + %20 = OpAccessChain %_ptr_Output_v4float %foo %16 + OpStore %20 %18 + OpStore %foo_patch %23 + %34 = OpAccessChain %_ptr_Output_v4float %gl_out %16 %int_0 + OpStore %34 %33 + %39 = OpAccessChain %_ptr_Output_float %gl_out %16 %int_1 + OpStore %39 %float_4 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp b/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp new file mode 100644 index 00000000000..a37bdd91959 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/composite-extract-row-major.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 21 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBORow "SSBORow" + OpMemberName %SSBORow 0 "v" + OpMemberName %SSBORow 1 "row_major0" + OpName %_ "" + OpMemberDecorate %SSBORow 0 Offset 0 + OpMemberDecorate %SSBORow 1 RowMajor + OpMemberDecorate %SSBORow 1 Offset 16 + OpMemberDecorate %SSBORow 1 MatrixStride 16 + OpDecorate %SSBORow BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %SSBORow = OpTypeStruct %float %mat4v4float +%_ptr_Uniform_SSBORow = OpTypePointer Uniform 
%SSBORow + %_ = OpVariable %_ptr_Uniform_SSBORow Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %row_ptr = OpAccessChain %_ptr_Uniform_v4float %_ %int_1 %int_1 + %vec = OpLoad %v4float %row_ptr + %float_val = OpCompositeExtract %float %vec 2 + + %20 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %20 %float_val + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp new file mode 100644 index 00000000000..4c222454447 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-2.asm.comp @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 RowMajor + OpMemberDecorate %SSBOScalar 1 Offset 16 + OpMemberDecorate %SSBOScalar 1 MatrixStride 16 + OpMemberDecorate %SSBOScalar 2 RowMajor + OpMemberDecorate %SSBOScalar 2 Offset 64 + OpMemberDecorate %SSBOScalar 2 MatrixStride 16 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %SSBOScalar = 
OpTypeStruct %v3float %mat3v3float %mat3v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 +%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %main = OpFunction %void None %3 + %5 = OpLabel + %b_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_1 + %c_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_2 + %b = OpLoad %mat3v3float %b_ptr + %c = OpLoad %mat3v3float %c_ptr + OpStore %b_ptr %c + %19 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + %20 = OpLoad %v3float %19 + %21 = OpMatrixTimesVector %v3float %b %20 + %22 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + OpStore %22 %21 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp new file mode 100644 index 00000000000..85a220f516c --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-3.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 22 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 Offset 12 + OpMemberDecorate %SSBOScalar 2 Offset 24 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + 
%v3float = OpTypeVector %float 3 + %SSBOScalar = OpTypeStruct %v3float %v3float %v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_2 = OpConstant %int 2 + %main = OpFunction %void None %3 + %5 = OpLabel + %15 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1 + %16 = OpLoad %v3float %15 + %18 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %19 = OpLoad %v3float %18 + OpStore %18 %16 + %20 = OpFMul %v3float %16 %19 + %21 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + OpStore %21 %20 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp new file mode 100644 index 00000000000..bef3fcb766c --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-4.asm.comp @@ -0,0 +1,61 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 29 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpDecorate %_arr_v2float_uint_16 ArrayStride 16 + OpDecorate %_arr_v2float_uint_16_0 ArrayStride 16 + OpDecorate %_arr_float_uint_16 ArrayStride 16 + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 Offset 256 + OpMemberDecorate %SSBOScalar 2 Offset 512 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = 
OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %uint = OpTypeInt 32 0 + %uint_16 = OpConstant %uint 16 +%_arr_v2float_uint_16 = OpTypeArray %v2float %uint_16 +%_arr_v2float_uint_16_0 = OpTypeArray %v2float %uint_16 +%_arr_float_uint_16 = OpTypeArray %float %uint_16 + %SSBOScalar = OpTypeStruct %_arr_v2float_uint_16 %_arr_v2float_uint_16_0 %_arr_float_uint_16 +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_2 = OpConstant %int 2 + %float_10 = OpConstant %float 10.0 + %float_11 = OpConstant %float 11.0 + %float_const = OpConstantComposite %v2float %float_10 %float_11 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 %int_10 + %22 = OpLoad %v2float %21 + %25 = OpAccessChain %_ptr_Uniform_float %_ %int_2 %int_10 + %26 = OpLoad %float %25 + OpStore %21 %float_const + %27 = OpVectorTimesScalar %v2float %22 %26 + %28 = OpAccessChain %_ptr_Uniform_v2float %_ %int_0 %int_10 + OpStore %28 %27 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp new file mode 100644 index 00000000000..8de22b82851 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding-5.asm.comp @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 29 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName 
%SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 Offset 8 + OpMemberDecorate %SSBOScalar 2 Offset 20 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %SSBOScalar = OpTypeStruct %v2float %v3float %v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_2 = OpConstant %int 2 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %float_1 = OpConstant %float 1 + %27 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1 + %17 = OpLoad %v3float %16 + %18 = OpVectorShuffle %v2float %17 %17 0 1 + %20 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %21 = OpLoad %v3float %20 + %22 = OpVectorShuffle %v2float %21 %21 1 2 + OpStore %16 %27 + %23 = OpFMul %v2float %18 %22 + %25 = OpAccessChain %_ptr_Uniform_v2float %_ %int_0 + OpStore %25 %23 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp new file mode 100644 index 00000000000..0b0ba53e8e1 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/load-packed-no-forwarding.asm.comp @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource 
GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 RowMajor + OpMemberDecorate %SSBOScalar 1 Offset 12 + OpMemberDecorate %SSBOScalar 1 MatrixStride 12 + OpMemberDecorate %SSBOScalar 2 RowMajor + OpMemberDecorate %SSBOScalar 2 Offset 48 + OpMemberDecorate %SSBOScalar 2 MatrixStride 12 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %SSBOScalar = OpTypeStruct %v3float %mat3v3float %mat3v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 +%_ptr_Uniform_mat3v3float = OpTypePointer Uniform %mat3v3float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %main = OpFunction %void None %3 + %5 = OpLabel + %b_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_1 + %c_ptr = OpAccessChain %_ptr_Uniform_mat3v3float %_ %int_2 + %b = OpLoad %mat3v3float %b_ptr + %c = OpLoad %mat3v3float %c_ptr + OpStore %b_ptr %c + %19 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + %20 = OpLoad %v3float %19 + %21 = OpMatrixTimesVector %v3float %b %20 + %22 = OpAccessChain %_ptr_Uniform_v3float %_ %int_0 + OpStore %22 %21 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp b/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp new file mode 100644 index 00000000000..70b17527919 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/packed-vector-extract-insert.asm.comp @@ -0,0 +1,57 @@ +; SPIR-V +; 
Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 28 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %SSBOScalar "SSBOScalar" + OpMemberName %SSBOScalar 0 "a" + OpMemberName %SSBOScalar 1 "b" + OpMemberName %SSBOScalar 2 "c" + OpMemberName %SSBOScalar 3 "d" + OpName %_ "" + OpMemberDecorate %SSBOScalar 0 Offset 0 + OpMemberDecorate %SSBOScalar 1 Offset 8 + OpMemberDecorate %SSBOScalar 2 Offset 20 + OpMemberDecorate %SSBOScalar 3 Offset 32 + OpDecorate %SSBOScalar BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %SSBOScalar = OpTypeStruct %v2float %v3float %v3float %v3float +%_ptr_Uniform_SSBOScalar = OpTypePointer Uniform %SSBOScalar + %_ = OpVariable %_ptr_Uniform_SSBOScalar Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_2 = OpConstant %int 2 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %float_2 = OpConstant %float 2.0 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %main = OpFunction %void None %3 + %5 = OpLabel + %v3_ptr = OpAccessChain %_ptr_Uniform_v3float %_ %int_1 + %v3 = OpLoad %v3float %v3_ptr + %v3_mod = OpCompositeInsert %v3float %float_2 %v3 2 + %v2 = OpVectorShuffle %v2float %v3 %v3 0 1 + %v1 = OpCompositeExtract %float %v3 2 + %v2_mul = OpVectorTimesScalar %v2float %v2 %v1 + %v2_ptr = OpAccessChain %_ptr_Uniform_v2float %_ %int_0 + OpStore %v2_ptr %v2_mul + OpStore %v3_ptr %v3_mod + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp b/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp new file mode 100644 index 00000000000..398c8d135c8 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/row-major-split-access-chain.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 21 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBORow "SSBORow" + OpMemberName %SSBORow 0 "v" + OpMemberName %SSBORow 1 "row_major0" + OpName %_ "" + OpMemberDecorate %SSBORow 0 Offset 0 + OpMemberDecorate %SSBORow 1 RowMajor + OpMemberDecorate %SSBORow 1 Offset 16 + OpMemberDecorate %SSBORow 1 MatrixStride 16 + OpDecorate %SSBORow BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %SSBORow = OpTypeStruct %float %mat4v4float +%_ptr_Uniform_SSBORow = OpTypePointer Uniform %SSBORow + %_ = OpVariable %_ptr_Uniform_SSBORow Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %row_ptr = OpAccessChain %_ptr_Uniform_v4float %_ %int_1 %int_1 + %float_ptr = OpAccessChain %_ptr_Uniform_float %row_ptr %uint_2 + + %19 = OpLoad %float %float_ptr + %20 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %20 %19 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag new file mode 
100644 index 00000000000..85249d99810 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-array-float2.asm.frag @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpDecorate %_arr_v2float_uint_2 ArrayStride 16 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 1 Offset 24 + OpDecorate %type_Foo Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_arr_v2float_uint_2 = OpTypeArray %v2float %uint_2 + %type_Foo = OpTypeStruct %_arr_v2float_uint_2 %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = OpTypeVoid + %16 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %16 + %19 = OpLabel + %20 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %int_0 + %21 = OpLoad %v2float %20 + %22 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %int_1 + %23 = OpLoad %v2float %22 + %24 = OpFAdd %v2float %21 %23 + %25 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %26 = OpLoad %float %25 + %27 = OpCompositeConstruct %v2float %26 %26 + %28 = OpFAdd %v2float %24 %27 + OpStore %out_var_SV_Target %28 + OpReturn + 
OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag new file mode 100644 index 00000000000..7ed32bee417 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-array-float3-one-element.asm.frag @@ -0,0 +1,51 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpDecorate %_arr_v3float_uint_1 ArrayStride 16 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 1 Offset 12 + OpDecorate %type_Foo Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%_arr_v3float_uint_1 = OpTypeArray %v3float %uint_1 + %type_Foo = OpTypeStruct %_arr_v3float_uint_1 %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %16 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %16 + %19 = OpLabel + %20 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %int_0 + %21 = OpLoad %v3float %20 + %22 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %23 = OpLoad %float %22 + %24 = OpCompositeConstruct %v3float %23 %23 %23 + 
%25 = OpFAdd %v3float %21 %24 + OpStore %out_var_SV_Target %25 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag new file mode 100644 index 00000000000..406328b8d40 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-array-float3.asm.frag @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpDecorate %_arr_v3float_uint_2 ArrayStride 16 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 1 Offset 28 + OpDecorate %type_Foo Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%_arr_v3float_uint_2 = OpTypeArray %v3float %uint_2 + %type_Foo = OpTypeStruct %_arr_v3float_uint_2 %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %16 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %16 + %19 = OpLabel + %20 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %int_0 + %21 = OpLoad %v3float %20 + %22 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %int_1 + %23 = OpLoad %v3float %22 + 
%24 = OpFAdd %v3float %21 %23 + %25 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %26 = OpLoad %float %25 + %27 = OpCompositeConstruct %v3float %26 %26 %26 + %28 = OpFAdd %v3float %24 %27 + OpStore %out_var_SV_Target %28 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag new file mode 100644 index 00000000000..b9b4f5a0172 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float2x2-col-major.invalid.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 ColMajor + OpMemberDecorate %type_Foo 1 Offset 24 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%mat2v2float = OpTypeMatrix %v2float 2 + %type_Foo = OpTypeStruct %mat2v2float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable 
%_ptr_Output_v2float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_0 + %22 = OpLoad %v2float %21 + %23 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_1 + %24 = OpLoad %v2float %23 + %25 = OpFAdd %v2float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v2float %27 %27 + %29 = OpFAdd %v2float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag new file mode 100644 index 00000000000..e1830e9cf91 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float2x2-row-major.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 RowMajor + OpMemberDecorate %type_Foo 1 Offset 24 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%mat2v2float = OpTypeMatrix %v2float 2 + %type_Foo = OpTypeStruct %mat2v2float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = 
OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_0 + %22 = OpLoad %v2float %21 + %23 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_1 + %24 = OpLoad %v2float %23 + %25 = OpFAdd %v2float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v2float %27 %27 + %29 = OpFAdd %v2float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag new file mode 100644 index 00000000000..647939f2050 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float2x3-col-major.invalid.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 ColMajor + OpMemberDecorate %type_Foo 1 Offset 28 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = 
OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat2v3float = OpTypeMatrix %v3float 2 + %type_Foo = OpTypeStruct %mat2v3float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_0 + %22 = OpLoad %v3float %21 + %23 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_1 + %24 = OpLoad %v3float %23 + %25 = OpFAdd %v3float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v3float %27 %27 %27 + %29 = OpFAdd %v3float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag new file mode 100644 index 00000000000..733465a0fb0 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float2x3-row-major.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 RowMajor + OpMemberDecorate 
%type_Foo 1 Offset 40 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat2v3float = OpTypeMatrix %v3float 2 + %type_Foo = OpTypeStruct %mat2v3float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_0 + %22 = OpLoad %v3float %21 + %23 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_1 + %24 = OpLoad %v3float %23 + %25 = OpFAdd %v3float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v3float %27 %27 %27 + %29 = OpFAdd %v3float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag new file mode 100644 index 00000000000..c97fb81f6a3 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float3x2-col-major.invalid.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main 
"main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 ColMajor + OpMemberDecorate %type_Foo 1 Offset 40 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%mat3v2float = OpTypeMatrix %v2float 3 + %type_Foo = OpTypeStruct %mat3v2float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_0 + %22 = OpLoad %v2float %21 + %23 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_1 + %24 = OpLoad %v2float %23 + %25 = OpFAdd %v2float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v2float %27 %27 + %29 = OpFAdd %v2float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag new file mode 100644 index 00000000000..b1cfa561e05 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float3x2-row-major.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + 
OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 RowMajor + OpMemberDecorate %type_Foo 1 Offset 28 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%mat3v2float = OpTypeMatrix %v2float 3 + %type_Foo = OpTypeStruct %mat3v2float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v2float = OpTypePointer Output %v2float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_0 + %22 = OpLoad %v2float %21 + %23 = OpAccessChain %_ptr_Uniform_v2float %Foo %int_0 %uint_1 + %24 = OpLoad %v2float %23 + %25 = OpFAdd %v2float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v2float %27 %27 + %29 = OpFAdd %v2float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag new file mode 100644 index 00000000000..cef8308b2fb --- /dev/null +++ 
b/shaders-msl-no-opt/asm/packing/scalar-float3x3-col-major.invalid.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 ColMajor + OpMemberDecorate %type_Foo 1 Offset 44 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %type_Foo = OpTypeStruct %mat3v3float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_0 + %22 = OpLoad %v3float %21 + %23 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_1 + %24 = OpLoad %v3float %23 + %25 = OpFAdd %v3float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v3float %27 %27 %27 + %29 = OpFAdd %v3float %25 %28 + OpStore %out_var_SV_Target %29 + 
OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag b/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag new file mode 100644 index 00000000000..35d7ebc3192 --- /dev/null +++ b/shaders-msl-no-opt/asm/packing/scalar-float3x3-row-major.asm.frag @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %out_var_SV_Target + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 600 + OpName %type_Foo "type.Foo" + OpMemberName %type_Foo 0 "a" + OpMemberName %type_Foo 1 "b" + OpName %Foo "Foo" + OpName %out_var_SV_Target "out.var.SV_Target" + OpName %main "main" + OpDecorate %out_var_SV_Target Location 0 + OpDecorate %Foo DescriptorSet 0 + OpDecorate %Foo Binding 0 + OpMemberDecorate %type_Foo 0 Offset 0 + OpMemberDecorate %type_Foo 0 MatrixStride 16 + OpMemberDecorate %type_Foo 0 RowMajor + OpMemberDecorate %type_Foo 1 Offset 44 + OpDecorate %type_Foo Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%mat3v3float = OpTypeMatrix %v3float 3 + %type_Foo = OpTypeStruct %mat3v3float %float +%_ptr_Uniform_type_Foo = OpTypePointer Uniform %type_Foo +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %17 = OpTypeFunction %void +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Foo = OpVariable %_ptr_Uniform_type_Foo Uniform +%out_var_SV_Target = OpVariable %_ptr_Output_v3float Output + %main = OpFunction %void None %17 + %20 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_0 + %22 = OpLoad %v3float %21 + %23 = OpAccessChain %_ptr_Uniform_v3float %Foo %int_0 %uint_1 + %24 = OpLoad %v3float %23 
+ %25 = OpFAdd %v3float %22 %24 + %26 = OpAccessChain %_ptr_Uniform_float %Foo %int_1 + %27 = OpLoad %float %26 + %28 = OpCompositeConstruct %v3float %27 %27 %27 + %29 = OpFAdd %v3float %25 %28 + OpStore %out_var_SV_Target %29 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag b/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..eccff08b331 --- /dev/null +++ b/shaders-msl-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,93 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 65 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vA %vB + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %vA "vA" + OpName %vB "vB" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %vA RelaxedPrecision + OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %25 RelaxedPrecision + OpDecorate %30 RelaxedPrecision + OpDecorate %vB RelaxedPrecision + OpDecorate %vB Flat + OpDecorate %vB Location 1 + OpDecorate %38 RelaxedPrecision + OpDecorate %40 RelaxedPrecision + OpDecorate %49 RelaxedPrecision + OpDecorate %51 RelaxedPrecision + OpDecorate %53 RelaxedPrecision + OpDecorate %56 RelaxedPrecision + OpDecorate %64 RelaxedPrecision + OpDecorate %58 RelaxedPrecision + OpDecorate %57 RelaxedPrecision + OpDecorate %60 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_int = OpTypePointer 
Input %int + %vA = OpVariable %_ptr_Input_int Input + %bool = OpTypeBool + %int_20 = OpConstant %int 20 + %int_50 = OpConstant %int 50 + %vB = OpVariable %_ptr_Input_int Input + %int_40 = OpConstant %int 40 + %int_60 = OpConstant %int 60 + %int_10 = OpConstant %int 10 + %float_1 = OpConstant %float 1 + %63 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %FragColor %11 + OpBranch %17 + %17 = OpLabel + %60 = OpPhi %int %int_0 %5 %58 %20 + %57 = OpPhi %int %int_0 %5 %56 %20 + %25 = OpLoad %int %vA + %27 = OpSLessThan %bool %57 %25 + OpLoopMerge %19 %20 None + OpBranchConditional %27 %18 %19 + %18 = OpLabel + %30 = OpIAdd %int %25 %57 + %32 = OpIEqual %bool %30 %int_20 + OpSelectionMerge %34 None + OpBranchConditional %32 %33 %36 + %33 = OpLabel + OpBranch %34 + %36 = OpLabel + %38 = OpLoad %int %vB + %40 = OpIAdd %int %38 %57 + %42 = OpIEqual %bool %40 %int_40 + %64 = OpSelect %int %42 %int_60 %60 + OpBranch %34 + %34 = OpLabel + %58 = OpPhi %int %int_50 %33 %64 %36 + %49 = OpIAdd %int %58 %int_10 + %51 = OpLoad %v4float %FragColor + %53 = OpFAdd %v4float %51 %63 + OpStore %FragColor %53 + OpBranch %20 + %20 = OpLabel + %56 = OpIAdd %int %57 %49 + OpBranch %17 + %19 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..5bbe951df47 --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/array-control-point-initializer.asm.tesc @@ -0,0 +1,80 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 48 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %foo + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex 
"gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %foo "foo" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_arr__arr_float_uint_3_uint_4 = OpTypeArray %_arr_float_uint_3 %uint_4 +%_ptr_Output__arr__arr_float_uint_3_uint_4 = OpTypePointer Output %_arr__arr_float_uint_3_uint_4 +%foo_zero = OpConstantNull %_arr__arr_float_uint_3_uint_4 + %foo = OpVariable %_ptr_Output__arr__arr_float_uint_3_uint_4 Output %foo_zero +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %float_2 = 
OpConstant %float 2 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpLoad %int %gl_InvocationID + %31 = OpLoad %int %gl_InvocationID + %32 = OpConvertSToF %float %31 + %34 = OpAccessChain %_ptr_Output_float %foo %30 %int_0 + OpStore %34 %32 + %35 = OpLoad %int %gl_InvocationID + %37 = OpLoad %int %gl_InvocationID + %38 = OpConvertSToF %float %37 + %39 = OpFAdd %float %38 %float_1 + %40 = OpAccessChain %_ptr_Output_float %foo %35 %int_1 + OpStore %40 %39 + %41 = OpLoad %int %gl_InvocationID + %43 = OpLoad %int %gl_InvocationID + %44 = OpConvertSToF %float %43 + %46 = OpFAdd %float %44 %float_2 + %47 = OpAccessChain %_ptr_Output_float %foo %41 %int_2 + OpStore %47 %46 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..d4e14be4abe --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/block-control-point-initializer.asm.tesc @@ -0,0 +1,70 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %verts + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %Verts "Verts" + OpMemberName %Verts 0 "a" + OpMemberName %Verts 1 "b" + OpName %verts "verts" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 
BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %Verts Block + OpDecorate %verts Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 +;%gl_out_zero = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output ;%gl_out_zero + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v2float = OpTypeVector %float 2 + %Verts = OpTypeStruct %float %v2float +%_arr_Verts_uint_4 = OpTypeArray %Verts %uint_4 +%_ptr_Output__arr_Verts_uint_4 = OpTypePointer Output %_arr_Verts_uint_4 + %verts_zero = OpConstantNull %_arr_Verts_uint_4 + %verts = OpVariable %_ptr_Output__arr_Verts_uint_4 Output %verts_zero +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpLoad %int %gl_InvocationID + %31 = OpLoad %int %gl_InvocationID + %32 = OpConvertSToF %float %31 + %34 = OpAccessChain %_ptr_Output_float %verts %30 %int_0 + OpStore %34 %32 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..1219183ca7e --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/builtin-control-point-initializer.asm.tesc @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %verts + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %Verts "Verts" + OpMemberName %Verts 0 "a" + OpMemberName %Verts 1 "b" + OpName %verts "verts" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %Verts Block + OpDecorate %verts Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%gl_PerVertex = OpTypeStruct %v4float %float + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 +%gl_out_zero = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %gl_out_zero + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 
%float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v2float = OpTypeVector %float 2 + %Verts = OpTypeStruct %float %v2float +%_arr_Verts_uint_4 = OpTypeArray %Verts %uint_4 +%_ptr_Output__arr_Verts_uint_4 = OpTypePointer Output %_arr_Verts_uint_4 + %verts_zero = OpConstantNull %_arr_Verts_uint_4 + %verts = OpVariable %_ptr_Output__arr_Verts_uint_4 Output %verts_zero +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpLoad %int %gl_InvocationID + %31 = OpLoad %int %gl_InvocationID + %32 = OpConvertSToF %float %31 + %34 = OpAccessChain %_ptr_Output_float %verts %30 %int_0 + OpStore %34 %32 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..03ac99befb6 --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/composite-control-point-initializer.asm.tesc @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %foo + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpMemberName %Foo 1 "b" + OpMemberName %Foo 2 "c" + OpName %foo "foo" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate 
%gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %foo Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v2float = OpTypeVector %float 2 + %Foo = OpTypeStruct %float %v2float %v4float +%_arr_Foo_uint_4 = OpTypeArray %Foo %uint_4 +%_ptr_Output__arr_Foo_uint_4 = OpTypePointer Output %_arr_Foo_uint_4 + %foo_zero = OpConstantNull %_arr_Foo_uint_4 + %foo = OpVariable %_ptr_Output__arr_Foo_uint_4 Output %foo_zero +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpLoad %int %gl_InvocationID + %31 = OpLoad %int %gl_InvocationID + %32 = OpConvertSToF %float %31 + %34 = OpAccessChain %_ptr_Output_float %foo %30 %int_0 + OpStore %34 %32 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc b/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc new 
file mode 100644 index 00000000000..7c0a638f985 --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/copy-memory-control-point.asm.tesc @@ -0,0 +1,199 @@ +; SPIR-V +; Version: 1.0 +; Generator: Wine VKD3D Shader Compiler; 2 +; Bound: 126 +; Schema: 0 + OpCapability Tessellation + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %1 "main" %4 %30 %80 %101 %103 %108 %110 %115 %117 + OpExecutionMode %1 OutputVertices 3 + OpExecutionMode %1 Triangles + OpExecutionMode %1 SpacingEqual + OpExecutionMode %1 VertexOrderCw + OpName %1 "main" + OpName %11 "opc" + OpName %14 "cb1_struct" + OpName %16 "cb0_0" + OpName %22 "vicp" + OpName %23 "fork0" + OpName %26 "vForkInstanceId" + OpName %34 "r0" + OpName %32 "fork0_epilogue" + OpName %75 "fork1" + OpName %81 "fork1_epilogue" + OpName %101 "v0" + OpName %103 "v1" + OpName %108 "vicp0" + OpName %110 "vocp0" + OpName %115 "vicp1" + OpName %117 "vocp1" + OpDecorate %4 BuiltIn InvocationId + OpDecorate %13 ArrayStride 16 + OpDecorate %14 Block + OpMemberDecorate %14 0 Offset 0 + OpDecorate %16 DescriptorSet 0 + OpDecorate %16 Binding 0 + OpDecorate %30 BuiltIn TessLevelOuter + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %80 BuiltIn TessLevelInner + OpDecorate %80 Patch + OpDecorate %80 Patch + OpDecorate %101 Location 0 + OpDecorate %103 Location 1 + OpDecorate %108 Location 2 + OpDecorate %110 Location 3 + OpDecorate %115 Location 4 + OpDecorate %117 Location 5 + %2 = OpTypeInt 32 1 + %3 = OpTypePointer Input %2 + %4 = OpVariable %3 Input + %5 = OpTypeFloat 32 + %6 = OpTypeVector %5 4 + %7 = OpTypeInt 32 0 + %8 = OpConstant %7 4 + %9 = OpTypeArray %6 %8 + %10 = OpTypePointer Private %9 + %11 = OpVariable %10 Private + %12 = OpConstant %7 1 + %13 = OpTypeArray %6 %12 + %14 = OpTypeStruct %13 + %15 = OpTypePointer Uniform %14 + %16 = OpVariable %15 Uniform + %17 = OpConstant %7 3 + %18 = OpTypeArray %6 %17 + %19 = OpConstant %7 2 + %20 = OpTypeArray %18 %19 + %21 
= OpTypePointer Private %20 + %22 = OpVariable %21 Private + %24 = OpTypeVoid + %25 = OpTypeFunction %24 %7 + %28 = OpTypeArray %5 %8 + %29 = OpTypePointer Output %28 + %30 = OpVariable %29 Output + %31 = OpConstant %7 0 + %33 = OpTypePointer Function %6 + %36 = OpTypePointer Function %5 + %38 = OpTypePointer Uniform %6 + %40 = OpTypePointer Uniform %5 + %46 = OpTypePointer Private %6 + %48 = OpTypePointer Private %5 + %52 = OpVariable %46 Private + %55 = OpVariable %46 Private + %58 = OpVariable %46 Private + %60 = OpTypeFunction %24 %46 %46 %46 + %69 = OpTypePointer Output %5 + %76 = OpTypeFunction %24 + %78 = OpTypeArray %5 %19 + %79 = OpTypePointer Output %78 + %80 = OpVariable %79 Output + %89 = OpVariable %46 Private + %91 = OpTypeFunction %24 %46 + %98 = OpTypePointer Private %18 + %100 = OpTypePointer Input %18 + %101 = OpVariable %100 Input + %103 = OpVariable %100 Input + %105 = OpTypeVector %5 3 + %106 = OpTypeArray %105 %17 + %107 = OpTypePointer Input %106 + %108 = OpVariable %107 Input + %109 = OpTypePointer Output %106 + %110 = OpVariable %109 Output + %111 = OpTypePointer Output %105 + %112 = OpTypePointer Input %105 + %115 = OpVariable %100 Input + %116 = OpTypePointer Output %18 + %117 = OpVariable %116 Output + %118 = OpTypePointer Output %6 + %119 = OpTypePointer Input %6 + %23 = OpFunction %24 None %25 + %26 = OpFunctionParameter %7 + %27 = OpLabel + %34 = OpVariable %33 Function + %35 = OpBitcast %5 %26 + %37 = OpInBoundsAccessChain %36 %34 %31 + OpStore %37 %35 + %39 = OpAccessChain %38 %16 %31 %31 + %41 = OpInBoundsAccessChain %40 %39 %31 + %42 = OpLoad %5 %41 + %43 = OpInBoundsAccessChain %36 %34 %31 + %44 = OpLoad %5 %43 + %45 = OpBitcast %2 %44 + %47 = OpAccessChain %46 %11 %45 + %49 = OpInBoundsAccessChain %48 %47 %31 + OpStore %49 %42 + %50 = OpAccessChain %46 %11 %31 + %51 = OpLoad %6 %50 + OpStore %52 %51 + %53 = OpAccessChain %46 %11 %12 + %54 = OpLoad %6 %53 + OpStore %55 %54 + %56 = OpAccessChain %46 %11 %19 + %57 = OpLoad %6 %56 + 
OpStore %58 %57 + %59 = OpFunctionCall %24 %32 %52 %55 %58 + OpReturn + OpFunctionEnd + %32 = OpFunction %24 None %60 + %61 = OpFunctionParameter %46 + %62 = OpFunctionParameter %46 + %63 = OpFunctionParameter %46 + %64 = OpLabel + %65 = OpLoad %6 %61 + %66 = OpLoad %6 %62 + %67 = OpLoad %6 %63 + %68 = OpCompositeExtract %5 %65 0 + %70 = OpAccessChain %69 %30 %31 + OpStore %70 %68 + %71 = OpCompositeExtract %5 %66 0 + %72 = OpAccessChain %69 %30 %12 + OpStore %72 %71 + %73 = OpCompositeExtract %5 %67 0 + %74 = OpAccessChain %69 %30 %19 + OpStore %74 %73 + OpReturn + OpFunctionEnd + %75 = OpFunction %24 None %76 + %77 = OpLabel + %82 = OpAccessChain %38 %16 %31 %31 + %83 = OpInBoundsAccessChain %40 %82 %31 + %84 = OpLoad %5 %83 + %85 = OpAccessChain %46 %11 %17 + %86 = OpInBoundsAccessChain %48 %85 %31 + OpStore %86 %84 + %87 = OpAccessChain %46 %11 %17 + %88 = OpLoad %6 %87 + OpStore %89 %88 + %90 = OpFunctionCall %24 %81 %89 + OpReturn + OpFunctionEnd + %81 = OpFunction %24 None %91 + %92 = OpFunctionParameter %46 + %93 = OpLabel + %94 = OpLoad %6 %92 + %95 = OpCompositeExtract %5 %94 0 + %96 = OpAccessChain %69 %80 %31 + OpStore %96 %95 + OpReturn + OpFunctionEnd + %1 = OpFunction %24 None %76 + %97 = OpLabel + %99 = OpInBoundsAccessChain %98 %22 %31 + OpCopyMemory %99 %101 + %102 = OpInBoundsAccessChain %98 %22 %12 + OpCopyMemory %102 %103 + %104 = OpLoad %2 %4 + %113 = OpAccessChain %111 %110 %104 + %114 = OpAccessChain %112 %108 %104 + OpCopyMemory %113 %114 + %120 = OpAccessChain %118 %117 %104 + %121 = OpAccessChain %119 %115 %104 + OpCopyMemory %120 %121 + %122 = OpFunctionCall %24 %23 %31 + %123 = OpFunctionCall %24 %23 %12 + %124 = OpFunctionCall %24 %23 %19 + %125 = OpFunctionCall %24 %75 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc b/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc new file mode 100644 index 00000000000..346466e61f5 --- /dev/null +++ 
b/shaders-msl-no-opt/asm/tesc/copy-tess-level-tri.asm.tesc @@ -0,0 +1,82 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 43 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_TessLevelInner %gl_TessLevelOuter %gl_out %gl_InvocationID + OpExecutionMode %main OutputVertices 1 + OpExecutionMode %main Triangles + OpSource GLSL 450 + OpName %main "main" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpName %inner "inner" + OpName %outer "outer" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %14 = OpConstantComposite %_arr_float_uint_2 %float_1 %float_2 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 
+%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %21 = OpConstantComposite %_arr_float_uint_4 %float_1 %float_2 %float_3 %float_4 +%_ptr_Function__arr_float_uint_2 = OpTypePointer Function %_arr_float_uint_2 +%_ptr_Function__arr_float_uint_4 = OpTypePointer Function %_arr_float_uint_4 + %v4float = OpTypeVector %float 4 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_1 = OpTypeArray %gl_PerVertex %uint_1 +%_ptr_Output__arr_gl_PerVertex_uint_1 = OpTypePointer Output %_arr_gl_PerVertex_uint_1 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_1 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %40 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %inner = OpVariable %_ptr_Function__arr_float_uint_2 Function + %outer = OpVariable %_ptr_Function__arr_float_uint_4 Function + OpStore %gl_TessLevelInner %14 + OpStore %gl_TessLevelOuter %21 + %24 = OpLoad %_arr_float_uint_2 %gl_TessLevelInner + OpStore %inner %24 + %27 = OpLoad %_arr_float_uint_4 %gl_TessLevelOuter + OpStore %outer %27 + %38 = OpLoad %int %gl_InvocationID + %42 = OpAccessChain %_ptr_Output_v4float %gl_out %38 %int_0 + OpStore %42 %40 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc b/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc new file mode 100644 index 00000000000..2a95da04b7b --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/plain-control-point-initializer.asm.tesc @@ -0,0 +1,63 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 33 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %v + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %v "v" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %v Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %v_zero = 
OpConstantNull %_arr_float_uint_4 + %v = OpVariable %_ptr_Output__arr_float_uint_4 Output %v_zero +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %28 = OpLoad %int %gl_InvocationID + %29 = OpLoad %int %gl_InvocationID + %30 = OpConvertSToF %float %29 + %32 = OpAccessChain %_ptr_Output_float %v %28 + OpStore %32 %30 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc similarity index 100% rename from shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc rename to shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc diff --git a/shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc similarity index 100% rename from shaders/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc rename to shaders-msl-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.multi-patch.asm.tesc diff --git a/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc new file mode 100644 index 00000000000..53248f17e6a --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/tess-level-initializer-quad.asm.tesc @@ -0,0 +1,88 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 47 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %gl_TessLevelInner %gl_TessLevelOuter + OpExecutionMode %main OutputVertices 4 + OpExecutionMode %main Quads + OpSource GLSL 450 + OpName %main 
"main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 + %inner_zero = OpConstantNull %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable 
%_ptr_Output__arr_float_uint_2 Output %inner_zero +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %float_2 = OpConstant %float 2 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %outer_zero = OpConstantNull %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output %outer_zero + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %int_2 = OpConstant %int 2 + %float_5 = OpConstant %float 5 + %int_3 = OpConstant %int 3 + %float_6 = OpConstant %float 6 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %30 %float_1 + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %33 %float_2 + %38 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %38 %float_3 + %40 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %40 %float_4 + %43 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %43 %float_5 + %46 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_3 + OpStore %46 %float_6 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc new file mode 100644 index 00000000000..6fbc33dc22f --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/tess-level-initializer-triangle.asm.tesc @@ -0,0 +1,88 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 47 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %gl_TessLevelInner %gl_TessLevelOuter + OpExecutionMode %main 
OutputVertices 4 + OpExecutionMode %main Triangles + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 + %inner_zero 
= OpConstantNull %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output %inner_zero +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %float_2 = OpConstant %float 2 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %outer_zero = OpConstantNull %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output %outer_zero + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %int_2 = OpConstant %int 2 + %float_5 = OpConstant %float 5 + %int_3 = OpConstant %int 3 + %float_6 = OpConstant %float 6 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %30 %float_1 + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %33 %float_2 + %38 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %38 %float_3 + %40 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %40 %float_4 + %43 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %43 %float_5 + %46 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_3 + OpStore %46 %float_6 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc b/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc new file mode 100644 index 00000000000..33b8883cc82 --- /dev/null +++ b/shaders-msl-no-opt/asm/tesc/tess-level-read-write-in-function-tri.asm.tesc @@ -0,0 +1,109 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 64 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" 
%gl_TessLevelInner %gl_TessLevelOuter %gl_out %gl_InvocationID + OpExecutionMode %main OutputVertices 1 + OpExecutionMode %main Triangles + OpSource GLSL 450 + OpName %main "main" + OpName %load_tess_level_in_func_ "load_tess_level_in_func(" + OpName %store_tess_level_in_func_ "store_tess_level_in_func(" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpName %v "v" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %7 = OpTypeFunction %float + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Output_float = OpTypePointer Output %float + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output + %int_1 = OpConstant %int 1 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = 
OpConstant %float 3 + %float_4 = OpConstant %float 4 + %int_2 = OpConstant %int 2 + %float_5 = OpConstant %float 5 + %int_3 = OpConstant %int 3 + %float_6 = OpConstant %float 6 +%_ptr_Function_float = OpTypePointer Function %float + %v4float = OpTypeVector %float 4 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_arr_gl_PerVertex_uint_1 = OpTypeArray %gl_PerVertex %uint_1 +%_ptr_Output__arr_gl_PerVertex_uint_1 = OpTypePointer Output %_arr_gl_PerVertex_uint_1 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_1 Output +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_float Function + %46 = OpFunctionCall %void %store_tess_level_in_func_ + %49 = OpFunctionCall %float %load_tess_level_in_func_ + OpStore %v %49 + %59 = OpLoad %int %gl_InvocationID + %60 = OpLoad %float %v + %61 = OpCompositeConstruct %v4float %60 %60 %60 %60 + %63 = OpAccessChain %_ptr_Output_v4float %gl_out %59 %int_0 + OpStore %63 %61 + OpReturn + OpFunctionEnd +%load_tess_level_in_func_ = OpFunction %float None %7 + %9 = OpLabel + %20 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + %21 = OpLoad %float %20 + %27 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + %28 = OpLoad %float %27 + %29 = OpFAdd %float %21 %28 + OpReturnValue %29 + OpFunctionEnd +%store_tess_level_in_func_ = OpFunction %void None %3 + %11 = OpLabel + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %33 %float_1 + %35 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %35 %float_2 + %37 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %37 %float_3 + %39 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %39 
%float_4 + %42 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %42 %float_5 + %45 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_3 + OpStore %45 %float_6 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese b/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese new file mode 100644 index 00000000000..5a7e730634c --- /dev/null +++ b/shaders-msl-no-opt/asm/tese/copy-tess-level.asm.msl2.tese @@ -0,0 +1,58 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 35 +; Schema: 0 + OpCapability Tessellation + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %Domain "main" %gl_TessLevelOuter %gl_TessLevelInner %in_var_CUSTOM_VALUE %gl_TessCoord %out_var_CUSTOM_VALUE + OpExecutionMode %Domain Quads + OpSource HLSL 600 + OpName %in_var_CUSTOM_VALUE "in.var.CUSTOM_VALUE" + OpName %out_var_CUSTOM_VALUE "out.var.CUSTOM_VALUE" + OpName %Domain "Domain" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorate %gl_TessCoord Patch + OpDecorate %in_var_CUSTOM_VALUE Location 0 + OpDecorate %out_var_CUSTOM_VALUE Location 0 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 + %v4float = OpTypeVector %float 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%_ptr_Input__arr_v4float_uint_4 = OpTypePointer Input %_arr_v4float_uint_4 + %v3float = OpTypeVector %float 3 +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = 
OpTypeVoid + %22 = OpTypeFunction %void +%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_CUSTOM_VALUE = OpVariable %_ptr_Input__arr_v4float_uint_4 Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%out_var_CUSTOM_VALUE = OpVariable %_ptr_Output_v4float Output + %Domain = OpFunction %void None %22 + %23 = OpLabel + %24 = OpLoad %_arr_float_uint_4 %gl_TessLevelOuter + %25 = OpLoad %_arr_float_uint_2 %gl_TessLevelInner + %26 = OpCompositeExtract %float %24 0 + %27 = OpCompositeExtract %float %24 1 + %28 = OpCompositeExtract %float %24 2 + %29 = OpCompositeExtract %float %24 3 + %30 = OpCompositeExtract %float %25 0 + %31 = OpCompositeExtract %float %25 1 + %32 = OpFAdd %float %26 %30 + %33 = OpFAdd %float %27 %31 + %34 = OpCompositeConstruct %v4float %32 %33 %28 %29 + OpStore %out_var_CUSTOM_VALUE %34 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese b/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese new file mode 100644 index 00000000000..e13064f94f4 --- /dev/null +++ b/shaders-msl-no-opt/asm/tese/split-access-chain.asm.tese @@ -0,0 +1,35 @@ + OpCapability Tessellation + %94 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %main "main" %in0 %o0 + OpExecutionMode %main Quads + OpName %main "main" + OpName %in0 "in0" + OpName %o0 "o0" + OpDecorate %in0 Location 0 + OpDecorate %o0 Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Input__arr_v4float_uint_1 = OpTypePointer Input %_arr_v4float_uint_1 + %in0 = OpVariable %_ptr_Input__arr_v4float_uint_1 Input 
+%_ptr_Output_v4float = OpTypePointer Output %v4float + %o0 = OpVariable %_ptr_Output_float Output +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_float = OpTypePointer Input %float + %main = OpFunction %void None %3 + %4 = OpLabel + %ac = OpAccessChain %_ptr_Input_v4float %in0 %uint_0 + %bac = OpInBoundsAccessChain %_ptr_Input_float %ac %uint_2 + %loaded = OpLoad %float %bac + OpStore %o0 %loaded + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert b/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..6ae3b67e59d --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,43 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 13 +; Schema: 0 +OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Vertex %main "main" %_ %foo %gl_Position +OpSource GLSL 450 +OpName %main "main" +OpName %Vert "Vert" +OpMemberName %Vert 0 "a" +OpMemberName %Vert 1 "b" +OpName %_ "" +OpName %Foo "Foo" +OpMemberName %Foo 0 "c" +OpMemberName %Foo 1 "d" +OpName %foo "foo" +OpDecorate %Vert Block +OpDecorate %_ Location 0 +OpDecorate %foo Location 2 +OpDecorate %gl_Position BuiltIn Position +%void = OpTypeVoid +%3 = OpTypeFunction %void +%float = OpTypeFloat 32 +%Vert = OpTypeStruct %float %float +%vec4 = OpTypeVector %float 4 +%ptr_Output_vec4 = OpTypePointer Output %vec4 +%_ptr_Output_Vert = OpTypePointer Output %Vert +%zero_vert = OpConstantNull %Vert +%_ = OpVariable %_ptr_Output_Vert Output %zero_vert +%gl_Position = OpVariable %ptr_Output_vec4 Output +%Foo = OpTypeStruct %float %float +%_ptr_Output_Foo = OpTypePointer Output %Foo +%zero_foo = OpConstantNull %Foo +%blank = OpConstantNull %vec4 +%foo = OpVariable %_ptr_Output_Foo Output %zero_foo +%main = OpFunction %void None %3 +%5 = OpLabel 
+OpStore %gl_Position %blank +OpReturn +OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert b/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..a00d4b71bba --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,39 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpName %_ "" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%gl_PerVertex = OpTypeStruct %v4float %float +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %zero = OpConstantNull %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output %zero + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %19 %17 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert b/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert new file mode 100644 index 00000000000..d44e325b1cf --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/composite-extract-physical-type-id.asm.vert @@ -0,0 +1,63 @@ + OpCapability Shader + 
OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %VSMain "main" %gl_VertexIndex %gl_Position + OpSource HLSL 600 + OpName %type_Float2Array "type.Float2Array" + OpMemberName %type_Float2Array 0 "arr" + OpName %Float2Array "Float2Array" + OpName %VSMain "VSMain" + OpName %param_var_i "param.var.i" + OpName %src_VSMain "src.VSMain" + OpName %i "i" + OpName %bb_entry "bb.entry" + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorate %gl_Position BuiltIn Position + OpDecorate %Float2Array DescriptorSet 0 + OpDecorate %Float2Array Binding 0 + OpDecorate %_arr_v2float_uint_3 ArrayStride 16 + OpMemberDecorate %type_Float2Array 0 Offset 0 + OpDecorate %type_Float2Array Block + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 + %v2float = OpTypeVector %float 2 +%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 +%type_Float2Array = OpTypeStruct %_arr_v2float_uint_3 +%_ptr_Uniform_type_Float2Array = OpTypePointer Uniform %type_Float2Array +%_ptr_Input_uint = OpTypePointer Input %uint + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %20 = OpTypeFunction %void +%_ptr_Function_uint = OpTypePointer Function %uint + %27 = OpTypeFunction %v4float %_ptr_Function_uint +%_ptr_Uniform__arr_v2float_uint_3 = OpTypePointer Uniform %_arr_v2float_uint_3 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%Float2Array = OpVariable %_ptr_Uniform_type_Float2Array Uniform +%gl_VertexIndex = OpVariable %_ptr_Input_uint Input +%gl_Position = OpVariable %_ptr_Output_v4float Output + %VSMain = OpFunction %void None %20 + %21 = OpLabel +%param_var_i = OpVariable %_ptr_Function_uint Function + %24 = OpLoad %uint %gl_VertexIndex + OpStore %param_var_i %24 + %25 = OpFunctionCall %v4float %src_VSMain %param_var_i + OpStore %gl_Position %25 + OpReturn + OpFunctionEnd + 
%src_VSMain = OpFunction %v4float None %27 + %i = OpFunctionParameter %_ptr_Function_uint + %bb_entry = OpLabel + %30 = OpLoad %uint %i + %32 = OpAccessChain %_ptr_Uniform__arr_v2float_uint_3 %Float2Array %int_0 + %34 = OpAccessChain %_ptr_Uniform_v2float %32 %30 + %35 = OpLoad %v2float %34 + %36 = OpCompositeExtract %float %35 0 + %37 = OpCompositeExtract %float %35 1 + %38 = OpCompositeConstruct %v4float %36 %37 %float_0 %float_1 + OpReturnValue %38 + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert b/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert new file mode 100644 index 00000000000..992b1fc21be --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/constant-composite-block-no-array-stride.asm.vert @@ -0,0 +1,157 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 121 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %1 "main" %2 %3 %4 %5 %gl_VertexIndex %gl_InstanceIndex + OpMemberDecorate %_struct_8 0 BuiltIn Position + OpMemberDecorate %_struct_8 1 BuiltIn PointSize + OpMemberDecorate %_struct_8 2 BuiltIn ClipDistance + OpMemberDecorate %_struct_8 3 BuiltIn CullDistance + OpDecorate %_struct_8 Block + OpDecorate %3 Location 0 + OpDecorate %4 Location 1 + OpDecorate %5 Location 1 + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorate %gl_InstanceIndex BuiltIn InstanceIndex + OpDecorate %9 ArrayStride 4 + OpDecorate %10 Offset 0 + %9 = OpDecorationGroup + %10 = OpDecorationGroup + OpDecorate %11 RelaxedPrecision + OpDecorate %12 RelaxedPrecision + OpDecorate %12 Flat + OpDecorate %12 Restrict + %13 = OpDecorationGroup + %11 = OpDecorationGroup + %12 = OpDecorationGroup + OpGroupMemberDecorate %10 %_struct_14 0 %_struct_15 0 + %void = OpTypeVoid + %bool = OpTypeBool + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v2int = OpTypeVector %int 2 + %v2uint = OpTypeVector %uint 2 
+ %v2float = OpTypeVector %float 2 + %v3int = OpTypeVector %int 3 + %v3uint = OpTypeVector %uint 3 + %v3float = OpTypeVector %float 3 + %v4int = OpTypeVector %int 4 + %v4uint = OpTypeVector %uint 4 + %v4float = OpTypeVector %float 4 + %v4bool = OpTypeVector %bool 4 + %31 = OpTypeFunction %v4float %v4float + %32 = OpTypeFunction %bool + %33 = OpTypeFunction %void +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Input_int = OpTypePointer Input %int +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v2int = OpTypePointer Input %v2int +%_ptr_Input_v2uint = OpTypePointer Input %v2uint +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v4int = OpTypePointer Input %v4int +%_ptr_Input_v4uint = OpTypePointer Input %v4uint +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_int = OpTypePointer Output %int +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v2int = OpTypePointer Output %v2int +%_ptr_Output_v2uint = OpTypePointer Output %v2uint +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output_v4int = OpTypePointer Output %v4int +%_ptr_Output_v4uint = OpTypePointer Output %v4uint +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Function_v4float = OpTypePointer Function %v4float + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %float_0_5 = OpConstant %float 0.5 + %float_n1 = OpConstant %float -1 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_32 = OpConstant %uint 32 + %uint_4 = 
OpConstant %uint 4 +%uint_2147483647 = OpConstant %uint 2147483647 + %74 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %75 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %76 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_arr_v4float_uint_32 = OpTypeArray %v4float %uint_32 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_ptr_Input__arr_v4float_uint_32 = OpTypePointer Input %_arr_v4float_uint_32 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %_struct_8 = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output__struct_8 = OpTypePointer Output %_struct_8 + %2 = OpVariable %_ptr_Output__struct_8 Output + %3 = OpVariable %_ptr_Input_v4float Input + %4 = OpVariable %_ptr_Output_v4float Output + %5 = OpVariable %_ptr_Input_v4float Input +%gl_VertexIndex = OpVariable %_ptr_Input_int Input +%gl_InstanceIndex = OpVariable %_ptr_Input_int Input +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %_struct_14 = OpTypeStruct %_arr_float_uint_3 + %_struct_15 = OpTypeStruct %_arr_float_uint_3 +%_ptr_Function__struct_14 = OpTypePointer Function %_struct_14 +%_ptr_Function__struct_15 = OpTypePointer Function %_struct_15 + %float_2 = OpConstant %float 2 + %float_n2 = OpConstant %float -2 + %93 = OpConstantComposite %_arr_float_uint_3 %float_1 %float_2 %float_1 + %94 = OpConstantComposite %_arr_float_uint_3 %float_n1 %float_n2 %float_n1 + %95 = OpConstantComposite %_struct_14 %93 + %96 = OpConstantComposite %_struct_15 %94 + %1 = OpFunction %void None %33 + %97 = OpLabel + %98 = 
OpLoad %v4float %3 + %99 = OpAccessChain %_ptr_Output_v4float %2 %int_0 + OpStore %99 %98 + %100 = OpLoad %v4float %5 + %101 = OpFunctionCall %v4float %102 %100 + OpStore %4 %101 + OpReturn + OpFunctionEnd + %103 = OpFunction %bool None %32 + %104 = OpLabel + %105 = OpLoad %int %gl_VertexIndex + %106 = OpIEqual %bool %105 %int_0 + OpReturnValue %106 + OpFunctionEnd + %102 = OpFunction %v4float None %31 + %107 = OpFunctionParameter %v4float + %108 = OpLabel + %109 = OpVariable %_ptr_Function_v4float Function + %110 = OpVariable %_ptr_Function__struct_14 Function + %111 = OpVariable %_ptr_Function__struct_15 Function + OpStore %109 %107 + OpStore %110 %95 + OpStore %111 %96 + %112 = OpAccessChain %_ptr_Function_float %110 %int_0 %int_2 + %113 = OpLoad %float %112 + %114 = OpAccessChain %_ptr_Function_float %111 %int_0 %int_2 + %115 = OpLoad %float %114 + %116 = OpFAdd %float %113 %115 + %117 = OpAccessChain %_ptr_Function_float %109 %int_1 + %118 = OpLoad %float %117 + %119 = OpFAdd %float %116 %118 + OpStore %117 %119 + %120 = OpLoad %v4float %109 + OpReturnValue %120 + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert b/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert new file mode 100644 index 00000000000..00ad1ee9cbc --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/duplicate-view-index.asm.vert @@ -0,0 +1,66 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 23 +; Schema: 0 + OpCapability Shader + OpCapability MultiView + OpExtension "SPV_KHR_multiview" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %gl_ViewIndex + OpEntryPoint Vertex %main2 "main2" %_ %gl_ViewIndex2 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_multiview" + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 
"gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %gl_ViewIndex "gl_ViewIndex" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_ViewIndex BuiltIn ViewIndex + OpDecorate %gl_ViewIndex2 BuiltIn ViewIndex + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_int = OpTypePointer Input %int +%gl_ViewIndex = OpVariable %_ptr_Input_int Input +%gl_ViewIndex2 = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpLoad %int %gl_ViewIndex + %19 = OpConvertSToF %float %18 + %20 = OpCompositeConstruct %v4float %19 %19 %19 %19 + %22 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %22 %20 + OpReturn + OpFunctionEnd + + %main2 = OpFunction %void None %3 + %100 = OpLabel + %101 = OpLoad %int %gl_ViewIndex2 + %102 = OpConvertSToF %float %101 + %103 = OpCompositeConstruct %v4float %102 %102 %102 %102 + %104 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %104 %103 + + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert b/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert new file mode 100644 index 00000000000..22058d2c8ce --- /dev/null +++ b/shaders-msl-no-opt/asm/vert/pointer-to-pointer.asm.vert @@ -0,0 +1,34 @@ +OpCapability Shader +OpCapability 
VariablePointers +OpCapability VariablePointersStorageBuffer +OpMemoryModel Logical GLSL450 + +OpEntryPoint Vertex %fn_vert "main" + +%F = OpTypeFloat 32 +%PF = OpTypePointer StorageBuffer %F +%PPF = OpTypePointer Private %PF +%PPPF = OpTypePointer Function %PPF + +%V = OpTypeVoid +%Fn0V = OpTypeFunction %V + +%FnArg = OpTypeFunction %V %PPPF + +%uPPF = OpUndef %PPF + +%fn_ptr = OpFunction %V None %FnArg + %arg = OpFunctionParameter %PPPF + %fn_ptr_bb0 = OpLabel + OpReturn +OpFunctionEnd + +%fn_vert = OpFunction %V None %Fn0V + %fn_vert_bb0 = OpLabel + %VPPPF = OpVariable %PPPF Function + OpStore %VPPPF %uPPF + %VV = OpFunctionCall %V %fn_ptr %VPPPF + OpReturn +OpFunctionEnd + + diff --git a/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp b/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp new file mode 100644 index 00000000000..081c39626d1 --- /dev/null +++ b/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 8) in; + +shared float shared_group[8][8]; +shared float shared_group_alt[8][8]; + +void main() +{ + float blob[8]; + for (int i = 0; i < 8; i++) + blob[i] = float(i); + shared_group[gl_LocalInvocationIndex] = blob; + + barrier(); + + float copied_blob[8] = shared_group[gl_LocalInvocationIndex ^ 1u]; + shared_group_alt[gl_LocalInvocationIndex] = shared_group[gl_LocalInvocationIndex]; +} diff --git a/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp b/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp new file mode 100644 index 00000000000..c2965731e9a --- /dev/null +++ b/shaders-msl-no-opt/comp/basic.dynamic-buffer.msl2.invalid.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 3, local_size_y = 3, local_size_z = 2) in; + +layout(set = 0, binding = 0) uniform Foo +{ + int a; + int b; +}; + +layout(set = 0, binding = 1) uniform Bar +{ + int c; + int d; +}; + +layout(set = 1, binding = 2) buffer Baz +{ + int e; + int f; +} baz[3][3][2]; + +void 
main() +{ + uvec3 coords = gl_GlobalInvocationID; + baz[coords.x][coords.y][coords.z].e = a + c; + baz[coords.x][coords.y][coords.z].f = b * d; +} diff --git a/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp b/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp new file mode 100644 index 00000000000..8f1d97861c3 --- /dev/null +++ b/shaders-msl-no-opt/comp/bda-restrict-pointer-variable.msl2.comp @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_buffer_reference : require + +layout(buffer_reference) buffer Ref +{ + vec4 v; +}; + +layout(push_constant) uniform Registers +{ + Ref foo; +}; + +void main() +{ + restrict Ref ref = foo; + ref.v = vec4(1.0); +} diff --git a/shaders-msl/comp/bitcast-16bit-1.invalid.comp b/shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp similarity index 100% rename from shaders-msl/comp/bitcast-16bit-1.invalid.comp rename to shaders-msl-no-opt/comp/bitcast-16bit-1.invalid.comp diff --git a/shaders-msl/comp/bitcast-16bit-2.invalid.comp b/shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp similarity index 100% rename from shaders-msl/comp/bitcast-16bit-2.invalid.comp rename to shaders-msl-no-opt/comp/bitcast-16bit-2.invalid.comp diff --git a/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp b/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp new file mode 100644 index 00000000000..56c11bbb75d --- /dev/null +++ b/shaders-msl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.msl23.comp @@ -0,0 +1,21 @@ +#version 460 + +#extension GL_EXT_buffer_reference: enable +#extension GL_EXT_buffer_reference_uvec2: enable + +struct S { + vec3 v; +}; + +layout(buffer_reference) buffer SSBO{ + S s[]; +}; + +layout(push_constant) uniform PC { + uvec2 ptr; +} pc; + +void main(){ + SSBO ssbo = SSBO(pc.ptr); + ssbo.s[0].v = vec3(1.0); +} diff --git a/shaders-msl-no-opt/comp/glsl.std450.comp b/shaders-msl-no-opt/comp/glsl.std450.comp new file mode 100644 
index 00000000000..a17a82b82af --- /dev/null +++ b/shaders-msl-no-opt/comp/glsl.std450.comp @@ -0,0 +1,129 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float res; + int ires; + uint ures; + + vec4 f32; + ivec4 s32; + uvec4 u32; + + mat2 m2; + mat3 m3; + mat4 m4; +}; + +void main() +{ + float tmp; + vec2 v2; + vec3 v3; + vec4 v4; + int itmp; + + res = round(f32.x); + res = roundEven(f32.x); + res = trunc(f32.x); + res = abs(f32.x); + ires = abs(s32.x); + res = sign(f32.x); + ires = sign(s32.x); + res = floor(f32.x); + res = ceil(f32.x); + res = fract(f32.x); + res = radians(f32.x); + res = degrees(f32.x); + res = sin(f32.x); + res = cos(f32.x); + res = tan(f32.x); + res = asin(f32.x); + res = acos(f32.x); + res = atan(f32.x); + res = sinh(f32.x); + res = cosh(f32.x); + res = tanh(f32.x); + res = asinh(f32.x); + res = acosh(f32.x); + res = atanh(f32.x); + res = atan(f32.x, f32.y); + res = pow(f32.x, f32.y); + res = exp(f32.x); + res = log(f32.x); + res = exp2(f32.x); + res = log2(f32.x); + res = sqrt(f32.x); + res = inversesqrt(f32.x); + + res = length(f32.x); + res = distance(f32.x, f32.y); + res = normalize(f32.x); + res = faceforward(f32.x, f32.y, f32.z); + res = reflect(f32.x, f32.y); + res = refract(f32.x, f32.y, f32.z); + + res = length(f32.xy); + res = distance(f32.xy, f32.zw); + v2 = normalize(f32.xy); + v2 = faceforward(f32.xy, f32.yz, f32.zw); + v2 = reflect(f32.xy, f32.zw); + v2 = refract(f32.xy, f32.yz, f32.w); + + v3 = cross(f32.xyz, f32.yzw); + + res = determinant(m2); + res = determinant(m3); + res = determinant(m4); + m2 = inverse(m2); + m3 = inverse(m3); + m4 = inverse(m4); + + res = modf(f32.x, tmp); + // ModfStruct + + res = min(f32.x, f32.y); + ures = min(u32.x, u32.y); + ires = min(s32.x, s32.y); + res = max(f32.x, f32.y); + ures = max(u32.x, u32.y); + ires = max(s32.x, s32.y); + + res = clamp(f32.x, f32.y, f32.z); + ures = clamp(u32.x, u32.y, u32.z); + ires = clamp(s32.x, s32.y, s32.z); + + res = 
mix(f32.x, f32.y, f32.z); + res = step(f32.x, f32.y); + res = smoothstep(f32.x, f32.y, f32.z); + res = fma(f32.x, f32.y, f32.z); + + res = frexp(f32.x, itmp); + // FrexpStruct + res = ldexp(f32.x, itmp); + + ures = packSnorm4x8(f32); + ures = packUnorm4x8(f32); + ures = packSnorm2x16(f32.xy); + ures = packUnorm2x16(f32.xy); + ures = packHalf2x16(f32.xy); + // packDouble2x32 + + v2 = unpackSnorm2x16(u32.x); + v2 = unpackUnorm2x16(u32.x); + v2 = unpackHalf2x16(u32.x); + v4 = unpackSnorm4x8(u32.x); + v4 = unpackUnorm4x8(u32.x); + // unpackDouble2x32 + + s32 = findLSB(s32); + s32 = findLSB(u32); + s32 = findMSB(s32); + s32 = findMSB(u32); + + // interpolateAtSample + // interpolateAtOffset + + // NMin, NMax, NClamp +} diff --git a/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp b/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..f7a8787d3d8 --- /dev/null +++ b/shaders-msl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,62 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 31 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Foo" + OpMemberName %Foo 0 "abs" + OpName %f "f" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "abs" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "foo" + OpMemberName %SSBO 1 "foo2" + OpName %_ "" + OpName %linear "abs" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Foo = OpTypeStruct %float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %float + %SSBO = OpTypeStruct %Foo_0 %Foo_0 +%_ptr_Uniform_SSBO = OpTypePointer 
Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + %f = OpVariable %_ptr_Function_Foo Function + %linear = OpVariable %_ptr_Function_int Function + %17 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 + %18 = OpLoad %Foo_0 %17 + %19 = OpCompositeExtract %float %18 0 + %21 = OpAccessChain %_ptr_Function_float %f %int_0 + OpStore %21 %19 + OpStore %linear %int_10 + %26 = OpLoad %Foo %f + %27 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1 + %28 = OpCompositeExtract %float %26 0 + %30 = OpAccessChain %_ptr_Uniform_float %27 %int_0 + OpStore %30 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/comp/implicit-integer-promotion.comp b/shaders-msl-no-opt/comp/implicit-integer-promotion.comp new file mode 100644 index 00000000000..a0ee95b3a1a --- /dev/null +++ b/shaders-msl-no-opt/comp/implicit-integer-promotion.comp @@ -0,0 +1,85 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(set = 0, binding = 0) buffer BUF0 +{ + f16vec2 f16s; + u16vec2 u16; + i16vec2 i16; + u16vec4 u16s; + i16vec4 i16s; + float16_t f16; +}; + +void test_i16() +{ + f16 += int16BitsToFloat16(i16.x + i16.y); + f16 += int16BitsToFloat16(i16.x - i16.y); + f16 += int16BitsToFloat16(i16.x * i16.y); + f16 += int16BitsToFloat16(i16.x / i16.y); + f16 += int16BitsToFloat16(i16.x % i16.y); + f16 += int16BitsToFloat16(i16.x << i16.y); + f16 += int16BitsToFloat16(i16.x >> i16.y); + f16 += int16BitsToFloat16(~i16.x); + f16 += int16BitsToFloat16(-i16.x); + f16 += int16BitsToFloat16(i16.x ^ i16.y); + f16 += 
int16BitsToFloat16(i16.x & i16.y); + f16 += int16BitsToFloat16(i16.x | i16.y); +} + +void test_u16() +{ + f16 += uint16BitsToFloat16(u16.x + u16.y); + f16 += uint16BitsToFloat16(u16.x - u16.y); + f16 += uint16BitsToFloat16(u16.x * u16.y); + f16 += uint16BitsToFloat16(u16.x / u16.y); + f16 += uint16BitsToFloat16(u16.x % u16.y); + f16 += uint16BitsToFloat16(u16.x << u16.y); + f16 += uint16BitsToFloat16(u16.x >> u16.y); + f16 += uint16BitsToFloat16(~u16.x); + f16 += uint16BitsToFloat16(-u16.x); + f16 += uint16BitsToFloat16(u16.x ^ u16.y); + f16 += uint16BitsToFloat16(u16.x & u16.y); + f16 += uint16BitsToFloat16(u16.x | u16.y); +} + +void test_u16s() +{ + f16s += uint16BitsToFloat16(u16s.xy + u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy - u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy * u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy / u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy % u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy << u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy >> u16s.zw); + f16s += uint16BitsToFloat16(~u16s.xy); + f16s += uint16BitsToFloat16(-u16s.xy); + f16s += uint16BitsToFloat16(u16s.xy ^ u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy & u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy | u16s.zw); +} + +void test_i16s() +{ + f16s += int16BitsToFloat16(i16s.xy + i16s.zw); + f16s += int16BitsToFloat16(i16s.xy - i16s.zw); + f16s += int16BitsToFloat16(i16s.xy * i16s.zw); + f16s += int16BitsToFloat16(i16s.xy / i16s.zw); + f16s += int16BitsToFloat16(i16s.xy % i16s.zw); + f16s += int16BitsToFloat16(i16s.xy << i16s.zw); + f16s += int16BitsToFloat16(i16s.xy >> i16s.zw); + f16s += int16BitsToFloat16(~i16s.xy); + f16s += int16BitsToFloat16(-i16s.xy); + f16s += int16BitsToFloat16(i16s.xy ^ i16s.zw); + f16s += int16BitsToFloat16(i16s.xy & i16s.zw); + f16s += int16BitsToFloat16(i16s.xy | i16s.zw); +} + +void main() +{ + test_u16(); + test_i16(); + test_u16s(); + test_i16s(); +} diff --git a/shaders-msl-no-opt/comp/int16min-literal.comp 
b/shaders-msl-no-opt/comp/int16min-literal.comp new file mode 100644 index 00000000000..c1b345266d8 --- /dev/null +++ b/shaders-msl-no-opt/comp/int16min-literal.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float16_t a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float16_t b; +}; + +void main() +{ + int16_t v = float16BitsToInt16(b); + v ^= 0x8000s; + a = int16BitsToFloat16(v); +} diff --git a/shaders-msl/comp/int64.invalid.msl22.comp b/shaders-msl-no-opt/comp/int64.invalid.msl22.comp similarity index 100% rename from shaders-msl/comp/int64.invalid.msl22.comp rename to shaders-msl-no-opt/comp/int64.invalid.msl22.comp diff --git a/shaders-msl-no-opt/comp/int64min-literal.msl22.comp b/shaders-msl-no-opt/comp/int64min-literal.msl22.comp new file mode 100644 index 00000000000..79296054462 --- /dev/null +++ b/shaders-msl-no-opt/comp/int64min-literal.msl22.comp @@ -0,0 +1,21 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + int64_t v = int64_t(floatBitsToInt(b)); + v ^= 0x8000000000000000L; + a = intBitsToFloat(int(v)); +} diff --git a/shaders-msl-no-opt/comp/intmin-literal.comp b/shaders-msl-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..ee35cedabb9 --- /dev/null +++ b/shaders-msl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,18 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + a = intBitsToFloat(floatBitsToInt(b) ^ 0x80000000); +} diff --git 
a/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp b/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp new file mode 100644 index 00000000000..af1c47b32ce --- /dev/null +++ b/shaders-msl-no-opt/comp/std140-array-load-composite-construct.comp @@ -0,0 +1,13 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer SSBO +{ + float a[16]; + vec4 b[16]; +}; + +void main() +{ + b[gl_GlobalInvocationID.x] = vec4(a[gl_GlobalInvocationID.x]); +} diff --git a/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp similarity index 94% rename from shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp rename to shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp index a0898cfc549..47d88912f75 100644 --- a/shaders-msl/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp +++ b/shaders-msl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp @@ -43,7 +43,8 @@ struct Content S3 m3; float m4; - S4 m3s[8]; + // glslang seems to miscompile this atm into ArrayStride of 16 even in scalar layout. + //S4 m3s[8]; }; layout(binding = 2, scalar) restrict buffer SSBO2 diff --git a/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp new file mode 100644 index 00000000000..8a0be2269e5 --- /dev/null +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl12.emulate-subgroup.comp @@ -0,0 +1,25 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +// Reduced test for emulated functionality. 
+ +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); +} diff --git a/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp new file mode 100644 index 00000000000..f840d2aee89 --- /dev/null +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.comp @@ -0,0 +1,145 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = 
subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); + + // arithmetic + vec4 added = subgroupAdd(vec4(20.0)); + ivec4 iadded = subgroupAdd(ivec4(20)); + vec4 multiplied = subgroupMul(vec4(20.0)); + ivec4 imultiplied = subgroupMul(ivec4(20)); + vec4 lo = subgroupMin(vec4(20.0)); + vec4 hi = subgroupMax(vec4(20.0)); + ivec4 slo = subgroupMin(ivec4(20)); + ivec4 shi = subgroupMax(ivec4(20)); + uvec4 ulo = subgroupMin(uvec4(20)); + uvec4 uhi = subgroupMax(uvec4(20)); + uvec4 anded = subgroupAnd(ballot_value); + uvec4 ored = subgroupOr(ballot_value); + uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); + bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); + + 
added = subgroupInclusiveAdd(added); + iadded = subgroupInclusiveAdd(iadded); + multiplied = subgroupInclusiveMul(multiplied); + imultiplied = subgroupInclusiveMul(imultiplied); + //lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupInclusiveMax(hi); + //slo = subgroupInclusiveMin(slo); + //shi = subgroupInclusiveMax(shi); + //ulo = subgroupInclusiveMin(ulo); + //uhi = subgroupInclusiveMax(uhi); + //anded = subgroupInclusiveAnd(anded); + //ored = subgroupInclusiveOr(ored); + //xored = subgroupInclusiveXor(ored); + //added = subgroupExclusiveAdd(lo); + + added = subgroupExclusiveAdd(multiplied); + multiplied = subgroupExclusiveMul(multiplied); + iadded = subgroupExclusiveAdd(imultiplied); + imultiplied = subgroupExclusiveMul(imultiplied); + //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupExclusiveMax(hi); + //ulo = subgroupExclusiveMin(ulo); + //uhi = subgroupExclusiveMax(uhi); + //slo = subgroupExclusiveMin(slo); + //shi = subgroupExclusiveMax(shi); + //anded = subgroupExclusiveAnd(anded); + //ored = subgroupExclusiveOr(ored); + //xored = subgroupExclusiveXor(ored); + + // clustered + added = subgroupClusteredAdd(added, 4u); + multiplied = subgroupClusteredMul(multiplied, 4u); + iadded = subgroupClusteredAdd(iadded, 4u); + imultiplied = subgroupClusteredMul(imultiplied, 4u); + lo = subgroupClusteredMin(lo, 4u); + hi = subgroupClusteredMax(hi, 4u); + ulo = subgroupClusteredMin(ulo, 4u); + uhi = subgroupClusteredMax(uhi, 4u); + slo = subgroupClusteredMin(slo, 4u); + shi = subgroupClusteredMax(shi, 4u); + anded = subgroupClusteredAnd(anded, 4u); + ored = subgroupClusteredOr(ored, 4u); + xored = subgroupClusteredXor(xored, 4u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = 
subgroupQuadSwapHorizontal(bvec4(true)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); +} diff --git a/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp similarity index 86% rename from shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp rename to shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp index f8f5133f8de..28c5d6b34d3 100644 --- a/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.fixed-subgroup.comp @@ -35,7 +35,9 @@ void main() FragColor = float(gl_SubgroupLeMask); FragColor = float(gl_SubgroupLtMask); vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); uvec4 ballot_value = subgroupBallot(true); bool inverse_ballot_value = subgroupInverseBallot(ballot_value); bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); @@ -47,17 +49,23 @@ void main() // shuffle uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); // shuffle relative uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); // vote bool has_all = subgroupAll(true); 
bool has_any = subgroupAny(true); bool has_equal = subgroupAllEqual(0); has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); // arithmetic vec4 added = subgroupAdd(vec4(20.0)); @@ -120,7 +128,11 @@ void main() // quad vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); } diff --git a/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp similarity index 70% rename from shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp rename to shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp index 66eb4a20758..a78527f5428 100644 --- a/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl21.ios.comp @@ -27,15 +27,23 @@ void main() // shuffle uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); // shuffle relative uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); // quad vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); vec4 swap_vertical = 
subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); } diff --git a/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp new file mode 100644 index 00000000000..bc904a4f750 --- /dev/null +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl22.ios.comp @@ -0,0 +1,79 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_quad : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +// Reduced test for functionality exposed on iOS. 
+ +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = 
subgroupAllEqual(bvec4(true, true, false, true)); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); +} diff --git a/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp new file mode 100644 index 00000000000..28c5d6b34d3 --- /dev/null +++ b/shaders-msl-no-opt/comp/subgroups.nocompat.invalid.vk.msl23.ios.simd.comp @@ -0,0 +1,138 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + 
FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); + + // arithmetic + vec4 added = subgroupAdd(vec4(20.0)); + ivec4 iadded = subgroupAdd(ivec4(20)); + vec4 multiplied = subgroupMul(vec4(20.0)); + ivec4 imultiplied = subgroupMul(ivec4(20)); + vec4 lo = subgroupMin(vec4(20.0)); + vec4 hi = subgroupMax(vec4(20.0)); + ivec4 slo = subgroupMin(ivec4(20)); + ivec4 shi = subgroupMax(ivec4(20)); + uvec4 ulo = subgroupMin(uvec4(20)); + uvec4 uhi = subgroupMax(uvec4(20)); + uvec4 anded = subgroupAnd(ballot_value); + uvec4 ored = subgroupOr(ballot_value); + uvec4 
xored = subgroupXor(ballot_value); + + added = subgroupInclusiveAdd(added); + iadded = subgroupInclusiveAdd(iadded); + multiplied = subgroupInclusiveMul(multiplied); + imultiplied = subgroupInclusiveMul(imultiplied); + //lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupInclusiveMax(hi); + //slo = subgroupInclusiveMin(slo); + //shi = subgroupInclusiveMax(shi); + //ulo = subgroupInclusiveMin(ulo); + //uhi = subgroupInclusiveMax(uhi); + //anded = subgroupInclusiveAnd(anded); + //ored = subgroupInclusiveOr(ored); + //xored = subgroupInclusiveXor(ored); + //added = subgroupExclusiveAdd(lo); + + added = subgroupExclusiveAdd(multiplied); + multiplied = subgroupExclusiveMul(multiplied); + iadded = subgroupExclusiveAdd(imultiplied); + imultiplied = subgroupExclusiveMul(imultiplied); + //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupExclusiveMax(hi); + //ulo = subgroupExclusiveMin(ulo); + //uhi = subgroupExclusiveMax(uhi); + //slo = subgroupExclusiveMin(slo); + //shi = subgroupExclusiveMax(shi); + //anded = subgroupExclusiveAnd(anded); + //ored = subgroupExclusiveOr(ored); + //xored = subgroupExclusiveXor(ored); + + // clustered + added = subgroupClusteredAdd(added, 4u); + multiplied = subgroupClusteredMul(multiplied, 4u); + iadded = subgroupClusteredAdd(iadded, 4u); + imultiplied = subgroupClusteredMul(imultiplied, 4u); + lo = subgroupClusteredMin(lo, 4u); + hi = subgroupClusteredMax(hi, 4u); + ulo = subgroupClusteredMin(ulo, 4u); + uhi = subgroupClusteredMax(uhi, 4u); + slo = subgroupClusteredMin(slo, 4u); + shi = subgroupClusteredMax(shi, 4u); + anded = subgroupClusteredAnd(anded, 4u); + ored = subgroupClusteredOr(ored, 4u); + xored = subgroupClusteredXor(xored, 4u); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = 
subgroupQuadSwapVertical(bvec4(true)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); +} diff --git a/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp b/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..c3e0922a166 --- /dev/null +++ b/shaders-msl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,14 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + vec3 a; + vec3 b; +}; + +void main() +{ + bvec3 c = lessThan(b, vec3(1.0)); + a = mix(vec3(1, 0, 0), vec3(0, 0, 1), c); +} diff --git a/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp b/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..5ffcc3f3a49 --- /dev/null +++ b/shaders-msl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + mat3 a; + float b; +}; + +void main() +{ + // Scalar to Matrix + bool c = b < 1.0; + a = c ? mat3(vec3(1), vec3(1), vec3(1)) : mat3(vec3(0), vec3(0), vec3(0)); + a = c ? 
mat3(1) : mat3(0); +} diff --git a/shaders-msl-no-opt/components/fragment-input-component.frag b/shaders-msl-no-opt/components/fragment-input-component.frag new file mode 100644 index 00000000000..60d48bef7e6 --- /dev/null +++ b/shaders-msl-no-opt/components/fragment-input-component.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0, component = 3) in float Foo1; +layout(location = 0, component = 0) in vec3 Foo3; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(Foo3, Foo1); +} diff --git a/shaders-msl-no-opt/components/fragment-output-component.frag b/shaders-msl-no-opt/components/fragment-output-component.frag new file mode 100644 index 00000000000..29a57dfa1f3 --- /dev/null +++ b/shaders-msl-no-opt/components/fragment-output-component.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0, component = 0) out float FragColor0; +layout(location = 0, component = 1) out vec2 FragColor1; +layout(location = 0, component = 3) out float FragColor3; + +void main() +{ + FragColor0 = 1.0; + FragColor1 = vec2(2.0, 3.0); + FragColor3 = 4.0; +} diff --git a/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag b/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag new file mode 100644 index 00000000000..ae9b7f75e76 --- /dev/null +++ b/shaders-msl-no-opt/components/fragment-output-component.pad-fragment.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0, component = 0) out float FragColor0; +layout(location = 0, component = 1) out vec2 FragColor1; + +void main() +{ + FragColor0 = 1.0; + FragColor1 = vec2(2.0, 3.0); +} diff --git a/shaders-msl-no-opt/components/vertex-input-component.vert b/shaders-msl-no-opt/components/vertex-input-component.vert new file mode 100644 index 00000000000..7ba31bf6552 --- /dev/null +++ b/shaders-msl-no-opt/components/vertex-input-component.vert @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0, component = 0) in vec3 Foo3; +layout(location = 0, component 
= 3) in float Foo1; +layout(location = 0) out vec3 Foo; + +void main() +{ + gl_Position = vec4(Foo3, Foo1); + Foo = Foo3 + Foo1; +} diff --git a/shaders-msl-no-opt/components/vertex-output-component.vert b/shaders-msl-no-opt/components/vertex-output-component.vert new file mode 100644 index 00000000000..5abd8dc6ff4 --- /dev/null +++ b/shaders-msl-no-opt/components/vertex-output-component.vert @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) in vec4 vFoo; +layout(location = 0) out vec3 Foo3; +layout(location = 0, component = 3) out float Foo1; + +void main() +{ + gl_Position = vFoo; + Foo3 = vFoo.xyz; + Foo1 = vFoo.w; +} diff --git a/shaders-msl/frag/16bit-constants.frag b/shaders-msl-no-opt/frag/16bit-constants.invalid.frag similarity index 100% rename from shaders-msl/frag/16bit-constants.frag rename to shaders-msl-no-opt/frag/16bit-constants.invalid.frag diff --git a/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag b/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag new file mode 100644 index 00000000000..8cce059bab2 --- /dev/null +++ b/shaders-msl-no-opt/frag/demote-to-helper.vk.nocompat.msl21.invalid.frag @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + //demote; // FIXME: Not implemented for MSL + bool helper = helperInvocationEXT(); +} diff --git a/shaders-msl-no-opt/frag/depth-image-gather.asm.frag b/shaders-msl-no-opt/frag/depth-image-gather.asm.frag new file mode 100644 index 00000000000..430899c6f05 --- /dev/null +++ b/shaders-msl-no-opt/frag/depth-image-gather.asm.frag @@ -0,0 +1,72 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 36 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + OpExtension "SPV_GOOGLE_user_type" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %psMain "main" %gl_FragCoord %in_var_TEXCOORD0 %out_var_SV_Target0 + OpExecutionMode %psMain OriginUpperLeft + OpSource 
HLSL 500 + OpName %type_2d_image "type.2d.image" + OpName %g_depthTexture "g_depthTexture" + OpName %type_sampler "type.sampler" + OpName %g_sampler "g_sampler" + OpName %g_comp "g_comp" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %psMain "psMain" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %g_depthTexture DescriptorSet 0 + OpDecorate %g_depthTexture Binding 0 + OpDecorate %g_sampler DescriptorSet 0 + OpDecorate %g_sampler Binding 0 + OpDecorate %g_comp DescriptorSet 0 + OpDecorate %g_comp Binding 1 + OpDecorateString %g_depthTexture UserTypeGOOGLE "texture2d" + %float = OpTypeFloat 32 + %float_0_5 = OpConstant %float 0.5 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v2int = OpTypeVector %int 2 + %16 = OpConstantComposite %v2int %int_0 %int_0 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %25 = OpTypeFunction %void +%type_sampled_image = OpTypeSampledImage %type_2d_image +%g_depthTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant + %g_sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %g_comp = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_FragCoord = 
OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %psMain = OpFunction %void None %25 + %26 = OpLabel + %27 = OpLoad %v2float %in_var_TEXCOORD0 + %28 = OpLoad %type_2d_image %g_depthTexture + %29 = OpLoad %type_sampler %g_comp + %30 = OpSampledImage %type_sampled_image %28 %29 + %31 = OpImageDrefGather %v4float %30 %27 %float_0_5 None + %32 = OpLoad %type_sampler %g_sampler + %33 = OpSampledImage %type_sampled_image %28 %32 + %34 = OpImageGather %v4float %33 %27 %int_0 ConstOffset %16 + %35 = OpFMul %v4float %31 %34 + OpStore %out_var_SV_Target0 %35 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag b/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag new file mode 100644 index 00000000000..b1b058d0146 --- /dev/null +++ b/shaders-msl-no-opt/frag/force-active-resources.msl2.argument..force-active.discrete.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec2 vUV; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform sampler2D uTexture1; +layout(set = 0, binding = 1) uniform sampler2D uTexture2; +layout(set = 2, binding = 0) uniform sampler2D uTextureDiscrete1; +layout(set = 2, binding = 1) uniform sampler2D uTextureDiscrete2; + +void main() +{ + FragColor = texture(uTexture2, vUV); + FragColor += texture(uTextureDiscrete2, vUV); +} diff --git a/shaders-msl/frag/fp16.desktop.invalid.frag b/shaders-msl-no-opt/frag/fp16.desktop.invalid.frag similarity index 100% rename from shaders-msl/frag/fp16.desktop.invalid.frag rename to shaders-msl-no-opt/frag/fp16.desktop.invalid.frag diff --git a/shaders-msl-no-opt/frag/image-gather.frag b/shaders-msl-no-opt/frag/image-gather.frag new file mode 100644 index 00000000000..b492cfbe903 --- /dev/null +++ b/shaders-msl-no-opt/frag/image-gather.frag @@ -0,0 +1,14 @@ 
+#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform sampler2D uSamp; +layout(set = 0, binding = 1) uniform sampler2DShadow uSampShadow; +layout(location = 0) in vec3 vUV; + +void main() +{ + FragColor = textureGather(uSamp, vUV.xy, 0); + FragColor += textureGather(uSamp, vUV.xy, 1); + FragColor += textureGather(uSampShadow, vUV.xy, vUV.z); +} diff --git a/shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag b/shaders-msl-no-opt/frag/min-max-clamp.invalid.asm.frag similarity index 100% rename from shaders-msl/asm/frag/min-max-clamp.invalid.asm.frag rename to shaders-msl-no-opt/frag/min-max-clamp.invalid.asm.frag diff --git a/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag new file mode 100644 index 00000000000..ad566615f56 --- /dev/null +++ b/shaders-msl-no-opt/frag/min-max-clamp.relax-nan.invalid.asm.frag @@ -0,0 +1,293 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 205 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpExtension "SPV_AMD_gpu_shader_half_float" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %v1 %v2 %v3 %v4 %h1 %h2 %h3 %h4 + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_AMD_gpu_shader_half_float" + OpName %main "main" + OpName %res "res" + OpName %res2 "res2" + OpName %res3 "res3" + OpName %res4 "res4" + OpName %hres "hres" + OpName %hres2 "hres2" + OpName %hres3 "hres3" + OpName %hres4 "hres4" + OpName %v1 "v1" + OpName %v2 "v2" + OpName %v3 "v3" + OpName %v4 "v4" + OpName %h1 "h1" + OpName %h2 "h2" + OpName %h3 "h3" + OpName %h4 "h4" + OpDecorate %v1 Location 0 + OpDecorate %v2 Location 1 + OpDecorate %v3 Location 2 + OpDecorate %v4 Location 3 + OpDecorate %h1 Location 4 + OpDecorate %h2 Location 5 + OpDecorate %h3 Location 6 + OpDecorate %h4 Location 7 + %void = 
OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Function_v3float = OpTypePointer Function %v3float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Function_half = OpTypePointer Function %half +%_ptr_Input_half = OpTypePointer Input %half +%_ptr_Function_v2half = OpTypePointer Function %v2half +%_ptr_Input_v2half = OpTypePointer Input %v2half +%_ptr_Function_v3half = OpTypePointer Function %v3half +%_ptr_Input_v3half = OpTypePointer Input %v3half +%_ptr_Function_v4half = OpTypePointer Function %v4half +%_ptr_Input_v4half = OpTypePointer Input %v4half + %v1 = OpVariable %_ptr_Input_float Input + %v2 = OpVariable %_ptr_Input_v2float Input + %v3 = OpVariable %_ptr_Input_v3float Input + %v4 = OpVariable %_ptr_Input_v4float Input + %h1 = OpVariable %_ptr_Input_half Input + %h2 = OpVariable %_ptr_Input_v2half Input + %h3 = OpVariable %_ptr_Input_v3half Input + %h4 = OpVariable %_ptr_Input_v4half Input + %main = OpFunction %void None %3 + %5 = OpLabel + %res = OpVariable %_ptr_Function_float Function + %46 = OpLoad %float %v1 + %47 = OpLoad %float %v1 + %48 = OpExtInst %float %1 FMin %46 %47 + OpStore %res %48 + %49 = OpLoad %float %v1 + %50 = OpLoad %float %v1 + %51 = OpExtInst %float %1 FMax %49 %50 + OpStore %res %51 + %52 = OpLoad %float %v1 + %53 = OpLoad %float %v1 + %54 = OpLoad %float %v1 + %55 = OpExtInst %float %1 FClamp %52 %53 %54 + OpStore %res %55 + %56 = OpLoad %float %v1 + %57 = 
OpLoad %float %v1 + %58 = OpExtInst %float %1 NMin %56 %57 + OpStore %res %58 + %59 = OpLoad %float %v1 + %60 = OpLoad %float %v1 + %61 = OpExtInst %float %1 NMax %59 %60 + OpStore %res %61 + %62 = OpLoad %float %v1 + %63 = OpLoad %float %v1 + %64 = OpLoad %float %v1 + %65 = OpExtInst %float %1 NClamp %62 %63 %64 + OpStore %res %65 + %res2 = OpVariable %_ptr_Function_v2float Function + %66 = OpLoad %v2float %v2 + %67 = OpLoad %v2float %v2 + %68 = OpExtInst %v2float %1 FMin %66 %67 + OpStore %res2 %68 + %69 = OpLoad %v2float %v2 + %70 = OpLoad %v2float %v2 + %71 = OpExtInst %v2float %1 FMax %69 %70 + OpStore %res2 %71 + %72 = OpLoad %v2float %v2 + %73 = OpLoad %v2float %v2 + %74 = OpLoad %v2float %v2 + %75 = OpExtInst %v2float %1 FClamp %72 %73 %74 + OpStore %res2 %75 + %76 = OpLoad %v2float %v2 + %77 = OpLoad %v2float %v2 + %78 = OpExtInst %v2float %1 NMin %76 %77 + OpStore %res2 %78 + %79 = OpLoad %v2float %v2 + %80 = OpLoad %v2float %v2 + %81 = OpExtInst %v2float %1 NMax %79 %80 + OpStore %res2 %81 + %82 = OpLoad %v2float %v2 + %83 = OpLoad %v2float %v2 + %84 = OpLoad %v2float %v2 + %85 = OpExtInst %v2float %1 NClamp %82 %83 %84 + OpStore %res2 %85 + %res3 = OpVariable %_ptr_Function_v3float Function + %86 = OpLoad %v3float %v3 + %87 = OpLoad %v3float %v3 + %88 = OpExtInst %v3float %1 FMin %86 %87 + OpStore %res3 %88 + %89 = OpLoad %v3float %v3 + %90 = OpLoad %v3float %v3 + %91 = OpExtInst %v3float %1 FMax %89 %90 + OpStore %res3 %91 + %92 = OpLoad %v3float %v3 + %93 = OpLoad %v3float %v3 + %94 = OpLoad %v3float %v3 + %95 = OpExtInst %v3float %1 FClamp %92 %93 %94 + OpStore %res3 %95 + %96 = OpLoad %v3float %v3 + %97 = OpLoad %v3float %v3 + %98 = OpExtInst %v3float %1 NMin %96 %97 + OpStore %res3 %98 + %99 = OpLoad %v3float %v3 + %100 = OpLoad %v3float %v3 + %101 = OpExtInst %v3float %1 NMax %99 %100 + OpStore %res3 %101 + %102 = OpLoad %v3float %v3 + %103 = OpLoad %v3float %v3 + %104 = OpLoad %v3float %v3 + %105 = OpExtInst %v3float %1 NClamp %102 %103 %104 + 
OpStore %res3 %105 + %res4 = OpVariable %_ptr_Function_v4float Function + %106 = OpLoad %v4float %v4 + %107 = OpLoad %v4float %v4 + %108 = OpExtInst %v4float %1 FMin %106 %107 + OpStore %res4 %108 + %109 = OpLoad %v4float %v4 + %110 = OpLoad %v4float %v4 + %111 = OpExtInst %v4float %1 FMax %109 %110 + OpStore %res4 %111 + %112 = OpLoad %v4float %v4 + %113 = OpLoad %v4float %v4 + %114 = OpLoad %v4float %v4 + %115 = OpExtInst %v4float %1 FClamp %112 %113 %114 + OpStore %res4 %115 + %116 = OpLoad %v4float %v4 + %117 = OpLoad %v4float %v4 + %118 = OpExtInst %v4float %1 NMin %116 %117 + OpStore %res4 %118 + %119 = OpLoad %v4float %v4 + %120 = OpLoad %v4float %v4 + %121 = OpExtInst %v4float %1 NMax %119 %120 + OpStore %res4 %121 + %122 = OpLoad %v4float %v4 + %123 = OpLoad %v4float %v4 + %124 = OpLoad %v4float %v4 + %125 = OpExtInst %v4float %1 NClamp %122 %123 %124 + OpStore %res4 %125 + %hres = OpVariable %_ptr_Function_half Function + %126 = OpLoad %half %h1 + %127 = OpLoad %half %h1 + %128 = OpExtInst %half %1 FMin %126 %127 + OpStore %hres %128 + %129 = OpLoad %half %h1 + %130 = OpLoad %half %h1 + %131 = OpExtInst %half %1 FMax %129 %130 + OpStore %hres %131 + %132 = OpLoad %half %h1 + %133 = OpLoad %half %h1 + %134 = OpLoad %half %h1 + %135 = OpExtInst %half %1 FClamp %132 %133 %134 + OpStore %hres %135 + %136 = OpLoad %half %h1 + %137 = OpLoad %half %h1 + %138 = OpExtInst %half %1 NMin %136 %137 + OpStore %hres %138 + %139 = OpLoad %half %h1 + %140 = OpLoad %half %h1 + %141 = OpExtInst %half %1 NMax %139 %140 + OpStore %hres %141 + %142 = OpLoad %half %h1 + %143 = OpLoad %half %h1 + %144 = OpLoad %half %h1 + %145 = OpExtInst %half %1 NClamp %142 %143 %144 + OpStore %hres %145 + %hres2 = OpVariable %_ptr_Function_v2half Function + %146 = OpLoad %v2half %h2 + %147 = OpLoad %v2half %h2 + %148 = OpExtInst %v2half %1 FMin %146 %147 + OpStore %hres2 %148 + %149 = OpLoad %v2half %h2 + %150 = OpLoad %v2half %h2 + %151 = OpExtInst %v2half %1 FMax %149 %150 + OpStore %hres2 
%151 + %152 = OpLoad %v2half %h2 + %153 = OpLoad %v2half %h2 + %154 = OpLoad %v2half %h2 + %155 = OpExtInst %v2half %1 FClamp %152 %153 %154 + OpStore %hres2 %155 + %156 = OpLoad %v2half %h2 + %157 = OpLoad %v2half %h2 + %158 = OpExtInst %v2half %1 NMin %156 %157 + OpStore %hres2 %158 + %159 = OpLoad %v2half %h2 + %160 = OpLoad %v2half %h2 + %161 = OpExtInst %v2half %1 NMax %159 %160 + OpStore %hres2 %161 + %162 = OpLoad %v2half %h2 + %163 = OpLoad %v2half %h2 + %164 = OpLoad %v2half %h2 + %165 = OpExtInst %v2half %1 NClamp %162 %163 %164 + OpStore %hres2 %165 + %hres3 = OpVariable %_ptr_Function_v3half Function + %166 = OpLoad %v3half %h3 + %167 = OpLoad %v3half %h3 + %168 = OpExtInst %v3half %1 FMin %166 %167 + OpStore %hres3 %168 + %169 = OpLoad %v3half %h3 + %170 = OpLoad %v3half %h3 + %171 = OpExtInst %v3half %1 FMax %169 %170 + OpStore %hres3 %171 + %172 = OpLoad %v3half %h3 + %173 = OpLoad %v3half %h3 + %174 = OpLoad %v3half %h3 + %175 = OpExtInst %v3half %1 FClamp %172 %173 %174 + OpStore %hres3 %175 + %176 = OpLoad %v3half %h3 + %177 = OpLoad %v3half %h3 + %178 = OpExtInst %v3half %1 NMin %176 %177 + OpStore %hres3 %178 + %179 = OpLoad %v3half %h3 + %180 = OpLoad %v3half %h3 + %181 = OpExtInst %v3half %1 NMax %179 %180 + OpStore %hres3 %181 + %182 = OpLoad %v3half %h3 + %183 = OpLoad %v3half %h3 + %184 = OpLoad %v3half %h3 + %185 = OpExtInst %v3half %1 NClamp %182 %183 %184 + OpStore %hres3 %185 + %hres4 = OpVariable %_ptr_Function_v4half Function + %186 = OpLoad %v4half %h4 + %187 = OpLoad %v4half %h4 + %188 = OpExtInst %v4half %1 FMin %186 %187 + OpStore %hres4 %188 + %189 = OpLoad %v4half %h4 + %190 = OpLoad %v4half %h4 + %191 = OpExtInst %v4half %1 FMax %189 %190 + OpStore %hres4 %191 + %192 = OpLoad %v4half %h4 + %193 = OpLoad %v4half %h4 + %194 = OpLoad %v4half %h4 + %195 = OpExtInst %v4half %1 FClamp %192 %193 %194 + OpStore %hres4 %195 + %196 = OpLoad %v4half %h4 + %197 = OpLoad %v4half %h4 + %198 = OpExtInst %v4half %1 NMin %196 %197 + OpStore 
%hres4 %198 + %199 = OpLoad %v4half %h4 + %200 = OpLoad %v4half %h4 + %201 = OpExtInst %v4half %1 NMax %199 %200 + OpStore %hres4 %201 + %202 = OpLoad %v4half %h4 + %203 = OpLoad %v4half %h4 + %204 = OpLoad %v4half %h4 + %205 = OpExtInst %v4half %1 NClamp %202 %203 %204 + OpStore %hres4 %205 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag b/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag new file mode 100644 index 00000000000..4e0460afbbb --- /dev/null +++ b/shaders-msl-no-opt/frag/nonuniform-constructor.msl2.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; +layout(location = 1) flat in int vIndex; + +layout(set = 0, binding = 0) uniform texture2D uTex[10]; +layout(set = 1, binding = 0) uniform sampler Immut; + +void main() +{ + FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); +} diff --git a/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag new file mode 100644 index 00000000000..59079fe58b4 --- /dev/null +++ b/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag b/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag new file mode 100644 index 00000000000..a2be5ef62f3 --- /dev/null 
+++ b/shaders-msl-no-opt/frag/pull-interpolant-access-chain.msl23.frag @@ -0,0 +1,12 @@ +#version 450 +layout(location = 0) centroid in vec4 a[2]; +layout(location = 2) centroid in vec4 b[2]; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor.x = interpolateAtOffset(a[0].x, vec2(0.5)); + FragColor.y = interpolateAtOffset(a[1].y, vec2(0.5)); + FragColor.z = interpolateAtOffset(b[0].z, vec2(0.5)); + FragColor.w = interpolateAtOffset(b[1].w, vec2(0.5)); +} diff --git a/shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag b/shaders-msl-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag similarity index 100% rename from shaders-msl/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag rename to shaders-msl-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag diff --git a/shaders-msl/frag/shadow-compare-global-alias.invalid.frag b/shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag similarity index 100% rename from shaders-msl/frag/shadow-compare-global-alias.invalid.frag rename to shaders-msl-no-opt/frag/shadow-compare-global-alias.invalid.frag diff --git a/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag similarity index 85% rename from shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag rename to shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag index 3a2cf0234cb..05aa5212026 100644 --- a/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag +++ b/shaders-msl-no-opt/frag/subgroups.nocompat.invalid.vk.msl22.frag @@ -28,7 +28,9 @@ void main() FragColor = float(gl_SubgroupLeMask); FragColor = float(gl_SubgroupLtMask); vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); uvec4 
ballot_value = subgroupBallot(true); bool inverse_ballot_value = subgroupInverseBallot(ballot_value); bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); @@ -40,17 +42,23 @@ void main() // shuffle uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); // shuffle relative uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); // vote bool has_all = subgroupAll(true); bool has_any = subgroupAny(true); bool has_equal = subgroupAllEqual(0); has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); // arithmetic vec4 added = subgroupAdd(vec4(20.0)); @@ -113,7 +121,11 @@ void main() // quad vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); } diff --git a/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag b/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag new file mode 100644 index 00000000000..70822aee999 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl20.ios.framebuffer-fetch.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(binding = 4, input_attachment_index = 1) uniform subpassInput uInput; 
+layout(location = 1) out vec4 FragColor; + +void main() +{ + FragColor = subpassLoad(uInput); +} diff --git a/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag b/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag new file mode 100644 index 00000000000..70822aee999 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input-attachment-index-fallback.msl23.framebuffer-fetch.frag @@ -0,0 +1,9 @@ +#version 450 + +layout(binding = 4, input_attachment_index = 1) uniform subpassInput uInput; +layout(location = 1) out vec4 FragColor; + +void main() +{ + FragColor = subpassLoad(uInput); +} diff --git a/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag b/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag new file mode 100644 index 00000000000..ef9ef77d56f --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.ios.frag @@ -0,0 +1,24 @@ +#version 450 + +layout(set = 0, input_attachment_index = 0, binding = 0) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +vec4 samp3(subpassInput uS) +{ + return subpassLoad(uS); +} + +vec4 samp2(subpassInput uS) +{ + return subpassLoad(uS) + samp3(uS); +} + +vec4 samp() +{ + return subpassLoad(uSub) + samp3(uSub); +} + +void main() +{ + FragColor = samp() + samp2(uSub); +} diff --git a/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag b/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag new file mode 100644 index 00000000000..ef9ef77d56f --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input-function-argument.framebuffer-fetch.msl23.frag @@ -0,0 +1,24 @@ +#version 450 + +layout(set = 0, input_attachment_index = 0, binding = 0) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +vec4 samp3(subpassInput uS) +{ + return subpassLoad(uS); +} + +vec4 
samp2(subpassInput uS) +{ + return subpassLoad(uS) + samp3(uS); +} + +vec4 samp() +{ + return subpassLoad(uSub) + samp3(uSub); +} + +void main() +{ + FragColor = samp() + samp2(uSub); +} diff --git a/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag new file mode 100644 index 00000000000..671a4d1b416 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.argument.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 10, input_attachment_index = 1) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 9) uniform texture2D uTex; +layout(set = 0, binding = 8) uniform sampler uSampler; + +void main() +{ + FragColor = subpassLoad(uSub) + texture(sampler2D(uTex, uSampler), vec2(0.5)); +} diff --git a/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag new file mode 100644 index 00000000000..671a4d1b416 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.framebuffer-fetch.msl23.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 10, input_attachment_index = 1) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 9) uniform texture2D uTex; +layout(set = 0, binding = 8) uniform sampler uSampler; + +void main() +{ + FragColor = subpassLoad(uSub) + texture(sampler2D(uTex, uSampler), vec2(0.5)); +} diff --git a/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag new file mode 100644 index 00000000000..671a4d1b416 --- /dev/null +++ 
b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.argument.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 10, input_attachment_index = 1) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 9) uniform texture2D uTex; +layout(set = 0, binding = 8) uniform sampler uSampler; + +void main() +{ + FragColor = subpassLoad(uSub) + texture(sampler2D(uTex, uSampler), vec2(0.5)); +} diff --git a/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag new file mode 100644 index 00000000000..671a4d1b416 --- /dev/null +++ b/shaders-msl-no-opt/frag/subpass-input.decoration-binding.ios.framebuffer-fetch.msl2.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 10, input_attachment_index = 1) uniform subpassInput uSub; +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 9) uniform texture2D uTex; +layout(set = 0, binding = 8) uniform sampler uSampler; + +void main() +{ + FragColor = subpassLoad(uSub) + texture(sampler2D(uTex, uSampler), vec2(0.5)); +} diff --git a/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag b/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..b4d9509ab49 --- /dev/null +++ b/shaders-msl-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 22 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamp "uSamp" + OpName %vUV "vUV" + OpDecorate %FragColor Location 0 + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp 
Binding 0 + OpDecorate %vUV Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %int = OpTypeInt 32 0 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v2float %vUV + %21 = OpImageGather %v4float %14 %18 %int_1 + OpStore %FragColor %21 + OpReturn + OpFunctionEnd diff --git a/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag b/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag new file mode 100644 index 00000000000..f3cf0e190d9 --- /dev/null +++ b/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.argument.msl2.frag @@ -0,0 +1,18 @@ +#version 450 + +struct Foo +{ + vec4 v; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + Foo foo; +} ubos[2]; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = ubos[1].foo.v; +} diff --git a/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag b/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag new file mode 100644 index 00000000000..f3cf0e190d9 --- /dev/null +++ b/shaders-msl-no-opt/frag/ubo-array-multiple-structs-access-chain.frag @@ -0,0 +1,18 @@ +#version 450 + +struct Foo +{ + vec4 v; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + Foo foo; +} ubos[2]; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = ubos[1].foo.v; +} diff --git a/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag 
b/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag new file mode 100644 index 00000000000..77760522f94 --- /dev/null +++ b/shaders-msl-no-opt/frag/ubo-offset-out-of-order.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform UBO +{ + layout(offset = 16) mat4 m; + layout(offset = 0) vec4 v; +}; + +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = m * vColor + v; +} diff --git a/shaders-msl-no-opt/frag/variables.zero-initialize.frag b/shaders-msl-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..41da8001f47 --- /dev/null +++ b/shaders-msl-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +int uninit_int; +ivec4 uninit_vector; +mat4 uninit_matrix; + +struct Foo { int a; }; +Foo uninit_foo; + +void main() +{ + int uninit_function_int; + if (vColor.x > 10.0) + uninit_function_int = 10; + else + uninit_function_int = 20; + FragColor = vColor; +} diff --git a/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag b/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag new file mode 100644 index 00000000000..9a8d9d20b25 --- /dev/null +++ b/shaders-msl-no-opt/frag/volatile-helper-invocation.msl23.spv16.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = float(gl_HelperInvocation); + demote; + FragColor = float(gl_HelperInvocation); +} diff --git a/shaders-msl-no-opt/packing/array-of-vec3.comp b/shaders-msl-no-opt/packing/array-of-vec3.comp new file mode 100644 index 00000000000..61572122222 --- /dev/null +++ b/shaders-msl-no-opt/packing/array-of-vec3.comp @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, scalar) 
buffer SSBO +{ + vec3 v[16]; +}; + +void main() +{ + v[1] = v[0]; +} diff --git a/shaders-msl-no-opt/packing/array-of-vec4.comp b/shaders-msl-no-opt/packing/array-of-vec4.comp new file mode 100644 index 00000000000..c5bf5e8a3fa --- /dev/null +++ b/shaders-msl-no-opt/packing/array-of-vec4.comp @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, scalar) buffer SSBO +{ + vec4 v[16]; +}; + +void main() +{ + v[1] = v[0]; +} diff --git a/shaders-msl-no-opt/packing/isolated-scalar-access.comp b/shaders-msl-no-opt/packing/isolated-scalar-access.comp new file mode 100644 index 00000000000..32c92889934 --- /dev/null +++ b/shaders-msl-no-opt/packing/isolated-scalar-access.comp @@ -0,0 +1,25 @@ +#version 450 + +layout(set = 0, binding = 0) buffer SSBO +{ + vec4 v; + mat4 cm; + layout(row_major) mat4 rm; + + vec3 v3; + float f; +}; + +shared vec4 shared_vec4; +shared vec3 shared_vec3; + +void main() +{ + v.x = 10.0; + v3.y = 40.0; + cm[1][2] = 20.0; + rm[3][1] = 30.0; + + shared_vec4.z = 40.0; + shared_vec3.y = 1.0; +} diff --git a/shaders-msl-no-opt/packing/load-store-col-rows.comp b/shaders-msl-no-opt/packing/load-store-col-rows.comp new file mode 100644 index 00000000000..b3f2897034d --- /dev/null +++ b/shaders-msl-no-opt/packing/load-store-col-rows.comp @@ -0,0 +1,59 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(binding = 0, std140) buffer SSBO1 +{ + mat2 a; + layout(row_major) mat2 a2; +}; + +layout(scalar, binding = 1) buffer SSBO2 +{ + mat2x3 b; + layout(row_major) mat3x2 b2; +}; + +void load_store_column() +{ + vec2 u = a[0]; + vec2 v = a[1]; + u += v; + a[0] = u; + a[1] = v; +} + +void load_store_row() +{ + vec2 u = a2[0]; + vec2 v = a2[1]; + u += v; + a2[0] = u; + a2[1] = v; +} + +void load_store_packed_column() +{ + vec3 u = b[0]; + vec3 v = b[1]; + u += v; + b[0] = u; + b[1] = v; +} + +void load_store_packed_row() +{ + 
vec2 u = b2[0]; + vec2 v = b2[1]; + u += v; + b2[0] = u; + b2[1] = v; +} + +void main() +{ + load_store_column(); + load_store_row(); + load_store_packed_column(); + load_store_packed_row(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp b/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp new file mode 100644 index 00000000000..6a94c86ac0d --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x2-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat2 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x2-std140.comp b/shaders-msl-no-opt/packing/matrix-2x2-std140.comp new file mode 100644 index 00000000000..3940e5c3b9d --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x2-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x2-std430.comp b/shaders-msl-no-opt/packing/matrix-2x2-std430.comp new file mode 100644 index 00000000000..342c3989ca5 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x2-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp b/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp new file mode 100644 index 00000000000..cf40f89a052 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x3-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat2x3 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x3-std140.comp b/shaders-msl-no-opt/packing/matrix-2x3-std140.comp new file mode 100644 index 00000000000..6fbe149d1fe --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x3-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2x3 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x3-std430.comp b/shaders-msl-no-opt/packing/matrix-2x3-std430.comp new file mode 100644 index 00000000000..36a6bab1457 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x3-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2x3 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp b/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp new file mode 100644 index 00000000000..70fa4748d8e --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x4-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat2x4 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x4-std140.comp b/shaders-msl-no-opt/packing/matrix-2x4-std140.comp new file mode 100644 index 00000000000..6c5d06fe514 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x4-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2x4 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-2x4-std430.comp b/shaders-msl-no-opt/packing/matrix-2x4-std430.comp new file mode 100644 index 00000000000..177b9669402 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-2x4-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat2x4 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp b/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp new file mode 100644 index 00000000000..296efa673c4 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x2-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat3x2 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x2-std140.comp b/shaders-msl-no-opt/packing/matrix-3x2-std140.comp new file mode 100644 index 00000000000..1334c4eae70 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x2-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3x2 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x2-std430.comp b/shaders-msl-no-opt/packing/matrix-3x2-std430.comp new file mode 100644 index 00000000000..fe82993ddba --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x2-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3x2 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp b/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp new file mode 100644 index 00000000000..0741384ea23 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x3-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat3 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x3-std140.comp b/shaders-msl-no-opt/packing/matrix-3x3-std140.comp new file mode 100644 index 00000000000..0de5d599c12 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x3-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x3-std430.comp b/shaders-msl-no-opt/packing/matrix-3x3-std430.comp new file mode 100644 index 00000000000..8e48109e935 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x3-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp b/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp new file mode 100644 index 00000000000..23297d5c632 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x4-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat3x4 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x4-std140.comp b/shaders-msl-no-opt/packing/matrix-3x4-std140.comp new file mode 100644 index 00000000000..11135eeccfc --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x4-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3x4 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-3x4-std430.comp b/shaders-msl-no-opt/packing/matrix-3x4-std430.comp new file mode 100644 index 00000000000..78c577f2ef2 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-3x4-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat3x4 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp b/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp new file mode 100644 index 00000000000..412c208148d --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x2-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat4x2 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x2-std140.comp b/shaders-msl-no-opt/packing/matrix-4x2-std140.comp new file mode 100644 index 00000000000..e130cb0a465 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x2-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4x2 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x2-std430.comp b/shaders-msl-no-opt/packing/matrix-4x2-std430.comp new file mode 100644 index 00000000000..76aa9ae4a64 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x2-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4x2 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp b/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp new file mode 100644 index 00000000000..8468b289cdb --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x3-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat4x3 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x3-std140.comp b/shaders-msl-no-opt/packing/matrix-4x3-std140.comp new file mode 100644 index 00000000000..8223eae49f2 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x3-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4x3 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x3-std430.comp b/shaders-msl-no-opt/packing/matrix-4x3-std430.comp new file mode 100644 index 00000000000..aa4d685cf9b --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x3-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4x3 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp b/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp new file mode 100644 index 00000000000..6f14c07311e --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x4-scalar.comp @@ -0,0 +1,86 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +#define T mat4 +#define PACKING scalar + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x4-std140.comp b/shaders-msl-no-opt/packing/matrix-4x4-std140.comp new file mode 100644 index 00000000000..45193b3257f --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x4-std140.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4 +#define PACKING std140 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-4x4-std430.comp b/shaders-msl-no-opt/packing/matrix-4x4-std430.comp new file mode 100644 index 00000000000..3a1eb9f020f --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-4x4-std430.comp @@ -0,0 +1,85 @@ +#version 450 +layout(local_size_x = 1) in; + +#define T mat4 +#define PACKING std430 + +layout(set = 0, binding = 0, PACKING) buffer SSBOCol +{ + layout(column_major) T col_major0; + layout(column_major) T col_major1; +}; + +layout(set = 0, binding = 1, PACKING) buffer SSBORow +{ + layout(row_major) T row_major0; + layout(row_major) T row_major1; +}; + +void load_store_to_variable_col_major() +{ + // Load to variable. + T loaded = col_major0; + + // Store from variable. + col_major1 = loaded; +} + +void load_store_to_variable_row_major() +{ + // Load to variable. + T loaded = row_major0; + + // Store to variable. + row_major0 = loaded; +} + +void copy_col_major_to_col_major() +{ + // Copy col -> col + col_major0 = col_major1; +} + +void copy_row_major_to_col_major() +{ + // Copy row -> col + col_major0 = row_major0; +} + +void copy_col_major_to_row_major() +{ + // Copy col -> row + row_major0 = col_major0; +} + +void copy_row_major_to_row_major() +{ + // Copy row -> row + row_major0 = row_major1; +} + +void copy_columns() +{ + // Copy columns/rows. + col_major0[1] = row_major0[1]; + row_major0[1] = col_major0[1]; +} + +void copy_elements() +{ + // Copy individual elements. 
+ col_major0[0][1] = row_major0[0][1]; + row_major0[0][1] = col_major0[0][1]; +} + +void main() +{ + load_store_to_variable_col_major(); + load_store_to_variable_row_major(); + copy_col_major_to_col_major(); + copy_col_major_to_row_major(); + copy_row_major_to_col_major(); + copy_row_major_to_row_major(); + copy_columns(); + copy_elements(); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp b/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp new file mode 100644 index 00000000000..9b7b9fc37ef --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-row-major.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(row_major, set = 0, binding = 0) buffer SSBO +{ + mat3 m0; + mat3 m1; + vec3 v0; + vec3 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp new file mode 100644 index 00000000000..cd77d242a6d --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major-2.comp @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(scalar, set = 0, binding = 0) buffer SSBO +{ + mat3 m0; + mat3 m1; + vec3 v0; + vec3 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); + v0 = (v1 * m0) * m1; + v0 = v1 * (m0 * m1); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp new file mode 100644 index 00000000000..847d2e8f608 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-col-major.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + mat2 m0; + mat2 m1; + vec2 v0; + vec2 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); + v0 = (v1 * m0) * 
m1; + v0 = v1 * (m0 * m1); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp new file mode 100644 index 00000000000..60a3da0aa35 --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major-2.comp @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +layout(scalar, row_major, set = 0, binding = 0) buffer SSBO +{ + mat3 m0; + mat3 m1; + vec3 v0; + vec3 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); + v0 = (v1 * m0) * m1; + v0 = v1 * (m0 * m1); +} diff --git a/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp new file mode 100644 index 00000000000..5b71ae9bc7a --- /dev/null +++ b/shaders-msl-no-opt/packing/matrix-multiply-unpacked-row-major.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, row_major, set = 0, binding = 0) buffer SSBO +{ + mat2 m0; + mat2 m1; + vec2 v0; + vec2 v1; +}; + +void main() +{ + v0 = (m0 * m1) * v1; + v0 = m0 * (m1 * v1); + v0 = (v1 * m0) * m1; + v0 = v1 * (m0 * m1); +} diff --git a/shaders-msl-no-opt/packing/member-padding.comp b/shaders-msl-no-opt/packing/member-padding.comp new file mode 100644 index 00000000000..a413662f386 --- /dev/null +++ b/shaders-msl-no-opt/packing/member-padding.comp @@ -0,0 +1,14 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + layout(offset = 16) float a; + layout(offset = 40) float b; +}; + +void main() +{ + a = 10.0; + b = 20.0; +} diff --git a/shaders-msl-no-opt/packing/std140-array-of-vectors.comp b/shaders-msl-no-opt/packing/std140-array-of-vectors.comp new file mode 100644 index 00000000000..260a49810ab --- /dev/null +++ b/shaders-msl-no-opt/packing/std140-array-of-vectors.comp @@ -0,0 +1,47 @@ +#version 450 +layout(local_size_x 
= 1) in; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + float v1[4]; + vec2 v2[4]; + vec3 v3[4]; + vec4 v4[4]; + + float v1_array_of_array[4][4]; + vec2 v2_array_of_array[4][4]; + vec3 v3_array_of_array[4][4]; + vec4 v4_array_of_array[4][4]; + + float v_unsized[]; +}; + +void main() +{ + float loaded1 = v1[1]; + v1[2] = loaded1; + + vec2 loaded2 = v2[1]; + v2[2] = loaded2; + + vec3 loaded3 = v3[1]; + v3[2] = loaded3; + + vec4 loaded4 = v4[1]; + v4[2] = loaded4; + + loaded1 = v1_array_of_array[1][2]; + v1_array_of_array[2][3] = loaded1; + + loaded2 = v2_array_of_array[1][2]; + v2_array_of_array[2][3] = loaded2; + + loaded3 = v3_array_of_array[1][2]; + v3_array_of_array[2][3] = loaded3; + + loaded4 = v4_array_of_array[1][2]; + v4_array_of_array[2][3] = loaded4; + + loaded1 = v_unsized[1]; + v_unsized[2] = loaded1; +} diff --git a/shaders-msl-no-opt/packing/struct-alignment.comp b/shaders-msl-no-opt/packing/struct-alignment.comp new file mode 100644 index 00000000000..f9f58b7327f --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-alignment.comp @@ -0,0 +1,22 @@ +#version 450 +layout(local_size_x = 1) in; + +struct Foo +{ + vec3 a; // <- This one should become packed_float3, and the MSL alignment of the struct is now 4. + float b; +}; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + vec2 a; + float b; + // <- We expect 4 bytes of padding here since MSL alignment of Foo must be lowered to 4. 
+ Foo foo; +}; + +void main() +{ + a.x = 10.0; + b = 20.0; +} diff --git a/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp b/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp new file mode 100644 index 00000000000..08742d5bba5 --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-packing-array-of-scalar.comp @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +struct Foo +{ + vec3 a; +}; + +layout(scalar, set = 0, binding = 0) buffer SSBOScalar +{ + Foo v[]; +} buffer_scalar; + +void main() +{ + buffer_scalar.v[1].a.y = 1.0; +} diff --git a/shaders-msl-no-opt/packing/struct-packing-recursive.comp b/shaders-msl-no-opt/packing/struct-packing-recursive.comp new file mode 100644 index 00000000000..c3281b9ce01 --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-packing-recursive.comp @@ -0,0 +1,29 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +struct Foo +{ + vec4 a; +}; + +struct Bar +{ + Foo a; +}; + +struct Baz +{ + Bar a; +}; + +layout(scalar, set = 0, binding = 0) buffer SSBOScalar +{ + float v; + Baz baz; +} buffer_scalar; + +void main() +{ + buffer_scalar.baz.a.a.a.a.x = 10.0; +} diff --git a/shaders-msl-no-opt/packing/struct-packing.comp b/shaders-msl-no-opt/packing/struct-packing.comp new file mode 100644 index 00000000000..69a80382e48 --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-packing.comp @@ -0,0 +1,27 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 1) in; + +// Foo will be marked packed_float3 because offset of bar is just 12 bytes after foo. +struct Foo +{ + vec3 a; +}; + +// Bar will be marked as packed due to alignment of the struct itself cannot work without packed. 
+struct Bar +{ + vec3 a; +}; + +layout(scalar, set = 0, binding = 0) buffer SSBOScalar +{ + Foo foo; + Bar bar; +} buffer_scalar; + +void main() +{ + buffer_scalar.foo.a.x = 10.0; + buffer_scalar.bar.a.x = 20.0; +} diff --git a/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp b/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp new file mode 100644 index 00000000000..ef1ba65cf4f --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-size-padding-array-of-array.comp @@ -0,0 +1,45 @@ +#version 450 +layout(local_size_x = 1) in; + +struct A +{ + float v; +}; + +struct B +{ + vec2 v; +}; + +struct C +{ + vec3 v; +}; + +struct D +{ + vec4 v; +}; + +struct E +{ + vec4 a; + vec2 b; +}; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + A a[2][4]; + B b[2][4]; + C c[2][4]; + D d[2][4]; + mat2 e[2][4]; + E f[]; +}; + +void main() +{ + f[0].a = vec4(2.0); + mat2 tmp = e[0][1]; + e[1][2] = tmp; +} diff --git a/shaders-msl-no-opt/packing/struct-size-padding.comp b/shaders-msl-no-opt/packing/struct-size-padding.comp new file mode 100644 index 00000000000..ad65415bafe --- /dev/null +++ b/shaders-msl-no-opt/packing/struct-size-padding.comp @@ -0,0 +1,45 @@ +#version 450 +layout(local_size_x = 1) in; + +struct A +{ + float v; +}; + +struct B +{ + vec2 v; +}; + +struct C +{ + vec3 v; +}; + +struct D +{ + vec4 v; +}; + +struct E +{ + vec4 a; + vec2 b; +}; + +layout(std140, set = 0, binding = 0) buffer SSBO +{ + A a[4]; + B b[4]; + C c[4]; + D d[4]; + mat2 e[4]; + E f[]; +}; + +void main() +{ + f[0].a = vec4(2.0); + mat2 tmp = e[1]; + e[2] = tmp; +} diff --git a/shaders-msl-no-opt/tesc/copy-tess-level.tesc b/shaders-msl-no-opt/tesc/copy-tess-level.tesc new file mode 100644 index 00000000000..7510ff8f675 --- /dev/null +++ b/shaders-msl-no-opt/tesc/copy-tess-level.tesc @@ -0,0 +1,12 @@ +#version 450 +layout(vertices = 1) out; + +void main() +{ + gl_TessLevelInner = float[](1.0, 2.0); + gl_TessLevelOuter = float[](1.0, 2.0, 3.0, 4.0); + + float 
inner[2] = gl_TessLevelInner; + float outer[4] = gl_TessLevelOuter; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} diff --git a/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc b/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc new file mode 100644 index 00000000000..16b60e495ad --- /dev/null +++ b/shaders-msl-no-opt/tesc/passthrough-clip-cull.multi-patch.tesc @@ -0,0 +1,22 @@ +#version 450 + +layout(vertices = 4) out; + +in gl_PerVertex +{ + float gl_ClipDistance[2]; + float gl_CullDistance[1]; +} gl_in[]; + +out gl_PerVertex +{ + float gl_ClipDistance[2]; + float gl_CullDistance[1]; +} gl_out[]; + +void main() +{ + gl_out[gl_InvocationID].gl_ClipDistance[0] = gl_in[gl_InvocationID].gl_ClipDistance[0]; + gl_out[gl_InvocationID].gl_ClipDistance[1] = gl_in[gl_InvocationID].gl_ClipDistance[1]; + gl_out[gl_InvocationID].gl_CullDistance[0] = gl_in[gl_InvocationID].gl_CullDistance[0]; +} diff --git a/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc b/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc new file mode 100644 index 00000000000..4ccfa28267a --- /dev/null +++ b/shaders-msl-no-opt/tesc/tess-level-read-write-in-function-quad.tesc @@ -0,0 +1,24 @@ +#version 450 +layout(vertices = 1) out; + +float load_tess_level_in_func() +{ + return gl_TessLevelInner[0] + gl_TessLevelOuter[1]; +} + +void store_tess_level_in_func() +{ + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 2.0; + gl_TessLevelOuter[0] = 3.0; + gl_TessLevelOuter[1] = 4.0; + gl_TessLevelOuter[2] = 5.0; + gl_TessLevelOuter[3] = 6.0; +} + +void main() +{ + store_tess_level_in_func(); + float v = load_tess_level_in_func(); + gl_out[gl_InvocationID].gl_Position = vec4(v); +} diff --git a/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese b/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese new file mode 100644 index 00000000000..1d8a50062e9 --- /dev/null +++ 
b/shaders-msl-no-opt/tese/builtin-input-automatic-attribute-assignment.tese @@ -0,0 +1,10 @@ +#version 450 +layout(quads) in; + +layout(location = 0) patch in vec4 FragColor; +layout(location = 2) in vec4 FragColors[]; + +void main() +{ + gl_Position = vec4(1.0) + FragColor + FragColors[0] + FragColors[1] + gl_TessLevelInner[0] + gl_TessLevelOuter[gl_PrimitiveID & 1] + gl_in[0].gl_Position; +} diff --git a/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese b/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese new file mode 100644 index 00000000000..e86619e1a02 --- /dev/null +++ b/shaders-msl-no-opt/tese/load-clip-cull.msl2.tese @@ -0,0 +1,19 @@ +#version 450 +layout(quads) in; + +in gl_PerVertex +{ + float gl_ClipDistance[2]; + float gl_CullDistance[3]; + vec4 gl_Position; +} gl_in[]; + +void main() +{ + gl_Position.x = gl_in[0].gl_ClipDistance[0]; + gl_Position.y = gl_in[1].gl_CullDistance[0]; + gl_Position.z = gl_in[0].gl_ClipDistance[1]; + gl_Position.w = gl_in[1].gl_CullDistance[1]; + gl_Position += gl_in[0].gl_Position; + gl_Position += gl_in[1].gl_Position; +} diff --git a/shaders-msl-no-opt/vert/cull-distance.for-tess.vert b/shaders-msl-no-opt/vert/cull-distance.for-tess.vert new file mode 100644 index 00000000000..8df181cdb30 --- /dev/null +++ b/shaders-msl-no-opt/vert/cull-distance.for-tess.vert @@ -0,0 +1,10 @@ +#version 450 + +out float gl_CullDistance[2]; + +void main() +{ + gl_CullDistance[0] = 1.0; + gl_CullDistance[1] = 3.0; + gl_Position = vec4(1.0); +} diff --git a/shaders-msl/vert/layer.msl11.invalid.vert b/shaders-msl-no-opt/vert/layer.msl11.invalid.vert similarity index 100% rename from shaders-msl/vert/layer.msl11.invalid.vert rename to shaders-msl-no-opt/vert/layer.msl11.invalid.vert diff --git a/shaders-msl-no-opt/vert/modf-storage-class.capture.vert b/shaders-msl-no-opt/vert/modf-storage-class.capture.vert new file mode 100644 index 00000000000..447c4975d93 --- /dev/null +++ b/shaders-msl-no-opt/vert/modf-storage-class.capture.vert @@ -0,0 +1,9 
@@ +#version 450 + +layout(location = 0) out vec4 f; +layout(location = 0) in vec4 f2; + +void main() +{ + gl_Position = modf(f2, f); +} diff --git a/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert b/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert new file mode 100644 index 00000000000..2c142a78105 --- /dev/null +++ b/shaders-msl-no-opt/vert/pass-array-by-value.force-native-array.vert @@ -0,0 +1,26 @@ +#version 310 es + +layout(location = 0) in int Index1; +layout(location = 1) in int Index2; + +vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4]) +{ + return positions[Index1] + positions2[Index2]; +} + +vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4]) +{ + return consume_constant_arrays2(positions, positions2); +} + +const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0)); + +void main() +{ + vec4 LUT2[4]; + LUT2[0] = vec4(10.0); + LUT2[1] = vec4(11.0); + LUT2[2] = vec4(12.0); + LUT2[3] = vec4(13.0); + gl_Position = consume_constant_arrays(LUT1, LUT2); +} diff --git a/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert b/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert new file mode 100644 index 00000000000..54c7afd07aa --- /dev/null +++ b/shaders-msl-no-opt/vert/uninitialized-vertex-output.vert @@ -0,0 +1,8 @@ +#version 450 + +layout(location = 0) out vec4 Pos; + +void main() +{ + gl_Position = vec4(1.0); +} diff --git a/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert b/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert new file mode 100644 index 00000000000..4ec228df294 --- /dev/null +++ b/shaders-msl-no-opt/vert/unused-subgroup-builtin.msl22.vert @@ -0,0 +1,7 @@ +#version 450 +#extension GL_KHR_shader_subgroup_ballot : require + +void main() +{ + gl_SubgroupEqMask; +} diff --git a/shaders-msl/vert/viewport-index.msl2.invalid.vert b/shaders-msl-no-opt/vert/viewport-index.msl2.invalid.vert similarity index 100% rename 
from shaders-msl/vert/viewport-index.msl2.invalid.vert rename to shaders-msl-no-opt/vert/viewport-index.msl2.invalid.vert diff --git a/shaders-msl/amd/shader_trinary_minmax.msl21.comp b/shaders-msl/amd/shader_trinary_minmax.msl21.comp new file mode 100644 index 00000000000..f836146a172 --- /dev/null +++ b/shaders-msl/amd/shader_trinary_minmax.msl21.comp @@ -0,0 +1,11 @@ +#version 450 +#extension GL_AMD_shader_trinary_minmax : require + +layout (local_size_x = 64) in; + +void main () +{ + int t11 = min3(0, 3, 2); + int t12 = max3(0, 3, 2); + int t13 = mid3(0, 3, 2); +} diff --git a/shaders-msl/asm/comp/bitcast_icmp.asm.comp b/shaders-msl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..b7b4e0b2e1e --- /dev/null +++ b/shaders-msl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero 
= OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ugt + + %int_uge = OpSelect %uvec4 
%result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp b/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp new file mode 100644 index 00000000000..b01262f5bd4 --- /dev/null +++ b/shaders-msl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp @@ -0,0 +1,43 @@ +OpCapability Shader +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %var_id +OpExecutionMode %main LocalSize 1 1 1 +OpDecorate %var_id BuiltIn GlobalInvocationId +OpDecorate %var_input Binding 0 +OpDecorate %var_input DescriptorSet 0 +OpDecorate %var_outdata Binding 1 +OpDecorate %var_outdata DescriptorSet 0 +OpMemberDecorate %type_container_struct 0 Offset 0 +OpMemberDecorate %type_container_struct 1 Offset 4 +OpMemberDecorate %type_container_struct 2 Offset 8 +OpMemberDecorate %type_container_struct 3 Offset 12 +OpDecorate %type_container_struct Block +%bool = OpTypeBool +%void = OpTypeVoid +%voidf = OpTypeFunction %void +%u32 = OpTypeInt 32 0 +%i32 = OpTypeInt 32 1 +%f32 = OpTypeFloat 32 +%uvec3 = OpTypeVector %u32 3 +%fvec3 = OpTypeVector %f32 3 +%uvec3ptr = OpTypePointer Input %uvec3 +%i32ptr = OpTypePointer Uniform %i32 +%f32ptr = OpTypePointer Uniform %f32 +%i32arr = OpTypeRuntimeArray %i32 +%f32arr = OpTypeRuntimeArray %f32 +%type_empty_struct = OpTypeStruct +%type_container_struct = OpTypeStruct %i32 %type_empty_struct %type_empty_struct %i32 +%type_container_struct_ubo_ptr = OpTypePointer Uniform %type_container_struct +%type_container_struct_ssbo_ptr = OpTypePointer StorageBuffer %type_container_struct +%var_id = OpVariable %uvec3ptr Input +%var_input = OpVariable %type_container_struct_ssbo_ptr StorageBuffer +%var_outdata = OpVariable %type_container_struct_ssbo_ptr StorageBuffer + +%main = OpFunction %void None %voidf +%label = OpLabel +%input_copy = OpCopyObject %type_container_struct_ssbo_ptr %var_input +%result = OpLoad 
%type_container_struct %input_copy +OpStore %var_outdata %result +OpReturn +OpFunctionEnd diff --git a/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp b/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp new file mode 100644 index 00000000000..63df59ac328 --- /dev/null +++ b/shaders-msl/asm/comp/copy-object-ubo-to-ssbo.asm.comp @@ -0,0 +1,43 @@ +OpCapability Shader +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %var_id +OpExecutionMode %main LocalSize 1 1 1 +OpDecorate %var_id BuiltIn GlobalInvocationId +OpDecorate %var_input Binding 0 +OpDecorate %var_input DescriptorSet 0 +OpDecorate %var_outdata Binding 1 +OpDecorate %var_outdata DescriptorSet 0 +OpMemberDecorate %type_container_struct 0 Offset 0 +OpMemberDecorate %type_container_struct 1 Offset 16 +OpMemberDecorate %type_container_struct 2 Offset 32 +OpMemberDecorate %type_container_struct 3 Offset 48 +OpDecorate %type_container_struct Block +%bool = OpTypeBool +%void = OpTypeVoid +%voidf = OpTypeFunction %void +%u32 = OpTypeInt 32 0 +%i32 = OpTypeInt 32 1 +%f32 = OpTypeFloat 32 +%uvec3 = OpTypeVector %u32 3 +%fvec3 = OpTypeVector %f32 3 +%uvec3ptr = OpTypePointer Input %uvec3 +%i32ptr = OpTypePointer Uniform %i32 +%f32ptr = OpTypePointer Uniform %f32 +%i32arr = OpTypeRuntimeArray %i32 +%f32arr = OpTypeRuntimeArray %f32 +%type_empty_struct = OpTypeStruct +%type_container_struct = OpTypeStruct %i32 %type_empty_struct %type_empty_struct %i32 +%type_container_struct_ubo_ptr = OpTypePointer Uniform %type_container_struct +%type_container_struct_ssbo_ptr = OpTypePointer StorageBuffer %type_container_struct +%var_id = OpVariable %uvec3ptr Input +%var_input = OpVariable %type_container_struct_ubo_ptr Uniform +%var_outdata = OpVariable %type_container_struct_ssbo_ptr StorageBuffer + +%main = OpFunction %void None %voidf +%label = OpLabel +%input_copy = OpCopyObject %type_container_struct_ubo_ptr %var_input +%result = OpLoad 
%type_container_struct %input_copy +OpStore %var_outdata %result +OpReturn +OpFunctionEnd diff --git a/shaders-msl/asm/comp/image-load-store-short-vector.asm.comp b/shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp similarity index 100% rename from shaders-msl/asm/comp/image-load-store-short-vector.asm.comp rename to shaders-msl/asm/comp/image-load-store-short-vector.invalid.asm.comp diff --git a/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp b/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp new file mode 100644 index 00000000000..65a7eedd90b --- /dev/null +++ b/shaders-msl/asm/comp/op-spec-constant-op-vector-related.asm.comp @@ -0,0 +1,107 @@ +OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %id +OpExecutionMode %main LocalSize 1 1 1 +OpName %main "main" +OpName %id "gl_GlobalInvocationID" +OpDecorate %id BuiltIn GlobalInvocationId +OpDecorate %sc_0 SpecId 0 +OpDecorate %sc_1 SpecId 1 +OpDecorate %sc_2 SpecId 2 +OpDecorate %i32arr ArrayStride 4 +OpDecorate %buf BufferBlock +OpDecorate %indata DescriptorSet 0 +OpDecorate %indata Binding 0 +OpDecorate %outdata DescriptorSet 0 +OpDecorate %outdata Binding 1 +OpDecorate %f32arr ArrayStride 4 +OpMemberDecorate %buf 0 Offset 0 +%bool = OpTypeBool +%void = OpTypeVoid +%voidf = OpTypeFunction %void +%u32 = OpTypeInt 32 0 +%i32 = OpTypeInt 32 1 +%f32 = OpTypeFloat 32 +%uvec3 = OpTypeVector %u32 3 +%fvec3 = OpTypeVector %f32 3 +%uvec3ptr = OpTypePointer Input %uvec3 +%i32ptr = OpTypePointer Uniform %i32 +%f32ptr = OpTypePointer Uniform %f32 +%i32arr = OpTypeRuntimeArray %i32 +%f32arr = OpTypeRuntimeArray %f32 +%ivec3 = OpTypeVector %i32 3 +%zero = OpConstant %i32 0 +%one = OpConstant %i32 1 +%two = OpConstant %i32 2 +%three = OpConstant %i32 3 +%iarr3 = OpTypeArray %i32 %three +%imat3 = OpTypeArray %iarr3 %three +%struct = OpTypeStruct %imat3 +%buf = OpTypeStruct %i32arr +%bufptr = OpTypePointer Uniform %buf +%indata = OpVariable 
%bufptr Uniform +%outdata = OpVariable %bufptr Uniform +%id = OpVariable %uvec3ptr Input +%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero +%vec3_undef = OpUndef %ivec3 +%iarr3_0 = OpConstantComposite %iarr3 %zero %zero %zero +%imat3_0 = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0 +%struct_0 = OpConstantComposite %struct %imat3_0 +%sc_0 = OpSpecConstant %i32 0 +%sc_1 = OpSpecConstant %i32 0 +%sc_2 = OpSpecConstant %i32 0 +%iarr3_a = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_0 0 +%iarr3_b = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_a 1 +%iarr3_c = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_b 2 +%iarr3_d = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_0 0 +%iarr3_e = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_d 1 +%iarr3_f = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_e 2 +%iarr3_g = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_0 0 +%iarr3_h = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_g 1 +%iarr3_i = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_h 2 +%imat3_a = OpSpecConstantOp %imat3 CompositeInsert %iarr3_c %imat3_0 0 +%imat3_b = OpSpecConstantOp %imat3 CompositeInsert %iarr3_f %imat3_a 1 +%imat3_c = OpSpecConstantOp %imat3 CompositeInsert %iarr3_i %imat3_b 2 +%struct_a = OpSpecConstantOp %struct CompositeInsert %imat3_c %struct_0 0 +%struct_b = OpSpecConstantOp %struct CompositeInsert %sc_2 %struct_a 0 1 2 +%comp_0_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 0 +%comp_1_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 0 +%comp_0_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 1 +%comp_2_2 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 2 +%comp_2_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 0 +%comp_1_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 1 +%cmpres_0 = OpSpecConstantOp %bool IEqual %comp_0_0 %comp_1_0 +%cmpres_1 = OpSpecConstantOp %bool IEqual %comp_0_1 %comp_2_2 +%cmpres_2 = OpSpecConstantOp %bool 
IEqual %comp_2_0 %comp_1_1 +%mustbe_0 = OpSpecConstantOp %i32 Select %cmpres_0 %one %zero +%mustbe_1 = OpSpecConstantOp %i32 Select %cmpres_1 %one %zero +%mustbe_2 = OpSpecConstantOp %i32 Select %cmpres_2 %two %one +%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0 +%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1 +%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2 +%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2 +%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0 +%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5 +%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4 +%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2 +%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0 +%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1 +%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2 +%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1 +%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2 +%main = OpFunction %void None %voidf +%label = OpLabel +%subf_a = OpISub %i32 %one %mustbe_0 +%subf_b = OpIMul %i32 %subf_a %mustbe_1 +%subf_c = OpISub %i32 %mustbe_2 %one +%factor = OpIMul %i32 %subf_b %subf_c +%sc_final = OpIMul %i32 %factor %sc_factor +%idval = OpLoad %uvec3 %id +%x = OpCompositeExtract %u32 %idval 0 +%inloc = OpAccessChain %i32ptr %indata %zero %x +%inval = OpLoad %i32 %inloc +%final = OpIAdd %i32 %inval %sc_final +%outloc = OpAccessChain %i32ptr %outdata %zero %x + OpStore %outloc %final + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/comp/uint_smulextended.asm.comp b/shaders-msl/asm/comp/uint_smulextended.asm.comp new file mode 100644 index 00000000000..32d483636a2 --- /dev/null +++ b/shaders-msl/asm/comp/uint_smulextended.asm.comp @@ -0,0 +1,61 @@ + OpCapability 
Shader + + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationId + OpExecutionMode %main LocalSize 1 1 1 + + OpDecorate %gl_GlobalInvocationId BuiltIn GlobalInvocationId + OpDecorate %ra_uint ArrayStride 4 + OpDecorate %struct_uint4 BufferBlock + OpMemberDecorate %struct_uint4 0 Offset 0 + OpDecorate %input0 DescriptorSet 0 + OpDecorate %input0 Binding 0 + OpDecorate %input1 DescriptorSet 0 + OpDecorate %input1 Binding 1 + OpDecorate %output0 DescriptorSet 0 + OpDecorate %output0 Binding 2 + OpDecorate %output1 DescriptorSet 0 + OpDecorate %output1 Binding 3 + + %uint = OpTypeInt 32 0 + %ptr_uint = OpTypePointer Uniform %uint + %ptr_input_uint = OpTypePointer Input %uint + %uint3 = OpTypeVector %uint 3 + %ptr_input_uint3 = OpTypePointer Input %uint3 + %void = OpTypeVoid + %voidFn = OpTypeFunction %void + + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %ra_uint = OpTypeRuntimeArray %uint + %uint4 = OpTypeVector %uint 4 + %struct_uint4 = OpTypeStruct %ra_uint + %ptr_struct_uint4 = OpTypePointer Uniform %struct_uint4 + %resulttype = OpTypeStruct %uint %uint +%gl_GlobalInvocationId = OpVariable %ptr_input_uint3 Input + %input0 = OpVariable %ptr_struct_uint4 Uniform + %input1 = OpVariable %ptr_struct_uint4 Uniform + + %output0 = OpVariable %ptr_struct_uint4 Uniform + %output1 = OpVariable %ptr_struct_uint4 Uniform + + %main = OpFunction %void None %voidFn + %mainStart = OpLabel + %index_ptr = OpAccessChain %ptr_input_uint %gl_GlobalInvocationId %uint_0 + %index = OpLoad %uint %index_ptr + %in_ptr0 = OpAccessChain %ptr_uint %input0 %uint_0 %index + %invalue0 = OpLoad %uint %in_ptr0 + %in_ptr1 = OpAccessChain %ptr_uint %input1 %uint_0 %index + %invalue1 = OpLoad %uint %in_ptr1 + + %outvalue = OpSMulExtended %resulttype %invalue0 %invalue1 + %outvalue0 = OpCompositeExtract %uint %outvalue 0 + %out_ptr0 = OpAccessChain %ptr_uint %output0 %uint_0 %index + OpStore %out_ptr0 %outvalue0 + %outvalue1 = OpCompositeExtract %uint 
%outvalue 1 + %out_ptr1 = OpAccessChain %ptr_uint %output1 %uint_0 %index + OpStore %out_ptr1 %outvalue1 + + + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/comp/undefined-constant-composite.asm.comp b/shaders-msl/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..9de0501fe21 --- /dev/null +++ b/shaders-msl/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,102 @@ +; +; The shader below is based on the following GLSL shader: +; +; #version 450 +; +; struct Pair { +; int first; +; int second; +; }; +; +; const Pair constant_pair = { 100, 200 }; +; +; layout(set=0, binding=0, std430) buffer InputBlock { +; int array[10]; +; } inputValues; +; +; layout(set=0, binding=1, std430) buffer OutputBlock { +; int array[10]; +; } outputValues; +; +; int add_second (int value, Pair pair) { +; return value + pair.second; +; } +; +; void main() { +; uint idx = gl_GlobalInvocationID.x; +; outputValues.array[idx] = add_second(inputValues.array[idx], constant_pair); +; } +; +; However, the first element of constant_pair has been modified to be undefined. 
+; + OpCapability Shader + %std450 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_int_uint_10 ArrayStride 4 + OpMemberDecorate %OutputBlock 0 Offset 0 + OpDecorate %OutputBlock BufferBlock + OpDecorate %outputValues DescriptorSet 0 + OpDecorate %outputValues Binding 1 + OpMemberDecorate %InputBlock 0 Offset 0 + OpDecorate %InputBlock BufferBlock + OpDecorate %inputValues DescriptorSet 0 + OpDecorate %inputValues Binding 0 + %void = OpTypeVoid + %void_func = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_200 = OpConstant %int 200 + %uint_0 = OpConstant %uint 0 + %uint_10 = OpConstant %uint 10 + %_ptr_Function_int = OpTypePointer Function %int + %Pair = OpTypeStruct %int %int + %_ptr_Function_Pair = OpTypePointer Function %Pair + %add_second_func_type = OpTypeFunction %int %_ptr_Function_int %_ptr_Function_Pair + %_ptr_Function_uint = OpTypePointer Function %uint + %_ptr_Input_v3uint = OpTypePointer Input %v3uint + %_ptr_Input_uint = OpTypePointer Input %uint + %_arr_int_uint_10 = OpTypeArray %int %uint_10 + %OutputBlock = OpTypeStruct %_arr_int_uint_10 +%_ptr_Uniform_OutputBlock = OpTypePointer Uniform %OutputBlock + %outputValues = OpVariable %_ptr_Uniform_OutputBlock Uniform + %InputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_InputBlock = OpTypePointer Uniform %InputBlock + %inputValues = OpVariable %_ptr_Uniform_InputBlock Uniform + ; Replaced %int_100 with an undefined int. + %undef_int = OpUndef %int + ; Composed a constant Pair with the undefined int in the first member. 
+ %const_Pair = OpConstantComposite %Pair %undef_int %int_200 + %_ptr_Uniform_int = OpTypePointer Uniform %int + %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %void_func + %main_label = OpLabel + %param_1 = OpVariable %_ptr_Function_int Function + %param_2 = OpVariable %_ptr_Function_Pair Function + %gidx_ptr = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %gidx = OpLoad %uint %gidx_ptr + %input_value_ptr = OpAccessChain %_ptr_Uniform_int %inputValues %int_0 %gidx + %input_value = OpLoad %int %input_value_ptr + OpStore %param_1 %input_value + OpStore %param_2 %const_Pair + %retval = OpFunctionCall %int %add_second %param_1 %param_2 + %output_value_ptr = OpAccessChain %_ptr_Uniform_int %outputValues %int_0 %gidx + OpStore %output_value_ptr %retval + OpReturn + OpFunctionEnd + %add_second = OpFunction %int None %add_second_func_type + %value_ptr = OpFunctionParameter %_ptr_Function_int + %pair = OpFunctionParameter %_ptr_Function_Pair + %add_second_label = OpLabel + %value = OpLoad %int %value_ptr + ; Access the second struct member, which is defined. 
+ %pair_second_ptr = OpAccessChain %_ptr_Function_int %pair %int_1 + %pair_second = OpLoad %int %pair_second_ptr + %add_result = OpIAdd %int %value %pair_second + OpReturnValue %add_result + OpFunctionEnd diff --git a/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp b/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp new file mode 100644 index 00000000000..d89a402bf5f --- /dev/null +++ b/shaders-msl/asm/comp/undefined-spec-constant-composite.asm.comp @@ -0,0 +1,122 @@ +; +; The shader below is based on the following GLSL shader: +; +; #version 450 +; +; struct Pair { +; int first; +; int second; +; }; +; +; const Pair constant_pair = { 100, 200 }; +; +; layout (constant_id=0) const int constantFirst = 0; +; +; Pair spec_constant_pair = { constantFirst, 200 }; +; +; layout(set=0, binding=0, std430) buffer InputBlock { +; int array[10]; +; } inputValues; +; +; layout(set=0, binding=1, std430) buffer OutputBlock { +; int array[10]; +; } outputValues; +; +; int add_first_and_second (int value, Pair p1, Pair p2) { +; return value + p1.first + p2.second; +; } +; +; void main() { +; uint idx = gl_GlobalInvocationID.x; +; outputValues.array[idx] = add_first_and_second(inputValues.array[idx], spec_constant_pair, constant_pair); +; } +; +; However, both the constant_pair and the spec_constant_pair have one of their members replaced by undefined values. 
+; + OpCapability Shader + %std450 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_int_uint_10 ArrayStride 4 + OpMemberDecorate %OutputBlock 0 Offset 0 + OpDecorate %OutputBlock BufferBlock + OpDecorate %outputValues DescriptorSet 0 + OpDecorate %outputValues Binding 1 + OpMemberDecorate %InputBlock 0 Offset 0 + OpDecorate %InputBlock BufferBlock + OpDecorate %inputValues DescriptorSet 0 + OpDecorate %inputValues Binding 0 + OpDecorate %spec_constant SpecId 0 + %void = OpTypeVoid + %void_func = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_200 = OpConstant %int 200 + %uint_0 = OpConstant %uint 0 + %uint_10 = OpConstant %uint 10 + %_ptr_Function_int = OpTypePointer Function %int + %Pair = OpTypeStruct %int %int + %_ptr_Function_Pair = OpTypePointer Function %Pair +%add_pair_members_func_type = OpTypeFunction %int %_ptr_Function_int %_ptr_Function_Pair %_ptr_Function_Pair + %_ptr_Function_uint = OpTypePointer Function %uint + %_ptr_Input_v3uint = OpTypePointer Input %v3uint + %_ptr_Input_uint = OpTypePointer Input %uint + %_arr_int_uint_10 = OpTypeArray %int %uint_10 + %OutputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_OutputBlock = OpTypePointer Uniform %OutputBlock + %outputValues = OpVariable %_ptr_Uniform_OutputBlock Uniform + %InputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_InputBlock = OpTypePointer Uniform %InputBlock + %inputValues = OpVariable %_ptr_Uniform_InputBlock Uniform + ; Replaced %int_100 with an undefined int. + %undef_int = OpUndef %int + ; Composed a spec constant Pair with an undefined int in the second member. 
+ %spec_constant = OpSpecConstant %int 0 + %spec_const_Pair = OpSpecConstantComposite %Pair %spec_constant %undef_int + ; Composed a constant Pair with the undefined int in the first member. + %const_Pair = OpConstantComposite %Pair %undef_int %int_200 + %_ptr_Uniform_int = OpTypePointer Uniform %int + %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %void_func + %main_label = OpLabel + %param_1 = OpVariable %_ptr_Function_int Function + %param_2 = OpVariable %_ptr_Function_Pair Function + %param_3 = OpVariable %_ptr_Function_Pair Function + %gidx_ptr = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %gidx = OpLoad %uint %gidx_ptr + %input_value_ptr = OpAccessChain %_ptr_Uniform_int %inputValues %int_0 %gidx + %input_value = OpLoad %int %input_value_ptr + OpStore %param_1 %input_value + OpStore %param_2 %spec_const_Pair + OpStore %param_3 %const_Pair + ; Pass the input value as the first argument. + ; Pass the specialization constant Pair as the second argument. + ; Pass the constant Pair as the third argument. + %retval = OpFunctionCall %int %add_pair_members %param_1 %param_2 %param_3 + %output_value_ptr = OpAccessChain %_ptr_Uniform_int %outputValues %int_0 %gidx + OpStore %output_value_ptr %retval + OpReturn + OpFunctionEnd + %add_pair_members = OpFunction %int None %add_pair_members_func_type + %value_ptr = OpFunctionParameter %_ptr_Function_int + %pair_1 = OpFunctionParameter %_ptr_Function_Pair + %pair_2 = OpFunctionParameter %_ptr_Function_Pair + %add_pair_members_label = OpLabel + %value = OpLoad %int %value_ptr + ; Access the first struct member from the first pair. + ; Access the second struct member from the second pair. + ; Both should be defined according to the function call above. 
+ %pair_1_first_ptr = OpAccessChain %_ptr_Function_int %pair_1 %int_0 + %pair_2_second_ptr = OpAccessChain %_ptr_Function_int %pair_2 %int_1 + %pair_1_first = OpLoad %int %pair_1_first_ptr + %pair_2_second = OpLoad %int %pair_2_second_ptr + %partial_result = OpIAdd %int %value %pair_1_first + %final_result = OpIAdd %int %partial_result %pair_2_second + OpReturnValue %final_result + OpFunctionEnd + diff --git a/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag b/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag new file mode 100644 index 00000000000..0be26d1c055 --- /dev/null +++ b/shaders-msl/asm/frag/depth-image-color-format-fetch.asm.frag @@ -0,0 +1,170 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 132 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "main" %2 %3 %4 + OpExecutionMode %1 OriginUpperLeft + OpDecorate %3 Location 0 + OpDecorate %2 Location 1 + OpDecorate %4 BuiltIn FragCoord + OpDecorate %5 ArrayStride 4 + OpDecorate %6 ArrayStride 16 + OpMemberDecorate %7 0 Offset 0 + OpDecorate %7 BufferBlock + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpDecorate %9 DescriptorSet 0 + OpDecorate %9 Binding 1 + OpDecorate %10 DescriptorSet 0 + OpDecorate %10 Binding 2 + %11 = OpTypeVoid + %12 = OpTypeBool + %13 = OpTypeInt 32 1 + %14 = OpTypeInt 32 0 + %16 = OpTypeFloat 32 + %17 = OpTypeVector %13 2 + %18 = OpTypeVector %14 2 + %19 = OpTypeVector %16 2 + %20 = OpTypeVector %13 3 + %21 = OpTypeVector %14 3 + %22 = OpTypeVector %16 3 + %23 = OpTypeVector %13 4 + %24 = OpTypeVector %14 4 + %25 = OpTypeVector %16 4 + %26 = OpTypeVector %12 4 + %27 = OpTypeFunction %25 %25 + %28 = OpTypeFunction %12 + %29 = OpTypeFunction %11 + %30 = OpTypePointer Input %16 + %31 = OpTypePointer Input %13 + %32 = OpTypePointer Input %14 + %33 = OpTypePointer Input %19 + %34 = OpTypePointer Input %17 + %35 = OpTypePointer Input %18 + %38 = OpTypePointer Input %22 + 
%40 = OpTypePointer Input %25 + %41 = OpTypePointer Input %23 + %42 = OpTypePointer Input %24 + %43 = OpTypePointer Output %16 + %44 = OpTypePointer Output %13 + %45 = OpTypePointer Output %14 + %46 = OpTypePointer Output %19 + %47 = OpTypePointer Output %17 + %48 = OpTypePointer Output %18 + %49 = OpTypePointer Output %25 + %50 = OpTypePointer Output %23 + %51 = OpTypePointer Output %24 + %52 = OpTypePointer Function %16 + %53 = OpTypePointer Function %13 + %54 = OpTypePointer Function %25 + %55 = OpConstant %16 1 + %56 = OpConstant %16 0 + %57 = OpConstant %16 0.5 + %58 = OpConstant %16 -1 + %59 = OpConstant %16 7 + %60 = OpConstant %16 8 + %61 = OpConstant %13 0 + %62 = OpConstant %13 1 + %63 = OpConstant %13 2 + %64 = OpConstant %13 3 + %65 = OpConstant %13 4 + %66 = OpConstant %14 0 + %67 = OpConstant %14 1 + %68 = OpConstant %14 2 + %69 = OpConstant %14 3 + %70 = OpConstant %14 32 + %71 = OpConstant %14 4 + %72 = OpConstant %14 2147483647 + %73 = OpConstantComposite %25 %55 %55 %55 %55 + %74 = OpConstantComposite %25 %55 %56 %56 %55 + %75 = OpConstantComposite %25 %57 %57 %57 %57 + %76 = OpTypeArray %16 %67 + %77 = OpTypeArray %16 %68 + %78 = OpTypeArray %25 %69 + %79 = OpTypeArray %16 %71 + %80 = OpTypeArray %25 %70 + %81 = OpTypePointer Input %78 + %82 = OpTypePointer Input %80 + %83 = OpTypePointer Output %77 + %84 = OpTypePointer Output %78 + %85 = OpTypePointer Output %79 + %4 = OpVariable %40 Input + %3 = OpVariable %49 Output + %2 = OpVariable %40 Input + %86 = OpConstant %14 64 + %87 = OpConstant %13 64 + %88 = OpConstant %13 8 + %89 = OpConstantComposite %19 %60 %60 + %5 = OpTypeArray %16 %86 + %6 = OpTypeArray %25 %86 + %90 = OpTypePointer Uniform %16 + %91 = OpTypePointer Uniform %25 + %7 = OpTypeStruct %6 + %92 = OpTypePointer Uniform %7 + %10 = OpVariable %92 Uniform + %93 = OpTypeImage %16 2D 1 0 0 1 Rgba32f + %94 = OpTypePointer UniformConstant %93 + %8 = OpVariable %94 UniformConstant + %95 = OpTypeSampler + %96 = OpTypePointer UniformConstant 
%95 + %9 = OpVariable %96 UniformConstant + %97 = OpTypeSampledImage %93 + %98 = OpTypeFunction %11 %13 + %1 = OpFunction %11 None %29 + %99 = OpLabel + %100 = OpLoad %25 %2 + %101 = OpFunctionCall %25 %102 %100 + OpStore %3 %101 + OpReturn + OpFunctionEnd + %103 = OpFunction %12 None %28 + %104 = OpLabel + %105 = OpAccessChain %30 %4 %61 + %106 = OpAccessChain %30 %4 %62 + %107 = OpLoad %16 %105 + %108 = OpLoad %16 %106 + %109 = OpFOrdEqual %12 %107 %57 + %110 = OpFOrdEqual %12 %108 %57 + %111 = OpLogicalAnd %12 %109 %110 + OpReturnValue %111 + OpFunctionEnd + %112 = OpFunction %11 None %98 + %113 = OpFunctionParameter %13 + %114 = OpLabel + %115 = OpSRem %13 %113 %88 + %116 = OpSDiv %13 %113 %88 + %117 = OpCompositeConstruct %17 %115 %116 + %118 = OpConvertSToF %19 %117 + %119 = OpFDiv %19 %118 %89 + %120 = OpLoad %93 %8 + %121 = OpImageFetch %25 %120 %117 + %36 = OpAccessChain %91 %10 %61 %113 + OpStore %36 %121 + OpReturn + OpFunctionEnd + %102 = OpFunction %25 None %27 + %122 = OpFunctionParameter %25 + %123 = OpLabel + %124 = OpVariable %53 Function + OpStore %124 %61 + OpBranch %125 + %125 = OpLabel + %15 = OpLoad %13 %124 + %126 = OpSLessThan %12 %15 %87 + OpLoopMerge %127 %128 None + OpBranchConditional %126 %129 %127 + %129 = OpLabel + %130 = OpLoad %13 %124 + %131 = OpFunctionCall %11 %112 %130 + OpBranch %128 + %128 = OpLabel + %37 = OpLoad %13 %124 + %39 = OpIAdd %13 %37 %62 + OpStore %124 %39 + OpBranch %125 + %127 = OpLabel + OpReturnValue %122 + OpFunctionEnd diff --git a/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag b/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag new file mode 100644 index 00000000000..97e88b55a0a --- /dev/null +++ b/shaders-msl/asm/frag/depth-image-color-format-sampled.asm.frag @@ -0,0 +1,173 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 134 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %1 "main" %2 %3 %4 + 
OpExecutionMode %1 OriginUpperLeft + OpDecorate %3 Location 0 + OpDecorate %2 Location 1 + OpDecorate %4 BuiltIn FragCoord + OpDecorate %5 ArrayStride 4 + OpDecorate %6 ArrayStride 16 + OpMemberDecorate %7 0 Offset 0 + OpDecorate %7 BufferBlock + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpDecorate %9 DescriptorSet 0 + OpDecorate %9 Binding 1 + OpDecorate %10 DescriptorSet 0 + OpDecorate %10 Binding 2 + %11 = OpTypeVoid + %12 = OpTypeBool + %13 = OpTypeInt 32 1 + %14 = OpTypeInt 32 0 + %16 = OpTypeFloat 32 + %17 = OpTypeVector %13 2 + %18 = OpTypeVector %14 2 + %19 = OpTypeVector %16 2 + %20 = OpTypeVector %13 3 + %21 = OpTypeVector %14 3 + %22 = OpTypeVector %16 3 + %23 = OpTypeVector %13 4 + %24 = OpTypeVector %14 4 + %25 = OpTypeVector %16 4 + %26 = OpTypeVector %12 4 + %27 = OpTypeFunction %25 %25 + %28 = OpTypeFunction %12 + %29 = OpTypeFunction %11 + %30 = OpTypePointer Input %16 + %31 = OpTypePointer Input %13 + %32 = OpTypePointer Input %14 + %33 = OpTypePointer Input %19 + %34 = OpTypePointer Input %17 + %35 = OpTypePointer Input %18 + %38 = OpTypePointer Input %22 + %40 = OpTypePointer Input %25 + %41 = OpTypePointer Input %23 + %42 = OpTypePointer Input %24 + %43 = OpTypePointer Output %16 + %44 = OpTypePointer Output %13 + %45 = OpTypePointer Output %14 + %46 = OpTypePointer Output %19 + %47 = OpTypePointer Output %17 + %48 = OpTypePointer Output %18 + %49 = OpTypePointer Output %25 + %50 = OpTypePointer Output %23 + %51 = OpTypePointer Output %24 + %52 = OpTypePointer Function %16 + %53 = OpTypePointer Function %13 + %54 = OpTypePointer Function %25 + %55 = OpConstant %16 1 + %56 = OpConstant %16 0 + %57 = OpConstant %16 0.5 + %58 = OpConstant %16 -1 + %59 = OpConstant %16 7 + %60 = OpConstant %16 8 + %61 = OpConstant %13 0 + %62 = OpConstant %13 1 + %63 = OpConstant %13 2 + %64 = OpConstant %13 3 + %65 = OpConstant %13 4 + %66 = OpConstant %14 0 + %67 = OpConstant %14 1 + %68 = OpConstant %14 2 + %69 = OpConstant %14 3 + %70 = 
OpConstant %14 32 + %71 = OpConstant %14 4 + %72 = OpConstant %14 2147483647 + %73 = OpConstantComposite %25 %55 %55 %55 %55 + %74 = OpConstantComposite %25 %55 %56 %56 %55 + %75 = OpConstantComposite %25 %57 %57 %57 %57 + %76 = OpTypeArray %16 %67 + %77 = OpTypeArray %16 %68 + %78 = OpTypeArray %25 %69 + %79 = OpTypeArray %16 %71 + %80 = OpTypeArray %25 %70 + %81 = OpTypePointer Input %78 + %82 = OpTypePointer Input %80 + %83 = OpTypePointer Output %77 + %84 = OpTypePointer Output %78 + %85 = OpTypePointer Output %79 + %4 = OpVariable %40 Input + %3 = OpVariable %49 Output + %2 = OpVariable %40 Input + %86 = OpConstant %14 64 + %87 = OpConstant %13 64 + %88 = OpConstant %13 8 + %89 = OpConstantComposite %19 %60 %60 + %5 = OpTypeArray %16 %86 + %6 = OpTypeArray %25 %86 + %90 = OpTypePointer Uniform %16 + %91 = OpTypePointer Uniform %25 + %7 = OpTypeStruct %6 + %92 = OpTypePointer Uniform %7 + %10 = OpVariable %92 Uniform + %93 = OpTypeImage %16 2D 1 0 0 1 Rgba32f + %94 = OpTypePointer UniformConstant %93 + %8 = OpVariable %94 UniformConstant + %95 = OpTypeSampler + %96 = OpTypePointer UniformConstant %95 + %9 = OpVariable %96 UniformConstant + %97 = OpTypeSampledImage %93 + %98 = OpTypeFunction %11 %13 + %1 = OpFunction %11 None %29 + %99 = OpLabel + %100 = OpLoad %25 %2 + %101 = OpFunctionCall %25 %102 %100 + OpStore %3 %101 + OpReturn + OpFunctionEnd + %103 = OpFunction %12 None %28 + %104 = OpLabel + %105 = OpAccessChain %30 %4 %61 + %106 = OpAccessChain %30 %4 %62 + %107 = OpLoad %16 %105 + %108 = OpLoad %16 %106 + %109 = OpFOrdEqual %12 %107 %57 + %110 = OpFOrdEqual %12 %108 %57 + %111 = OpLogicalAnd %12 %109 %110 + OpReturnValue %111 + OpFunctionEnd + %112 = OpFunction %11 None %98 + %113 = OpFunctionParameter %13 + %114 = OpLabel + %115 = OpSRem %13 %113 %88 + %116 = OpSDiv %13 %113 %88 + %117 = OpCompositeConstruct %17 %115 %116 + %118 = OpConvertSToF %19 %117 + %119 = OpFDiv %19 %118 %89 + %120 = OpLoad %93 %8 + %121 = OpLoad %95 %9 + %122 = OpSampledImage 
%97 %120 %121 + %123 = OpImageSampleExplicitLod %25 %122 %119 Lod %56 + %36 = OpAccessChain %91 %10 %61 %113 + OpStore %36 %123 + OpReturn + OpFunctionEnd + %102 = OpFunction %25 None %27 + %124 = OpFunctionParameter %25 + %125 = OpLabel + %126 = OpVariable %53 Function + OpStore %126 %61 + OpBranch %127 + %127 = OpLabel + %15 = OpLoad %13 %126 + %128 = OpSLessThan %12 %15 %87 + OpLoopMerge %129 %130 None + OpBranchConditional %128 %131 %129 + %131 = OpLabel + %132 = OpLoad %13 %126 + %133 = OpFunctionCall %11 %112 %132 + OpBranch %130 + %130 = OpLabel + %37 = OpLoad %13 %126 + %39 = OpIAdd %13 %37 %62 + OpStore %126 %39 + OpBranch %127 + %129 = OpLabel + OpReturnValue %124 + OpFunctionEnd + diff --git a/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag b/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag new file mode 100644 index 00000000000..02d018267a3 --- /dev/null +++ b/shaders-msl/asm/frag/disable-renamed-output.frag-output.asm.frag @@ -0,0 +1,83 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 37 +; Schema: 0 + OpCapability Shader + OpCapability StencilExportEXT + OpExtension "SPV_EXT_shader_stencil_export" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %o0 %o1 %o2 %o3 %o4 %o5 %o6 %o7 %oDepth %oStencil + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main DepthReplacing + OpSource GLSL 450 + OpSourceExtension "GL_ARB_shader_stencil_export" + OpName %main "main" + OpName %o0 "o0" + OpName %o1 "o1" + OpName %o2 "o2" + OpName %o3 "o3" + OpName %o4 "o4" + OpName %o5 "o5" + OpName %o6 "o6" + OpName %o7 "o7" + OpName %oDepth "oDepth" + OpName %oStencil "oStencil" + OpDecorate %o0 Location 0 + OpDecorate %o1 Location 1 + OpDecorate %o2 Location 2 + OpDecorate %o3 Location 3 + OpDecorate %o4 Location 4 + OpDecorate %o5 Location 5 + OpDecorate %o6 Location 6 + OpDecorate %o7 Location 7 + OpDecorate %oDepth BuiltIn FragDepth + 
OpDecorate %oStencil BuiltIn FragStencilRefEXT + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %o0 = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %12 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %o1 = OpVariable %_ptr_Output_v4float Output + %14 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %o2 = OpVariable %_ptr_Output_v4float Output + %16 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1 + %o3 = OpVariable %_ptr_Output_v4float Output + %18 = OpConstantComposite %v4float %float_0 %float_0 %float_1 %float_1 + %o4 = OpVariable %_ptr_Output_v4float Output + %float_0_5 = OpConstant %float 0.5 + %21 = OpConstantComposite %v4float %float_1 %float_0 %float_1 %float_0_5 + %o5 = OpVariable %_ptr_Output_v4float Output + %float_0_25 = OpConstant %float 0.25 + %24 = OpConstantComposite %v4float %float_0_25 %float_0_25 %float_0_25 %float_0_25 + %o6 = OpVariable %_ptr_Output_v4float Output + %float_0_75 = OpConstant %float 0.75 + %27 = OpConstantComposite %v4float %float_0_75 %float_0_75 %float_0_75 %float_0_75 + %o7 = OpVariable %_ptr_Output_v4float Output + %29 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_float = OpTypePointer Output %float + %oDepth = OpVariable %_ptr_Output_float Output +%float_0_899999976 = OpConstant %float 0.899999976 + %int = OpTypeInt 32 1 +%_ptr_Output_int = OpTypePointer Output %int + %oStencil = OpVariable %_ptr_Output_int Output + %int_127 = OpConstant %int 127 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %o0 %12 + OpStore %o1 %14 + OpStore %o2 %16 + OpStore %o3 %18 + OpStore %o4 %21 + OpStore %o5 %24 + OpStore %o6 %27 + OpStore %o7 %29 + OpStore %oDepth %float_0_899999976 + OpStore %oStencil %int_127 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag b/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag new file mode 100644 index 00000000000..9f03d77e3de --- /dev/null +++ b/shaders-msl/asm/frag/pull-model-interpolation.asm.msl23.frag @@ -0,0 +1,425 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 325 +; Schema: 0 + OpCapability Shader + OpCapability SampleRateShading + OpCapability InterpolationFunction + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %baz %a %s %foo %sid %bar %b %c + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %func_ "func(" + OpName %FragColor "FragColor" + OpName %baz "baz" + OpName %a "a" + OpName %_ "" + OpMemberName %_ 0 "x" + OpMemberName %_ 1 "y" + OpMemberName %_ 2 "z" + OpMemberName %_ 3 "u" + OpMemberName %_ 4 "v" + OpMemberName %_ 5 "w" + OpName %s "s" + OpName %foo "foo" + OpName %sid "sid" + OpName %bar "bar" + OpName %b "b" + OpName %c "c" + OpDecorate %FragColor Location 0 + OpDecorate %baz Sample + OpDecorate %baz Location 2 + OpDecorate %a Location 4 + OpDecorate %s Location 10 + OpDecorate %foo NoPerspective + OpDecorate %foo Location 0 + OpDecorate %sid Flat + OpDecorate %sid Location 3 + OpDecorate %bar Centroid + OpDecorate %bar Location 1 + OpDecorate %b Centroid + OpDecorate %b Location 6 + OpDecorate %c Sample + OpDecorate %c Location 8 + OpMemberDecorate %_ 1 Centroid + OpMemberDecorate %_ 1 NoPerspective + OpMemberDecorate %_ 2 Sample + OpMemberDecorate %_ 3 Centroid + OpMemberDecorate %_ 4 Sample + OpMemberDecorate %_ 4 NoPerspective + %void = OpTypeVoid + %15 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %baz = 
OpVariable %_ptr_Input_v2float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_float = OpTypePointer Output %float + %uint_1 = OpConstant %uint 1 + %int = OpTypeInt 32 1 + %int_3 = OpConstant %int 3 +%float_n0_100000001 = OpConstant %float -0.100000001 +%float_0_100000001 = OpConstant %float 0.100000001 + %30 = OpConstantComposite %v2float %float_n0_100000001 %float_0_100000001 + %uint_2 = OpConstant %uint 2 +%_arr_v2float_uint_2 = OpTypeArray %v2float %uint_2 +%_ptr_Input__arr_v2float_uint_2 = OpTypePointer Input %_arr_v2float_uint_2 + %a = OpVariable %_ptr_Input__arr_v2float_uint_2 Input + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_ptr_Input__arr_v4float_uint_2 = OpTypePointer Input %_arr_v4float_uint_2 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %_ = OpTypeStruct %v4float %v4float %v4float %_arr_v4float_uint_2 %_arr_v2float_uint_2 %_arr_float_uint_3 +%_ptr_Input__ = OpTypePointer Input %_ + %s = OpVariable %_ptr_Input__ Input +%_ptr_Input_v4float = OpTypePointer Input %v4float + %foo = OpVariable %_ptr_Input_v4float Input +%_ptr_Input_int = OpTypePointer Input %int + %sid = OpVariable %_ptr_Input_int Input + %44 = OpConstantComposite %v2float %float_0_100000001 %float_0_100000001 + %v3float = OpTypeVector %float 3 +%_ptr_Input_v3float = OpTypePointer Input %v3float + %bar = OpVariable %_ptr_Input_v3float Input + %47 = OpConstantComposite %v2float %float_n0_100000001 %float_n0_100000001 + %b = OpVariable %_ptr_Input__arr_v2float_uint_2 Input + %c = OpVariable %_ptr_Input__arr_v2float_uint_2 Input + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %main = OpFunction %void None %15 + %50 = OpLabel + %51 = OpLoad %v4float %foo + OpStore %FragColor %51 + %52 = OpExtInst %v4float %1 InterpolateAtCentroid %foo + %53 = OpLoad %v4float %FragColor + %54 = 
OpFAdd %v4float %53 %52 + OpStore %FragColor %54 + %55 = OpLoad %int %sid + %56 = OpExtInst %v4float %1 InterpolateAtSample %foo %55 + %57 = OpLoad %v4float %FragColor + %58 = OpFAdd %v4float %57 %56 + OpStore %FragColor %58 + %59 = OpExtInst %v4float %1 InterpolateAtOffset %foo %44 + %60 = OpLoad %v4float %FragColor + %61 = OpFAdd %v4float %60 %59 + OpStore %FragColor %61 + %62 = OpLoad %v3float %bar + %63 = OpLoad %v4float %FragColor + %64 = OpVectorShuffle %v3float %63 %63 0 1 2 + %65 = OpFAdd %v3float %64 %62 + %66 = OpLoad %v4float %FragColor + %67 = OpVectorShuffle %v4float %66 %65 4 5 6 3 + OpStore %FragColor %67 + %68 = OpExtInst %v3float %1 InterpolateAtCentroid %bar + %69 = OpLoad %v4float %FragColor + %70 = OpVectorShuffle %v3float %69 %69 0 1 2 + %71 = OpFAdd %v3float %70 %68 + %72 = OpLoad %v4float %FragColor + %73 = OpVectorShuffle %v4float %72 %71 4 5 6 3 + OpStore %FragColor %73 + %74 = OpLoad %int %sid + %75 = OpExtInst %v3float %1 InterpolateAtSample %bar %74 + %76 = OpLoad %v4float %FragColor + %77 = OpVectorShuffle %v3float %76 %76 0 1 2 + %78 = OpFAdd %v3float %77 %75 + %79 = OpLoad %v4float %FragColor + %80 = OpVectorShuffle %v4float %79 %78 4 5 6 3 + OpStore %FragColor %80 + %81 = OpExtInst %v3float %1 InterpolateAtOffset %bar %47 + %82 = OpLoad %v4float %FragColor + %83 = OpVectorShuffle %v3float %82 %82 0 1 2 + %84 = OpFAdd %v3float %83 %81 + %85 = OpLoad %v4float %FragColor + %86 = OpVectorShuffle %v4float %85 %84 4 5 6 3 + OpStore %FragColor %86 + %87 = OpAccessChain %_ptr_Input_v2float %b %int_0 + %88 = OpLoad %v2float %87 + %89 = OpLoad %v4float %FragColor + %90 = OpVectorShuffle %v2float %89 %89 0 1 + %91 = OpFAdd %v2float %90 %88 + %92 = OpLoad %v4float %FragColor + %93 = OpVectorShuffle %v4float %92 %91 4 5 2 3 + OpStore %FragColor %93 + %94 = OpAccessChain %_ptr_Input_v2float %b %int_1 + %95 = OpExtInst %v2float %1 InterpolateAtCentroid %94 + %96 = OpLoad %v4float %FragColor + %97 = OpVectorShuffle %v2float %96 %96 0 1 + %98 = 
OpFAdd %v2float %97 %95 + %99 = OpLoad %v4float %FragColor + %100 = OpVectorShuffle %v4float %99 %98 4 5 2 3 + OpStore %FragColor %100 + %101 = OpAccessChain %_ptr_Input_v2float %b %int_0 + %102 = OpExtInst %v2float %1 InterpolateAtSample %101 %int_2 + %103 = OpLoad %v4float %FragColor + %104 = OpVectorShuffle %v2float %103 %103 0 1 + %105 = OpFAdd %v2float %104 %102 + %106 = OpLoad %v4float %FragColor + %107 = OpVectorShuffle %v4float %106 %105 4 5 2 3 + OpStore %FragColor %107 + %108 = OpAccessChain %_ptr_Input_v2float %b %int_1 + %109 = OpExtInst %v2float %1 InterpolateAtOffset %108 %30 + %110 = OpLoad %v4float %FragColor + %111 = OpVectorShuffle %v2float %110 %110 0 1 + %112 = OpFAdd %v2float %111 %109 + %113 = OpLoad %v4float %FragColor + %114 = OpVectorShuffle %v4float %113 %112 4 5 2 3 + OpStore %FragColor %114 + %115 = OpAccessChain %_ptr_Input_v2float %c %int_0 + %116 = OpLoad %v2float %115 + %117 = OpLoad %v4float %FragColor + %118 = OpVectorShuffle %v2float %117 %117 0 1 + %119 = OpFAdd %v2float %118 %116 + %120 = OpLoad %v4float %FragColor + %121 = OpVectorShuffle %v4float %120 %119 4 5 2 3 + OpStore %FragColor %121 + %122 = OpAccessChain %_ptr_Input_v2float %c %int_1 + %123 = OpExtInst %v2float %1 InterpolateAtCentroid %122 + %124 = OpVectorShuffle %v2float %123 %123 0 1 + %125 = OpLoad %v4float %FragColor + %126 = OpVectorShuffle %v2float %125 %125 0 1 + %127 = OpFAdd %v2float %126 %124 + %128 = OpLoad %v4float %FragColor + %129 = OpVectorShuffle %v4float %128 %127 4 5 2 3 + OpStore %FragColor %129 + %130 = OpAccessChain %_ptr_Input_v2float %c %int_0 + %131 = OpExtInst %v2float %1 InterpolateAtSample %130 %int_2 + %132 = OpVectorShuffle %v2float %131 %131 1 0 + %133 = OpLoad %v4float %FragColor + %134 = OpVectorShuffle %v2float %133 %133 0 1 + %135 = OpFAdd %v2float %134 %132 + %136 = OpLoad %v4float %FragColor + %137 = OpVectorShuffle %v4float %136 %135 4 5 2 3 + OpStore %FragColor %137 + %138 = OpAccessChain %_ptr_Input_v2float %c %int_1 + %139 = 
OpExtInst %v2float %1 InterpolateAtOffset %138 %30 + %140 = OpVectorShuffle %v2float %139 %139 0 0 + %141 = OpLoad %v4float %FragColor + %142 = OpVectorShuffle %v2float %141 %141 0 1 + %143 = OpFAdd %v2float %142 %140 + %144 = OpLoad %v4float %FragColor + %145 = OpVectorShuffle %v4float %144 %143 4 5 2 3 + OpStore %FragColor %145 + %146 = OpAccessChain %_ptr_Input_v4float %s %int_0 + %147 = OpLoad %v4float %146 + %148 = OpLoad %v4float %FragColor + %149 = OpFAdd %v4float %148 %147 + OpStore %FragColor %149 + %150 = OpAccessChain %_ptr_Input_v4float %s %int_0 + %151 = OpExtInst %v4float %1 InterpolateAtCentroid %150 + %152 = OpLoad %v4float %FragColor + %153 = OpFAdd %v4float %152 %151 + OpStore %FragColor %153 + %154 = OpAccessChain %_ptr_Input_v4float %s %int_0 + %155 = OpLoad %int %sid + %156 = OpExtInst %v4float %1 InterpolateAtSample %154 %155 + %157 = OpLoad %v4float %FragColor + %158 = OpFAdd %v4float %157 %156 + OpStore %FragColor %158 + %159 = OpAccessChain %_ptr_Input_v4float %s %int_0 + %160 = OpExtInst %v4float %1 InterpolateAtOffset %159 %44 + %161 = OpLoad %v4float %FragColor + %162 = OpFAdd %v4float %161 %160 + OpStore %FragColor %162 + %163 = OpAccessChain %_ptr_Input_v4float %s %int_1 + %164 = OpLoad %v4float %163 + %165 = OpLoad %v4float %FragColor + %166 = OpFAdd %v4float %165 %164 + OpStore %FragColor %166 + %167 = OpAccessChain %_ptr_Input_v4float %s %int_1 + %168 = OpExtInst %v4float %1 InterpolateAtCentroid %167 + %169 = OpLoad %v4float %FragColor + %170 = OpFAdd %v4float %169 %168 + OpStore %FragColor %170 + %171 = OpAccessChain %_ptr_Input_v4float %s %int_1 + %172 = OpLoad %int %sid + %173 = OpExtInst %v4float %1 InterpolateAtSample %171 %172 + %174 = OpLoad %v4float %FragColor + %175 = OpFAdd %v4float %174 %173 + OpStore %FragColor %175 + %176 = OpAccessChain %_ptr_Input_v4float %s %int_1 + %177 = OpExtInst %v4float %1 InterpolateAtOffset %176 %47 + %178 = OpLoad %v4float %FragColor + %179 = OpFAdd %v4float %178 %177 + OpStore %FragColor 
%179 + %180 = OpAccessChain %_ptr_Input_v2float %s %int_4 %int_0 + %181 = OpLoad %v2float %180 + %182 = OpLoad %v4float %FragColor + %183 = OpVectorShuffle %v2float %182 %182 0 1 + %184 = OpFAdd %v2float %183 %181 + %185 = OpLoad %v4float %FragColor + %186 = OpVectorShuffle %v4float %185 %184 4 5 2 3 + OpStore %FragColor %186 + %187 = OpAccessChain %_ptr_Input_v2float %s %int_4 %int_1 + %188 = OpExtInst %v2float %1 InterpolateAtCentroid %187 + %189 = OpLoad %v4float %FragColor + %190 = OpVectorShuffle %v2float %189 %189 0 1 + %191 = OpFAdd %v2float %190 %188 + %192 = OpLoad %v4float %FragColor + %193 = OpVectorShuffle %v4float %192 %191 4 5 2 3 + OpStore %FragColor %193 + %194 = OpAccessChain %_ptr_Input_v2float %s %int_4 %int_0 + %195 = OpExtInst %v2float %1 InterpolateAtSample %194 %int_2 + %196 = OpLoad %v4float %FragColor + %197 = OpVectorShuffle %v2float %196 %196 0 1 + %198 = OpFAdd %v2float %197 %195 + %199 = OpLoad %v4float %FragColor + %200 = OpVectorShuffle %v4float %199 %198 4 5 2 3 + OpStore %FragColor %200 + %201 = OpAccessChain %_ptr_Input_v2float %s %int_4 %int_1 + %202 = OpExtInst %v2float %1 InterpolateAtOffset %201 %30 + %203 = OpLoad %v4float %FragColor + %204 = OpVectorShuffle %v2float %203 %203 0 1 + %205 = OpFAdd %v2float %204 %202 + %206 = OpLoad %v4float %FragColor + %207 = OpVectorShuffle %v4float %206 %205 4 5 2 3 + OpStore %FragColor %207 + %208 = OpAccessChain %_ptr_Input_float %s %int_5 %int_0 + %209 = OpLoad %float %208 + %210 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %211 = OpLoad %float %210 + %212 = OpFAdd %float %211 %209 + %213 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %213 %212 + %214 = OpAccessChain %_ptr_Input_float %s %int_5 %int_1 + %215 = OpExtInst %float %1 InterpolateAtCentroid %214 + %216 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %217 = OpLoad %float %216 + %218 = OpFAdd %float %217 %215 + %219 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %219 %218 + 
%220 = OpAccessChain %_ptr_Input_float %s %int_5 %int_0 + %221 = OpExtInst %float %1 InterpolateAtSample %220 %int_2 + %222 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %223 = OpLoad %float %222 + %224 = OpFAdd %float %223 %221 + %225 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %225 %224 + %226 = OpAccessChain %_ptr_Input_float %s %int_5 %int_1 + %227 = OpExtInst %float %1 InterpolateAtOffset %226 %30 + %228 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %229 = OpLoad %float %228 + %230 = OpFAdd %float %229 %227 + %231 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %231 %230 + %232 = OpFunctionCall %void %func_ + OpReturn + OpFunctionEnd + %func_ = OpFunction %void None %15 + %233 = OpLabel + %234 = OpLoad %v2float %baz + %235 = OpLoad %v4float %FragColor + %236 = OpVectorShuffle %v2float %235 %235 0 1 + %237 = OpFAdd %v2float %236 %234 + %238 = OpLoad %v4float %FragColor + %239 = OpVectorShuffle %v4float %238 %237 4 5 2 3 + OpStore %FragColor %239 + %240 = OpAccessChain %_ptr_Input_float %baz %uint_0 + %241 = OpExtInst %float %1 InterpolateAtCentroid %240 + %242 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + %243 = OpLoad %float %242 + %244 = OpFAdd %float %243 %241 + %245 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %245 %244 + %246 = OpAccessChain %_ptr_Input_float %baz %uint_1 + %247 = OpExtInst %float %1 InterpolateAtSample %246 %int_3 + %248 = OpAccessChain %_ptr_Output_float %FragColor %uint_1 + %249 = OpLoad %float %248 + %250 = OpFAdd %float %249 %247 + %251 = OpAccessChain %_ptr_Output_float %FragColor %uint_1 + OpStore %251 %250 + %252 = OpAccessChain %_ptr_Input_float %baz %uint_1 + %253 = OpExtInst %float %1 InterpolateAtOffset %252 %30 + %254 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + %255 = OpLoad %float %254 + %256 = OpFAdd %float %255 %253 + %257 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %257 %256 + %258 = OpAccessChain 
%_ptr_Input_v2float %a %int_1 + %259 = OpExtInst %v2float %1 InterpolateAtCentroid %258 + %260 = OpLoad %v4float %FragColor + %261 = OpVectorShuffle %v2float %260 %260 0 1 + %262 = OpFAdd %v2float %261 %259 + %263 = OpLoad %v4float %FragColor + %264 = OpVectorShuffle %v4float %263 %262 4 5 2 3 + OpStore %FragColor %264 + %265 = OpAccessChain %_ptr_Input_v2float %a %int_0 + %266 = OpExtInst %v2float %1 InterpolateAtSample %265 %int_2 + %267 = OpLoad %v4float %FragColor + %268 = OpVectorShuffle %v2float %267 %267 0 1 + %269 = OpFAdd %v2float %268 %266 + %270 = OpLoad %v4float %FragColor + %271 = OpVectorShuffle %v4float %270 %269 4 5 2 3 + OpStore %FragColor %271 + %272 = OpAccessChain %_ptr_Input_v2float %a %int_1 + %273 = OpExtInst %v2float %1 InterpolateAtOffset %272 %30 + %274 = OpLoad %v4float %FragColor + %275 = OpVectorShuffle %v2float %274 %274 0 1 + %276 = OpFAdd %v2float %275 %273 + %277 = OpLoad %v4float %FragColor + %278 = OpVectorShuffle %v4float %277 %276 4 5 2 3 + OpStore %FragColor %278 + %279 = OpAccessChain %_ptr_Input_v4float %s %int_2 + %280 = OpLoad %v4float %279 + %281 = OpLoad %v4float %FragColor + %282 = OpFAdd %v4float %281 %280 + OpStore %FragColor %282 + %283 = OpAccessChain %_ptr_Input_v4float %s %int_2 + %284 = OpExtInst %v4float %1 InterpolateAtCentroid %283 + %285 = OpVectorShuffle %v2float %284 %284 1 1 + %286 = OpLoad %v4float %FragColor + %287 = OpVectorShuffle %v2float %286 %286 0 1 + %288 = OpFAdd %v2float %287 %285 + %289 = OpLoad %v4float %FragColor + %290 = OpVectorShuffle %v4float %289 %288 4 5 2 3 + OpStore %FragColor %290 + %291 = OpAccessChain %_ptr_Input_v4float %s %int_2 + %292 = OpExtInst %v4float %1 InterpolateAtSample %291 %int_3 + %293 = OpVectorShuffle %v2float %292 %292 0 1 + %294 = OpLoad %v4float %FragColor + %295 = OpVectorShuffle %v2float %294 %294 1 2 + %296 = OpFAdd %v2float %295 %293 + %297 = OpLoad %v4float %FragColor + %298 = OpVectorShuffle %v4float %297 %296 0 4 5 3 + OpStore %FragColor %298 + %299 = 
OpAccessChain %_ptr_Input_v4float %s %int_2 + %300 = OpExtInst %v4float %1 InterpolateAtOffset %299 %30 + %301 = OpVectorShuffle %v2float %300 %300 3 0 + %302 = OpLoad %v4float %FragColor + %303 = OpVectorShuffle %v2float %302 %302 2 3 + %304 = OpFAdd %v2float %303 %301 + %305 = OpLoad %v4float %FragColor + %306 = OpVectorShuffle %v4float %305 %304 0 1 4 5 + OpStore %FragColor %306 + %308 = OpAccessChain %_ptr_Input_v4float %s %int_3 %int_0 + %309 = OpLoad %v4float %308 + %310 = OpLoad %v4float %FragColor + %311 = OpFAdd %v4float %310 %309 + OpStore %FragColor %311 + %312 = OpAccessChain %_ptr_Input__arr_v4float_uint_2 %s %int_3 + %313 = OpAccessChain %_ptr_Input_v4float %312 %int_1 + %314 = OpExtInst %v4float %1 InterpolateAtCentroid %313 + %315 = OpLoad %v4float %FragColor + %316 = OpFAdd %v4float %315 %314 + OpStore %FragColor %316 + %317 = OpAccessChain %_ptr_Input_v4float %s %int_3 %int_0 + %318 = OpExtInst %v4float %1 InterpolateAtSample %317 %int_2 + %319 = OpLoad %v4float %FragColor + %320 = OpFAdd %v4float %319 %318 + OpStore %FragColor %320 + %321 = OpAccessChain %_ptr_Input_v4float %s %int_3 %int_1 + %322 = OpExtInst %v4float %1 InterpolateAtOffset %321 %30 + %323 = OpLoad %v4float %FragColor + %324 = OpFAdd %v4float %323 %322 + OpStore %FragColor %324 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/frag/switch-different-sizes.asm.frag b/shaders-msl/asm/frag/switch-different-sizes.asm.frag new file mode 100644 index 00000000000..ee6daa3d2b0 --- /dev/null +++ b/shaders-msl/asm/frag/switch-different-sizes.asm.frag @@ -0,0 +1,106 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 10 +; Bound: 42 +; Schema: 0 + OpCapability Shader + OpCapability Int8 + OpCapability Int16 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension 
"GL_GOOGLE_include_directive" + OpName %main "main" + OpName %sw0 "sw0" + OpName %result "result" + OpName %sw1 "sw1" + OpName %sw2 "sw2" + OpName %sw3 "sw3" + OpDecorate %sw1 RelaxedPrecision + OpDecorate %21 RelaxedPrecision + OpDecorate %sw2 RelaxedPrecision + OpDecorate %29 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %lowp_int = OpTypeInt 8 1 + %highp_int = OpTypeInt 16 1 + %_ptr_Function_int = OpTypePointer Function %int + %_ptr_Function_lowp_int = OpTypePointer Function %lowp_int + %_ptr_Function_highp_int = OpTypePointer Function %highp_int + %int_42 = OpConstant %int 42 + %int_0 = OpConstant %int 0 + %int_420 = OpConstant %int 420 + %int_10 = OpConstant %int 10 + %int_512 = OpConstant %int 512 + %lowp_int_10 = OpConstant %lowp_int 10 + %highp_int_10 = OpConstant %highp_int 10 + %main = OpFunction %void None %3 + %5 = OpLabel + %sw0 = OpVariable %_ptr_Function_int Function + %result = OpVariable %_ptr_Function_int Function + %sw1 = OpVariable %_ptr_Function_lowp_int Function + %sw2 = OpVariable %_ptr_Function_highp_int Function + %sw3 = OpVariable %_ptr_Function_highp_int Function + OpStore %sw0 %int_42 + OpStore %result %int_0 + %12 = OpLoad %int %sw0 + OpSelectionMerge %16 None + OpSwitch %12 %16 -42 %13 420 %14 -1234 %15 + %13 = OpLabel + OpStore %result %int_42 + OpBranch %14 + %14 = OpLabel + OpStore %result %int_420 + OpBranch %15 + %15 = OpLabel + OpStore %result %int_420 + OpBranch %16 + %16 = OpLabel + OpStore %sw1 %lowp_int_10 + %21 = OpLoad %lowp_int %sw1 + OpSelectionMerge %25 None + OpSwitch %21 %25 -42 %22 42 %23 -123 %24 + %22 = OpLabel + OpStore %result %int_42 + OpBranch %23 + %23 = OpLabel + OpStore %result %int_420 + OpBranch %24 + %24 = OpLabel + OpStore %result %int_512 + OpBranch %25 + %25 = OpLabel + OpStore %sw2 %highp_int_10 + %29 = OpLoad %highp_int %sw2 + OpSelectionMerge %33 None + OpSwitch %29 %33 -42 %30 42 %31 -1234 %32 + %30 = OpLabel + OpStore %result %int_42 + OpBranch %31 + 
%31 = OpLabel + OpStore %result %int_420 + OpBranch %32 + %32 = OpLabel + OpStore %result %int_512 + OpBranch %33 + %33 = OpLabel + OpStore %sw3 %highp_int_10 + %36 = OpLoad %highp_int %sw3 + OpSelectionMerge %40 None + OpSwitch %36 %40 -42 %37 42 %38 -1234 %39 + %37 = OpLabel + OpStore %result %int_42 + OpBranch %38 + %38 = OpLabel + OpStore %result %int_420 + OpBranch %39 + %39 = OpLabel + OpStore %result %int_512 + OpBranch %40 + %40 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag b/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag new file mode 100644 index 00000000000..62f2dc68073 --- /dev/null +++ b/shaders-msl/asm/frag/switch-long-case.asm.msl22.frag @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 21 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %sw "sw" + OpName %result "result" + %void = OpTypeVoid + %6 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %long = OpTypeInt 64 1 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Function_long = OpTypePointer Function %long + %int_42 = OpConstant %int 42 + %int_0 = OpConstant %int 0 + %int_420 = OpConstant %int 420 + %long_42 = OpConstant %long 42 + %main = OpFunction %void None %6 + %15 = OpLabel + %sw = OpVariable %_ptr_Function_long Function + %result = OpVariable %_ptr_Function_int Function + OpStore %sw %long_42 + OpStore %result %int_0 + %16 = OpLoad %long %sw + OpSelectionMerge %17 None + OpSwitch %16 %17 -42 %18 420 %19 -34359738368 %20 + %18 = OpLabel + OpStore %result %int_42 + OpBranch %19 + %19 = OpLabel + OpStore %result %int_420 + OpBranch %20 + %20 = OpLabel + OpStore 
%result %int_420 + OpBranch %17 + %17 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag b/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag new file mode 100644 index 00000000000..cea32b420a6 --- /dev/null +++ b/shaders-msl/asm/frag/switch-unsigned-long-case.asm.msl22.frag @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 21 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %sw "sw" + OpName %result "result" + %void = OpTypeVoid + %6 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %long = OpTypeInt 64 0 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Function_long = OpTypePointer Function %long + %int_42 = OpConstant %int 42 + %int_0 = OpConstant %int 0 + %int_420 = OpConstant %int 420 + %long_42 = OpConstant %long 42 + %main = OpFunction %void None %6 + %15 = OpLabel + %sw = OpVariable %_ptr_Function_long Function + %result = OpVariable %_ptr_Function_int Function + OpStore %sw %long_42 + OpStore %result %int_0 + %16 = OpLoad %long %sw + OpSelectionMerge %17 None + OpSwitch %16 %17 42 %18 420 %19 343597383680 %20 + %18 = OpLabel + OpStore %result %int_42 + OpBranch %19 + %19 = OpLabel + OpStore %result %int_420 + OpBranch %20 + %20 = OpLabel + OpStore %result %int_420 + OpBranch %17 + %17 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag index ae7a972d7b2..e7e6f37ea27 100644 --- a/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag +++ b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag @@ -5,6 +5,7 @@ ; Schema: 0 
OpCapability Shader OpCapability StorageInputOutput16 + OpCapability Float16 OpExtension "SPV_KHR_16bit_storage" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 diff --git a/shaders-msl/asm/frag/unord-relational-op.asm.frag b/shaders-msl/asm/frag/unord-relational-op.asm.frag index 3e4cd6c2c29..824c0512911 100644 --- a/shaders-msl/asm/frag/unord-relational-op.asm.frag +++ b/shaders-msl/asm/frag/unord-relational-op.asm.frag @@ -114,6 +114,8 @@ OpStore %t1 %b %15 = OpFUnordEqual %bool %a %b OpStore %c1 %15 + %ordered = OpFOrdNotEqual %bool %a %b + OpStore %c1 %ordered %17 = OpFUnordNotEqual %bool %a %b OpStore %c2 %17 %19 = OpFUnordLessThan %bool %a %b diff --git a/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag new file mode 100644 index 00000000000..824c0512911 --- /dev/null +++ b/shaders-msl/asm/frag/unord-relational-op.relax-nan.asm.frag @@ -0,0 +1,207 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 122 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %c %d %e %f %g %h %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 460 + OpName %main "main" + OpName %t0 "t0" + OpName %a "a" + OpName %t1 "t1" + OpName %b "b" + OpName %c1 "c1" + OpName %c2 "c2" + OpName %c3 "c3" + OpName %c4 "c4" + OpName %c5 "c5" + OpName %c6 "c6" + OpName %c7 "c7" + OpName %c "c" + OpName %d "d" + OpName %c8 "c8" + OpName %c9 "c9" + OpName %c10 "c10" + OpName %c11 "c11" + OpName %c12 "c12" + OpName %c13 "c13" + OpName %e "e" + OpName %f "f" + OpName %c14 "c14" + OpName %c15 "c15" + OpName %c16 "c16" + OpName %c17 "c17" + OpName %c18 "c18" + OpName %c19 "c19" + OpName %g "g" + OpName %h "h" + OpName %c20 "c20" + OpName %c21 "c21" + OpName %c22 "c22" + OpName %c23 "c23" + OpName %c24 "c24" + OpName %FragColor "FragColor" + OpDecorate %a SpecId 1 + 
OpDecorate %b SpecId 2 + OpDecorate %c Location 2 + OpDecorate %d Location 3 + OpDecorate %e Location 4 + OpDecorate %f Location 5 + OpDecorate %g Location 6 + OpDecorate %h Location 7 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %a = OpSpecConstant %float 1 + %b = OpSpecConstant %float 2 + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %v2bool = OpTypeVector %bool 2 +%_ptr_Function_v2bool = OpTypePointer Function %v2bool + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %c = OpVariable %_ptr_Input_v2float Input + %d = OpVariable %_ptr_Input_v2float Input + %v3bool = OpTypeVector %bool 3 +%_ptr_Function_v3bool = OpTypePointer Function %v3bool + %v3float = OpTypeVector %float 3 +%_ptr_Input_v3float = OpTypePointer Input %v3float + %e = OpVariable %_ptr_Input_v3float Input + %f = OpVariable %_ptr_Input_v3float Input + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %g = OpVariable %_ptr_Input_v4float Input + %h = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %t0 = OpVariable %_ptr_Function_float Function + %t1 = OpVariable %_ptr_Function_float Function + %c1 = OpVariable %_ptr_Function_bool Function + %c2 = OpVariable %_ptr_Function_bool Function + %c3 = OpVariable %_ptr_Function_bool Function + %c4 = OpVariable %_ptr_Function_bool Function + %c5 = OpVariable %_ptr_Function_bool Function + %c6 = OpVariable %_ptr_Function_bool Function + %c7 = OpVariable %_ptr_Function_v2bool Function + %c8 = OpVariable %_ptr_Function_v2bool Function + %c9 = OpVariable %_ptr_Function_v2bool Function + %c10 = OpVariable 
%_ptr_Function_v2bool Function + %c11 = OpVariable %_ptr_Function_v2bool Function + %c12 = OpVariable %_ptr_Function_v2bool Function + %c13 = OpVariable %_ptr_Function_v3bool Function + %c14 = OpVariable %_ptr_Function_v3bool Function + %c15 = OpVariable %_ptr_Function_v3bool Function + %c16 = OpVariable %_ptr_Function_v3bool Function + %c17 = OpVariable %_ptr_Function_v3bool Function + %c18 = OpVariable %_ptr_Function_v3bool Function + %c19 = OpVariable %_ptr_Function_v4bool Function + %c20 = OpVariable %_ptr_Function_v4bool Function + %c21 = OpVariable %_ptr_Function_v4bool Function + %c22 = OpVariable %_ptr_Function_v4bool Function + %c23 = OpVariable %_ptr_Function_v4bool Function + %c24 = OpVariable %_ptr_Function_v4bool Function + OpStore %t0 %a + OpStore %t1 %b + %15 = OpFUnordEqual %bool %a %b + OpStore %c1 %15 + %ordered = OpFOrdNotEqual %bool %a %b + OpStore %c1 %ordered + %17 = OpFUnordNotEqual %bool %a %b + OpStore %c2 %17 + %19 = OpFUnordLessThan %bool %a %b + OpStore %c3 %19 + %21 = OpFUnordGreaterThan %bool %a %b + OpStore %c4 %21 + %23 = OpFUnordLessThanEqual %bool %a %b + OpStore %c5 %23 + %25 = OpFUnordGreaterThanEqual %bool %a %b + OpStore %c6 %25 + %32 = OpLoad %v2float %c + %34 = OpLoad %v2float %d + %35 = OpFUnordEqual %v2bool %32 %34 + OpStore %c7 %35 + %37 = OpLoad %v2float %c + %38 = OpLoad %v2float %d + %39 = OpFUnordNotEqual %v2bool %37 %38 + OpStore %c8 %39 + %41 = OpLoad %v2float %c + %42 = OpLoad %v2float %d + %43 = OpFUnordLessThan %v2bool %41 %42 + OpStore %c9 %43 + %45 = OpLoad %v2float %c + %46 = OpLoad %v2float %d + %47 = OpFUnordGreaterThan %v2bool %45 %46 + OpStore %c10 %47 + %49 = OpLoad %v2float %c + %50 = OpLoad %v2float %d + %51 = OpFUnordLessThanEqual %v2bool %49 %50 + OpStore %c11 %51 + %53 = OpLoad %v2float %c + %54 = OpLoad %v2float %d + %55 = OpFUnordGreaterThanEqual %v2bool %53 %54 + OpStore %c12 %55 + %62 = OpLoad %v3float %e + %64 = OpLoad %v3float %f + %65 = OpFUnordEqual %v3bool %62 %64 + OpStore %c13 %65 + %67 = 
OpLoad %v3float %e + %68 = OpLoad %v3float %f + %69 = OpFUnordNotEqual %v3bool %67 %68 + OpStore %c14 %69 + %71 = OpLoad %v3float %e + %72 = OpLoad %v3float %f + %73 = OpFUnordLessThan %v3bool %71 %72 + OpStore %c15 %73 + %75 = OpLoad %v3float %e + %76 = OpLoad %v3float %f + %77 = OpFUnordGreaterThan %v3bool %75 %76 + OpStore %c16 %77 + %79 = OpLoad %v3float %e + %80 = OpLoad %v3float %f + %81 = OpFUnordLessThanEqual %v3bool %79 %80 + OpStore %c17 %81 + %83 = OpLoad %v3float %e + %84 = OpLoad %v3float %f + %85 = OpFUnordGreaterThanEqual %v3bool %83 %84 + OpStore %c18 %85 + %92 = OpLoad %v4float %g + %94 = OpLoad %v4float %h + %95 = OpFUnordEqual %v4bool %92 %94 + OpStore %c19 %95 + %97 = OpLoad %v4float %g + %98 = OpLoad %v4float %h + %99 = OpFUnordNotEqual %v4bool %97 %98 + OpStore %c20 %99 + %101 = OpLoad %v4float %g + %102 = OpLoad %v4float %h + %103 = OpFUnordLessThan %v4bool %101 %102 + OpStore %c21 %103 + %105 = OpLoad %v4float %g + %106 = OpLoad %v4float %h + %107 = OpFUnordGreaterThan %v4bool %105 %106 + OpStore %c22 %107 + %109 = OpLoad %v4float %g + %110 = OpLoad %v4float %h + %111 = OpFUnordLessThanEqual %v4bool %109 %110 + OpStore %c23 %111 + %113 = OpLoad %v4float %g + %114 = OpLoad %v4float %h + %115 = OpFUnordGreaterThanEqual %v4bool %113 %114 + OpStore %c24 %115 + %118 = OpLoad %float %t0 + %119 = OpLoad %float %t1 + %120 = OpFAdd %float %118 %119 + %121 = OpCompositeConstruct %v4float %120 %120 %120 %120 + OpStore %FragColor %121 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc b/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc new file mode 100644 index 00000000000..b21a2d3dd56 --- /dev/null +++ b/shaders-msl/asm/tesc/tess-level-overrun.multi-patch.asm.tesc @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 46 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + 
OpEntryPoint TessellationControl %main "main" %gl_TessLevelInner %gl_TessLevelOuter + OpExecutionMode %main OutputVertices 1 + OpExecutionMode %main Triangles + OpSource ESSL 310 + OpSourceExtension "GL_EXT_shader_io_blocks" + OpSourceExtension "GL_EXT_tessellation_shader" + OpName %main "main" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %TessLevels "TessLevels" + OpMemberName %TessLevels 0 "inner0" + OpMemberName %TessLevels 1 "inner1" + OpMemberName %TessLevels 2 "outer0" + OpMemberName %TessLevels 3 "outer1" + OpMemberName %TessLevels 4 "outer2" + OpMemberName %TessLevels 5 "outer3" + OpName %sb_levels "sb_levels" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpMemberDecorate %TessLevels 0 Restrict + OpMemberDecorate %TessLevels 0 NonWritable + OpMemberDecorate %TessLevels 0 Offset 0 + OpMemberDecorate %TessLevels 1 Restrict + OpMemberDecorate %TessLevels 1 NonWritable + OpMemberDecorate %TessLevels 1 Offset 4 + OpMemberDecorate %TessLevels 2 Restrict + OpMemberDecorate %TessLevels 2 NonWritable + OpMemberDecorate %TessLevels 2 Offset 8 + OpMemberDecorate %TessLevels 3 Restrict + OpMemberDecorate %TessLevels 3 NonWritable + OpMemberDecorate %TessLevels 3 Offset 12 + OpMemberDecorate %TessLevels 4 Restrict + OpMemberDecorate %TessLevels 4 NonWritable + OpMemberDecorate %TessLevels 4 Offset 16 + OpMemberDecorate %TessLevels 5 Restrict + OpMemberDecorate %TessLevels 5 NonWritable + OpMemberDecorate %TessLevels 5 Offset 20 + OpDecorate %TessLevels Block + OpDecorate %sb_levels DescriptorSet 0 + OpDecorate %sb_levels Binding 0 + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output 
%_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %TessLevels = OpTypeStruct %float %float %float %float %float %float +%_ptr_StorageBuffer_TessLevels = OpTypePointer StorageBuffer %TessLevels + %sb_levels = OpVariable %_ptr_StorageBuffer_TessLevels StorageBuffer +%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_0 + %19 = OpLoad %float %18 + %21 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %21 %19 + %23 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_1 + %24 = OpLoad %float %23 + %25 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %25 %24 + %31 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_2 + %32 = OpLoad %float %31 + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %33 %32 + %35 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_3 + %36 = OpLoad %float %35 + %37 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %37 %36 + %39 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_4 + %40 = OpLoad %float %39 + %41 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %41 %40 + %43 = OpAccessChain %_ptr_StorageBuffer_float %sb_levels %int_5 + %44 = OpLoad %float %43 + %45 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_3 + OpStore %45 %44 + OpReturn + OpFunctionEnd diff --git 
a/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert b/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert new file mode 100644 index 00000000000..59ec3f91984 --- /dev/null +++ b/shaders-msl/asm/vert/clip-distance-plain-variable.asm.vert @@ -0,0 +1,91 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 56 +; Schema: 0 + OpCapability Shader + OpCapability ClipDistance + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %pos_1 %_entryPointOutput_pos %_entryPointOutput_clip + OpSource HLSL 500 + OpName %main "main" + OpName %VSOut "VSOut" + OpMemberName %VSOut 0 "pos" + OpMemberName %VSOut 1 "clip" + OpName %_main_vf4_ "@main(vf4;" + OpName %pos "pos" + OpName %vout "vout" + OpName %pos_0 "pos" + OpName %pos_1 "pos" + OpName %flattenTemp "flattenTemp" + OpName %param "param" + OpName %_entryPointOutput_pos "@entryPointOutput.pos" + OpName %_entryPointOutput_clip "@entryPointOutput.clip" + OpDecorate %pos_1 Location 0 + OpDecorate %_entryPointOutput_pos BuiltIn Position + OpDecorate %_entryPointOutput_clip BuiltIn ClipDistance + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %v2float = OpTypeVector %float 2 + %VSOut = OpTypeStruct %v4float %v2float + %11 = OpTypeFunction %VSOut %_ptr_Function_v4float +%_ptr_Function_VSOut = OpTypePointer Function %VSOut + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %pos_1 = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_pos = OpVariable %_ptr_Output_v4float Output + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = 
OpTypePointer Output %_arr_float_uint_2 +%_entryPointOutput_clip = OpVariable %_ptr_Output__arr_float_uint_2 Output + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Output_float = OpTypePointer Output %float + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %pos_0 = OpVariable %_ptr_Function_v4float Function +%flattenTemp = OpVariable %_ptr_Function_VSOut Function + %param = OpVariable %_ptr_Function_v4float Function + %32 = OpLoad %v4float %pos_1 + OpStore %pos_0 %32 + %35 = OpLoad %v4float %pos_0 + OpStore %param %35 + %36 = OpFunctionCall %VSOut %_main_vf4_ %param + OpStore %flattenTemp %36 + %39 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_0 + %40 = OpLoad %v4float %39 + OpStore %_entryPointOutput_pos %40 + %48 = OpAccessChain %_ptr_Function_float %flattenTemp %int_1 %uint_0 + %49 = OpLoad %float %48 + %51 = OpAccessChain %_ptr_Output_float %_entryPointOutput_clip %int_0 + OpStore %51 %49 + %53 = OpAccessChain %_ptr_Function_float %flattenTemp %int_1 %uint_1 + %54 = OpLoad %float %53 + %55 = OpAccessChain %_ptr_Output_float %_entryPointOutput_clip %int_1 + OpStore %55 %54 + OpReturn + OpFunctionEnd + %_main_vf4_ = OpFunction %VSOut None %11 + %pos = OpFunctionParameter %_ptr_Function_v4float + %14 = OpLabel + %vout = OpVariable %_ptr_Function_VSOut Function + %19 = OpLoad %v4float %pos + %20 = OpAccessChain %_ptr_Function_v4float %vout %int_0 + OpStore %20 %19 + %22 = OpLoad %v4float %pos + %23 = OpVectorShuffle %v2float %22 %22 0 1 + %25 = OpAccessChain %_ptr_Function_v2float %vout %int_1 + OpStore %25 %23 + %26 = OpLoad %VSOut %vout + OpReturnValue %26 + OpFunctionEnd diff --git a/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert b/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert new file mode 100644 index 00000000000..59ec3f91984 --- /dev/null +++ 
b/shaders-msl/asm/vert/clip-distance-plain-variable.no-user-varying.asm.vert @@ -0,0 +1,91 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 56 +; Schema: 0 + OpCapability Shader + OpCapability ClipDistance + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %pos_1 %_entryPointOutput_pos %_entryPointOutput_clip + OpSource HLSL 500 + OpName %main "main" + OpName %VSOut "VSOut" + OpMemberName %VSOut 0 "pos" + OpMemberName %VSOut 1 "clip" + OpName %_main_vf4_ "@main(vf4;" + OpName %pos "pos" + OpName %vout "vout" + OpName %pos_0 "pos" + OpName %pos_1 "pos" + OpName %flattenTemp "flattenTemp" + OpName %param "param" + OpName %_entryPointOutput_pos "@entryPointOutput.pos" + OpName %_entryPointOutput_clip "@entryPointOutput.clip" + OpDecorate %pos_1 Location 0 + OpDecorate %_entryPointOutput_pos BuiltIn Position + OpDecorate %_entryPointOutput_clip BuiltIn ClipDistance + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %v2float = OpTypeVector %float 2 + %VSOut = OpTypeStruct %v4float %v2float + %11 = OpTypeFunction %VSOut %_ptr_Function_v4float +%_ptr_Function_VSOut = OpTypePointer Function %VSOut + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %pos_1 = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput_pos = OpVariable %_ptr_Output_v4float Output + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_entryPointOutput_clip = OpVariable %_ptr_Output__arr_float_uint_2 Output + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = 
OpTypePointer Function %float +%_ptr_Output_float = OpTypePointer Output %float + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %pos_0 = OpVariable %_ptr_Function_v4float Function +%flattenTemp = OpVariable %_ptr_Function_VSOut Function + %param = OpVariable %_ptr_Function_v4float Function + %32 = OpLoad %v4float %pos_1 + OpStore %pos_0 %32 + %35 = OpLoad %v4float %pos_0 + OpStore %param %35 + %36 = OpFunctionCall %VSOut %_main_vf4_ %param + OpStore %flattenTemp %36 + %39 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_0 + %40 = OpLoad %v4float %39 + OpStore %_entryPointOutput_pos %40 + %48 = OpAccessChain %_ptr_Function_float %flattenTemp %int_1 %uint_0 + %49 = OpLoad %float %48 + %51 = OpAccessChain %_ptr_Output_float %_entryPointOutput_clip %int_0 + OpStore %51 %49 + %53 = OpAccessChain %_ptr_Function_float %flattenTemp %int_1 %uint_1 + %54 = OpLoad %float %53 + %55 = OpAccessChain %_ptr_Output_float %_entryPointOutput_clip %int_1 + OpStore %55 %54 + OpReturn + OpFunctionEnd + %_main_vf4_ = OpFunction %VSOut None %11 + %pos = OpFunctionParameter %_ptr_Function_v4float + %14 = OpLabel + %vout = OpVariable %_ptr_Function_VSOut Function + %19 = OpLoad %v4float %pos + %20 = OpAccessChain %_ptr_Function_v4float %vout %int_0 + OpStore %20 %19 + %22 = OpLoad %v4float %pos + %23 = OpVectorShuffle %v2float %22 %22 0 1 + %25 = OpAccessChain %_ptr_Function_v2float %vout %int_1 + OpStore %25 %23 + %26 = OpLoad %VSOut %vout + OpReturnValue %26 + OpFunctionEnd diff --git a/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert b/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert new file mode 100644 index 00000000000..429d3e4127c --- /dev/null +++ b/shaders-msl/asm/vert/packed-bool-to-uint.asm.vert @@ -0,0 +1,111 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 62 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main 
"main" %_ %gl_VertexIndex %a_position + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %Struct "Struct" + OpMemberName %Struct 0 "flags" + OpName %defaultUniformsVS "defaultUniformsVS" + OpMemberName %defaultUniformsVS 0 "flags" + OpMemberName %defaultUniformsVS 1 "uquad" + OpMemberName %defaultUniformsVS 2 "umatrix" + OpName %__0 "" + OpName %gl_VertexIndex "gl_VertexIndex" + OpName %a_position "a_position" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %_arr_uint_uint_1 ArrayStride 16 + OpMemberDecorate %Struct 0 Offset 0 + OpDecorate %_arr_v2float_uint_4 ArrayStride 16 + OpMemberDecorate %defaultUniformsVS 0 Offset 0 + OpMemberDecorate %defaultUniformsVS 1 Offset 16 + OpMemberDecorate %defaultUniformsVS 2 ColMajor + OpMemberDecorate %defaultUniformsVS 2 Offset 80 + OpMemberDecorate %defaultUniformsVS 2 MatrixStride 16 + OpDecorate %defaultUniformsVS Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorate %a_position Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 + 
%Struct = OpTypeStruct %_arr_uint_uint_1 + %v2float = OpTypeVector %float 2 + %uint_4 = OpConstant %uint 4 +%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 +%mat4v4float = OpTypeMatrix %v4float 4 +%defaultUniformsVS = OpTypeStruct %Struct %_arr_v2float_uint_4 %mat4v4float +%_ptr_Uniform_defaultUniformsVS = OpTypePointer Uniform %defaultUniformsVS + %__0 = OpVariable %_ptr_Uniform_defaultUniformsVS Uniform + %int_2 = OpConstant %int 2 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float + %int_1 = OpConstant %int 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_VertexIndex = OpVariable %_ptr_Input_int Input +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %a_position = OpVariable %_ptr_Input_v4float Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_float = OpTypePointer Input %float + %uint_3 = OpConstant %uint 3 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool + %uint_0 = OpConstant %uint 0 + %float_0 = OpConstant %float 0 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %27 = OpAccessChain %_ptr_Uniform_mat4v4float %__0 %int_2 + %28 = OpLoad %mat4v4float %27 + %32 = OpLoad %int %gl_VertexIndex + %34 = OpAccessChain %_ptr_Uniform_v2float %__0 %int_1 %32 + %35 = OpLoad %v2float %34 + %40 = OpAccessChain %_ptr_Input_float %a_position %uint_2 + %41 = OpLoad %float %40 + %43 = OpAccessChain %_ptr_Input_float %a_position %uint_3 + %44 = OpLoad %float %43 + %45 = OpCompositeExtract %float %35 0 + %46 = OpCompositeExtract %float %35 1 + %47 = OpCompositeConstruct %v4float %45 %46 %41 %44 + %48 = OpMatrixTimesVector %v4float %28 %47 + %50 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %50 %48 + %52 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %int_0 %int_0 + %53 = OpLoad %uint %52 + %56 = OpINotEqual %bool %53 %uint_0 + OpSelectionMerge %58 None + 
OpBranchConditional %56 %57 %58 + %57 = OpLabel + %61 = OpAccessChain %_ptr_Output_float %_ %int_0 %uint_2 + OpStore %61 %float_0 + OpBranch %58 + %58 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert b/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert new file mode 100644 index 00000000000..8448265c1dd --- /dev/null +++ b/shaders-msl/asm/vert/packed-bool2-to-packed_uint2.asm.vert @@ -0,0 +1,113 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 64 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %gl_VertexIndex %a_position + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %Struct "Struct" + OpMemberName %Struct 0 "flags" + OpName %defaultUniformsVS "defaultUniformsVS" + OpMemberName %defaultUniformsVS 0 "flags" + OpMemberName %defaultUniformsVS 1 "uquad" + OpMemberName %defaultUniformsVS 2 "umatrix" + OpName %__0 "" + OpName %gl_VertexIndex "gl_VertexIndex" + OpName %a_position "a_position" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %_arr_v2uint_uint_1 ArrayStride 16 + OpMemberDecorate %Struct 0 Offset 0 + OpDecorate %_arr_v2float_uint_4 ArrayStride 16 + OpMemberDecorate %defaultUniformsVS 0 Offset 0 + OpMemberDecorate %defaultUniformsVS 1 Offset 16 + OpMemberDecorate %defaultUniformsVS 2 ColMajor + OpMemberDecorate %defaultUniformsVS 2 Offset 80 + OpMemberDecorate %defaultUniformsVS 2 MatrixStride 16 + 
OpDecorate %defaultUniformsVS Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorate %a_position Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v2uint = OpTypeVector %uint 2 +%_arr_v2uint_uint_1 = OpTypeArray %v2uint %uint_1 + %Struct = OpTypeStruct %_arr_v2uint_uint_1 + %v2float = OpTypeVector %float 2 + %uint_4 = OpConstant %uint 4 +%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 +%mat4v4float = OpTypeMatrix %v4float 4 +%defaultUniformsVS = OpTypeStruct %Struct %_arr_v2float_uint_4 %mat4v4float +%_ptr_Uniform_defaultUniformsVS = OpTypePointer Uniform %defaultUniformsVS + %__0 = OpVariable %_ptr_Uniform_defaultUniformsVS Uniform + %int_2 = OpConstant %int 2 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float + %int_1 = OpConstant %int 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_VertexIndex = OpVariable %_ptr_Input_int Input +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %a_position = OpVariable %_ptr_Input_v4float Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_float = OpTypePointer Input %float + %uint_3 = OpConstant %uint 3 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %bool = OpTypeBool + %v2bool = OpTypeVector %bool 2 + %uint_0 = OpConstant %uint 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %float_0 = OpConstant %float 0 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %28 = OpAccessChain 
%_ptr_Uniform_mat4v4float %__0 %int_2 + %29 = OpLoad %mat4v4float %28 + %33 = OpLoad %int %gl_VertexIndex + %35 = OpAccessChain %_ptr_Uniform_v2float %__0 %int_1 %33 + %36 = OpLoad %v2float %35 + %41 = OpAccessChain %_ptr_Input_float %a_position %uint_2 + %42 = OpLoad %float %41 + %44 = OpAccessChain %_ptr_Input_float %a_position %uint_3 + %45 = OpLoad %float %44 + %46 = OpCompositeExtract %float %36 0 + %47 = OpCompositeExtract %float %36 1 + %48 = OpCompositeConstruct %v4float %46 %47 %42 %45 + %49 = OpMatrixTimesVector %v4float %29 %48 + %51 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %51 %49 + %56 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %int_0 %int_0 %uint_0 + %57 = OpLoad %uint %56 + %58 = OpINotEqual %bool %57 %uint_0 + OpSelectionMerge %60 None + OpBranchConditional %58 %59 %60 + %59 = OpLabel + %63 = OpAccessChain %_ptr_Output_float %_ %int_0 %uint_2 + OpStore %63 %float_0 + OpBranch %60 + %60 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert b/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert index b566a3d1a0f..64f6c92ce95 100644 --- a/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert +++ b/shaders-msl/asm/vert/spec-constant-op-composite.asm.vert @@ -49,8 +49,10 @@ %28 = OpConstant %17 2 %33 = OpConstant %12 20 %34 = OpConstant %12 30 + %int_3 = OpConstant %12 -3 + %bar = OpSpecConstantOp %12 SRem %13 %int_3 %35 = OpTypeVector %12 4 - %36 = OpSpecConstantComposite %35 %33 %34 %15 %15 + %36 = OpSpecConstantComposite %35 %33 %34 %15 %bar %40 = OpTypeVector %12 2 %41 = OpSpecConstantOp %40 VectorShuffle %36 %36 1 0 %foo = OpSpecConstantOp %12 CompositeExtract %36 1 @@ -63,6 +65,7 @@ %53 = OpConstant %12 0 %55 = OpTypePointer Output %7 %57 = OpSpecConstant %6 3.14159 + %baz = OpSpecConstantOp %6 QuantizeToF16 %57 %4 = OpFunction %2 None %3 %5 = OpLabel %9 = OpVariable %8 Function diff --git a/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp 
b/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp new file mode 100644 index 00000000000..72ca8899ad1 --- /dev/null +++ b/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp @@ -0,0 +1,10 @@ +#version 450 + +layout(set = 0, binding = 1, r32f) writeonly uniform image2D uImage; +layout(set = 0, binding = 2, r32f) readonly uniform image2D uImageRead; + +void main() +{ + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + imageStore(uImage, coord, imageLoad(uImageRead, coord)); +} diff --git a/shaders-msl/comp/basic.dispatchbase.comp b/shaders-msl/comp/basic.dispatchbase.comp new file mode 100644 index 00000000000..2c873468cc7 --- /dev/null +++ b/shaders-msl/comp/basic.dispatchbase.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x_id = 10) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint counter; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + uint workgroup = gl_WorkGroupID.x; + vec4 idata = in_data[ident]; + if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2) + { + out_data[atomicAdd(counter, 1u)] = idata; + } +} + diff --git a/shaders-msl/comp/basic.dispatchbase.msl11.comp b/shaders-msl/comp/basic.dispatchbase.msl11.comp new file mode 100644 index 00000000000..91453332aa4 --- /dev/null +++ b/shaders-msl/comp/basic.dispatchbase.msl11.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint counter; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + uint workgroup = gl_WorkGroupID.x; + vec4 idata = in_data[ident]; + if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2) + { + out_data[atomicAdd(counter, 1u)] = 
idata; + } +} + diff --git a/shaders-msl/comp/basic.inline-block.msl2.comp b/shaders-msl/comp/basic.inline-block.msl2.comp new file mode 100644 index 00000000000..8e1144a98e8 --- /dev/null +++ b/shaders-msl/comp/basic.inline-block.msl2.comp @@ -0,0 +1,37 @@ +#version 450 +#extension GL_EXT_scalar_block_layout : require +layout(local_size_x = 3, local_size_y = 3, local_size_z = 2) in; + +struct X +{ + int x; + int y; + float z; +}; + +layout(set = 0, binding = 0, scalar) uniform Foo +{ + int a; + int b; + mat4 c; + X x[2]; +}; + +layout(set = 0, binding = 1) uniform Bar +{ + int d; + int e; +}; + +layout(set = 1, binding = 2) buffer Baz +{ + int f; + int g; +} baz[3]; + +void main() +{ + uvec3 coords = gl_GlobalInvocationID; + baz[coords.x].f = a + d; + baz[coords.x].g = b * e; +} diff --git a/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp b/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp new file mode 100644 index 00000000000..9212e04c1ad --- /dev/null +++ b/shaders-msl/comp/buffer_device_address-packed-vec-and-cast-to-and-from-uvec2.msl23.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(buffer_reference, buffer_reference_align = 4) buffer SSBO +{ + vec3 a1; // Will be 12-byte packed + float a2; +}; + +layout(push_constant) uniform UBO +{ + uvec2 b; +}; + +void main() +{ + SSBO(b).a1 = vec3(1.0, 2.0, 3.0); // uvec2 -> buff ref and assign to packed + uvec2 v2 = uvec2(SSBO(b + 32)); // uvec2 -> buff ref -> uvec2 + vec3 v3 = SSBO(v2).a1; // uvec2 -> buff ref and assign from packed + SSBO(v2).a1 = v3 + 1.0; // uvec2 -> buff ref and assign to packed +} diff --git a/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp new file mode 100644 index 00000000000..2bb19eedad2 --- /dev/null +++ 
b/shaders-msl/comp/buffer_device_address-recursive-struct-pointers.msl23.comp @@ -0,0 +1,64 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(buffer_reference) buffer t21; +layout(buffer_reference, buffer_reference_align = 16, std140) buffer t21 +{ + int m0[2]; + int m1; + layout(row_major) t21 m2[2]; + layout(row_major) t21 m3; + layout(row_major) mat2 m4; +}; + +layout(set = 0, binding = 1, std140) uniform t24 +{ + int m0[2]; + int m1; + layout(row_major) t21 m2[2]; + layout(row_major) t21 m3; + layout(row_major) mat2 m4; +} u24; + +layout(push_constant, std430) uniform t35 +{ + int m0[32]; +} u35; + +layout(set = 0, binding = 0, r32ui) uniform writeonly uimage2D v295; + +void main() +{ + int v8 = 0; + v8 |= (u24.m0[0] - 0); + v8 |= (u24.m0[u35.m0[1]] - 1); + v8 |= (u24.m1 - 2); + v8 |= int(u24.m4[0].x - 3.0); + v8 |= int(u24.m4[0].y - 5.0); + v8 |= int(u24.m4[1].x - 4.0); + v8 |= int(u24.m4[1].y - 6.0); + v8 |= (u24.m2[0].m0[0] - 3); + v8 |= (u24.m2[0].m0[u35.m0[1]] - 4); + v8 |= (u24.m2[0].m1 - 5); + v8 |= int(u24.m2[0].m4[0].x - 6.0); + v8 |= int(u24.m2[0].m4[0].y - 8.0); + v8 |= int(u24.m2[0].m4[1].x - 7.0); + v8 |= int(u24.m2[0].m4[1].y - 9.0); + v8 |= (u24.m2[u35.m0[1]].m0[0] - 6); + v8 |= (u24.m2[u35.m0[1]].m0[u35.m0[1]] - 7); + v8 |= (u24.m2[u35.m0[1]].m1 - 8); + v8 |= int(u24.m2[u35.m0[1]].m4[0].x - 9.0); + v8 |= int(u24.m2[u35.m0[1]].m4[0].y - 11.0); + v8 |= int(u24.m2[u35.m0[1]].m4[1].x - 10.0); + v8 |= int(u24.m2[u35.m0[1]].m4[1].y - 12.0); + v8 |= (u24.m3.m0[0] - 9); + v8 |= (u24.m3.m0[u35.m0[1]] - 10); + v8 |= (u24.m3.m1 - 11); + v8 |= int(u24.m3.m4[0].x - 12.0); + v8 |= int(u24.m3.m4[0].y - 14.0); + v8 |= int(u24.m3.m4[1].x - 13.0); + v8 |= int(u24.m3.m4[1].y - 15.0); + uvec4 v284 = mix(uvec4(1u, 0u, 0u, 1u), uvec4(0u), bvec4(v8 != 0)); + imageStore(v295, ivec2(gl_GlobalInvocationID.xy), v284); +} diff --git a/shaders-msl/comp/buffer_device_address.msl2.comp 
b/shaders-msl/comp/buffer_device_address.msl2.comp new file mode 100644 index 00000000000..14ac1ef9dc0 --- /dev/null +++ b/shaders-msl/comp/buffer_device_address.msl2.comp @@ -0,0 +1,86 @@ +/* Copyright (c) 2021, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require + +layout(local_size_x = 8, local_size_y = 8) in; + +// If we mark a buffer as buffer_reference, this is treated as a pointer type. +// A variable with the type Position is a 64-bit pointer to the data within. +// We can freely cast between pointer types if we wish, but that is not necessary in this sample. +// buffer_reference_align is used to let the underlying implementation know which alignment to expect. +// The pointer can have scalar alignment, which is something the compiler cannot know unless you tell it. +// It is best to use vector alignment when you can for optimal performance, but scalar alignment is sometimes useful. +// With SSBOs, the API has a minimum offset alignment which guarantees a minimum level of alignment from API side. + +// It is possible to forward reference a pointer, so you can contain a pointer to yourself inside a struct. +// Useful if you need something like a linked list on the GPU. +// Here it's not particularly useful, but something to know about. 
+layout(buffer_reference) buffer Position; + +layout(std430, buffer_reference, buffer_reference_align = 8) writeonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // This buffer contains an array of pointers to other buffers. + Position buffers[]; +}; + +// In push constant we place a pointer to VBO pointers, spicy! +// This way we don't need any descriptor sets, but there's nothing wrong with combining use of descriptor sets and buffer device addresses. +// It is mostly done for convenience here. +layout(push_constant) uniform Registers +{ + PositionReferences references; + // A buffer reference is 64-bit, so offset of fract_time is 8 bytes. + float fract_time; +} registers; + +void main() +{ + // Every slice is a 8x8 grid of vertices which we update here in compute. + uvec2 local_offset = gl_GlobalInvocationID.xy; + uint local_index = local_offset.y * gl_WorkGroupSize.x * gl_NumWorkGroups.x + local_offset.x; + uint slice = gl_WorkGroupID.z; + + restrict Position positions = registers.references.buffers[slice]; + + // This is a trivial wave-like function. Arbitrary for demonstration purposes. + const float TWO_PI = 3.1415628 * 2.0; + float offset = TWO_PI * fract(registers.fract_time + float(slice) * 0.1); + + // Simple grid. + vec2 pos = vec2(local_offset); + + // Wobble, wobble. + pos.x += 0.2 * sin(2.2 * pos.x + offset); + pos.y += 0.2 * sin(2.25 * pos.y + 2.0 * offset); + pos.x += 0.2 * cos(1.8 * pos.y + 3.0 * offset); + pos.y += 0.2 * cos(2.85 * pos.x + 4.0 * offset); + pos.x += 0.5 * sin(offset); + pos.y += 0.5 * sin(offset + 0.3); + + // Center the mesh in [-0.5, 0.5] range. + // Here we write to a raw pointer. + // Be aware, there is no robustness support for buffer_device_address since we don't have a complete descriptor! 
+ positions.positions[local_index] = pos / (vec2(gl_WorkGroupSize.xy) * vec2(gl_NumWorkGroups.xy) - 1.0) - 0.5; +} diff --git a/shaders-msl/comp/complex-composite-constant-array.comp b/shaders-msl/comp/complex-composite-constant-array.comp new file mode 100644 index 00000000000..96a3f8951d0 --- /dev/null +++ b/shaders-msl/comp/complex-composite-constant-array.comp @@ -0,0 +1,19 @@ +#version 450 + +layout(std430, set = 0, binding = 0) buffer SSBO +{ + mat4 a; + uint index; +}; + +const mat4 as[] = mat4[](mat4(1.0), mat4(2.0)); + +void write_global() +{ + a = as[index]; +} + +void main() +{ + write_global(); +} diff --git a/shaders-msl/comp/composite-array-initialization.force-native-array.comp b/shaders-msl/comp/composite-array-initialization.force-native-array.comp new file mode 100644 index 00000000000..1ecf4bcd406 --- /dev/null +++ b/shaders-msl/comp/composite-array-initialization.force-native-array.comp @@ -0,0 +1,28 @@ +#version 450 +layout(local_size_x = 2) in; + +struct Data +{ + float a; + float b; +}; + +layout(std430, binding = 0) buffer SSBO +{ + Data outdata[]; +}; + +layout(constant_id = 0) const float X = 4.0; + +Data data[2] = Data[](Data(1.0, 2.0), Data(3.0, 4.0)); +Data data2[2] = Data[](Data(X, 2.0), Data(3.0, 5.0)); + +Data combine(Data a, Data b) +{ + return Data(a.a + b.a, a.b + b.b); +} + +void main() +{ + outdata[gl_WorkGroupID.x] = combine(data[gl_LocalInvocationID.x], data2[gl_LocalInvocationID.x]); +} diff --git a/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp b/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp new file mode 100644 index 00000000000..edf87195b86 --- /dev/null +++ b/shaders-msl/comp/copy-array-of-arrays.force-native-array.comp @@ -0,0 +1,21 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, std430) buffer BUF +{ + int a; + float b; + float c; +} o; + +void main() +{ + const float a[2][2][2] = float[][][](float[][](float[](1.0, 2.0), float[](3.0, 4.0)), 
float[][](float[](1.0, 2.0), float[](3.0, 4.0))); + float b[2][2][2] = a; + float c[2][2][2] = b; + o.a = int(c[1][1][1]); + + float d[2][2][2] = float[][][](float[][](float[](o.b, o.c), float[](o.b, o.b)), float[][](float[](o.c, o.c), float[](o.c, o.b))); + float e[2][2][2] = d; + o.b = e[1][0][1]; +} diff --git a/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp b/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp new file mode 100644 index 00000000000..862cd212978 --- /dev/null +++ b/shaders-msl/comp/image-atomic-automatic-bindings.argument.msl2.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, r32ui) uniform uimage2D uImage; +layout(set = 0, binding = 1) uniform sampler2D uTexture; + +layout(set = 0, binding = 2) buffer SSBO +{ + vec4 outdata; +}; + +void main() +{ + uint ret = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), 10u); + outdata = textureLod(uTexture, vec2(gl_GlobalInvocationID.xy), 0.0) + float(ret); +} diff --git a/shaders-msl/comp/image-atomic-automatic-bindings.comp b/shaders-msl/comp/image-atomic-automatic-bindings.comp new file mode 100644 index 00000000000..862cd212978 --- /dev/null +++ b/shaders-msl/comp/image-atomic-automatic-bindings.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, r32ui) uniform uimage2D uImage; +layout(set = 0, binding = 1) uniform sampler2D uTexture; + +layout(set = 0, binding = 2) buffer SSBO +{ + vec4 outdata; +}; + +void main() +{ + uint ret = imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), 10u); + outdata = textureLod(uTexture, vec2(gl_GlobalInvocationID.xy), 0.0) + float(ret); +} diff --git a/shaders-msl/comp/mat3-row-maj-read-write-const.comp b/shaders-msl/comp/mat3-row-maj-read-write-const.comp new file mode 100644 index 00000000000..068ad79721c --- /dev/null +++ b/shaders-msl/comp/mat3-row-maj-read-write-const.comp @@ -0,0 +1,17 @@ +#version 450 +layout(local_size_x 
= 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std430) buffer model_t +{ + layout(row_major) mediump mat3 mtx_rm; +} model; + +void main() +{ + mat3 mtx_cm = model.mtx_rm; + mat3 mtx1 = mtx_cm * mat3(vec3(4.0, -3.0, 1.0), vec3(-7.0, 7.0, -7.0), vec3(-5.0, 6.0, -8.0)); + if (mtx1[0][0] != 0.0) + { + model.mtx_rm = mat3(vec3(-5.0, -3.0, -5.0), vec3(-2.0, 2.0, -5.0), vec3(6.0, 3.0, -8.0)); + } +} diff --git a/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp b/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp new file mode 100644 index 00000000000..eea6a3df46b --- /dev/null +++ b/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.device-argument-buffer.msl2.comp @@ -0,0 +1,114 @@ +#version 450 +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0) buffer SSBO_A +{ + float data[]; +} ssbo_a; + +layout(set = 0, binding = 0) buffer SSBO_B +{ + uvec2 data[]; +} ssbo_b; + +layout(set = 0, binding = 0) readonly buffer SSBO_BRO +{ + uvec2 data[]; +} ssbo_b_readonly; + +layout(set = 0, binding = 1) uniform UBO_C +{ + float data[1024]; +} ubo_c; + +layout(set = 0, binding = 1) uniform UBO_D +{ + uvec2 data[1024]; +} ubo_d; + +layout(set = 0, binding = 2) buffer SSBO_As +{ + float data[]; +} ssbo_as[4]; + +layout(set = 0, binding = 2) buffer SSBO_Bs +{ + uvec2 data[1024]; +} ssbo_bs[4]; + +layout(set = 0, binding = 2) readonly buffer SSBO_BsRO +{ + uvec2 data[1024]; +} ssbo_bs_readonly[4]; + +layout(set = 0, binding = 3) uniform UBO_Cs +{ + float data[1024]; +} ubo_cs[4]; + +layout(set = 0, binding = 3) uniform UBO_Ds +{ + uvec2 data[1024]; +} ubo_ds[4]; + +layout(set = 2, binding = 0) buffer SSBO_E +{ + float data[]; +} ssbo_e; + +layout(set = 2, binding = 0) buffer SSBO_F +{ + uvec2 data[]; +} ssbo_f; + +layout(set = 2, binding = 1) uniform UBO_G +{ + float data[1024]; +} ubo_g; + +layout(set = 2, binding = 1) uniform UBO_H +{ 
+ uvec2 data[1024]; +} ubo_h; + +layout(set = 2, binding = 0) readonly buffer SSBO_I +{ + uvec2 data[]; +} ssbo_i; + +layout(push_constant) uniform Registers +{ + float reg; +}; + +void func0() +{ + ssbo_a.data[gl_GlobalInvocationID.x] = ubo_c.data[gl_WorkGroupID.x] + reg; + ssbo_b.data[gl_GlobalInvocationID.x] = + ubo_d.data[gl_WorkGroupID.y] + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; +} + +void func1() +{ + ssbo_as[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x] = ubo_cs[gl_WorkGroupID.x].data[0]; +} + +void func2() +{ + ssbo_bs[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x] = + ubo_ds[gl_WorkGroupID.x].data[0] + ssbo_bs_readonly[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x]; +} + +void func3() +{ + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x]; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y] + ssbo_i.data[gl_GlobalInvocationID.x]; +} + +void main() +{ + func0(); + func1(); + func2(); + func3(); +} diff --git a/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp b/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp new file mode 100644 index 00000000000..eea6a3df46b --- /dev/null +++ b/shaders-msl/comp/raw-buffer-descriptor-aliasing.argument.discrete.msl2.comp @@ -0,0 +1,114 @@ +#version 450 +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0) buffer SSBO_A +{ + float data[]; +} ssbo_a; + +layout(set = 0, binding = 0) buffer SSBO_B +{ + uvec2 data[]; +} ssbo_b; + +layout(set = 0, binding = 0) readonly buffer SSBO_BRO +{ + uvec2 data[]; +} ssbo_b_readonly; + +layout(set = 0, binding = 1) uniform UBO_C +{ + float data[1024]; +} ubo_c; + +layout(set = 0, binding = 1) uniform UBO_D +{ + uvec2 data[1024]; +} ubo_d; + +layout(set = 0, binding = 2) buffer SSBO_As +{ + float data[]; +} ssbo_as[4]; + +layout(set = 0, binding = 2) buffer SSBO_Bs +{ + uvec2 data[1024]; +} ssbo_bs[4]; + +layout(set = 0, binding = 2) readonly buffer SSBO_BsRO +{ + uvec2 data[1024]; 
+} ssbo_bs_readonly[4]; + +layout(set = 0, binding = 3) uniform UBO_Cs +{ + float data[1024]; +} ubo_cs[4]; + +layout(set = 0, binding = 3) uniform UBO_Ds +{ + uvec2 data[1024]; +} ubo_ds[4]; + +layout(set = 2, binding = 0) buffer SSBO_E +{ + float data[]; +} ssbo_e; + +layout(set = 2, binding = 0) buffer SSBO_F +{ + uvec2 data[]; +} ssbo_f; + +layout(set = 2, binding = 1) uniform UBO_G +{ + float data[1024]; +} ubo_g; + +layout(set = 2, binding = 1) uniform UBO_H +{ + uvec2 data[1024]; +} ubo_h; + +layout(set = 2, binding = 0) readonly buffer SSBO_I +{ + uvec2 data[]; +} ssbo_i; + +layout(push_constant) uniform Registers +{ + float reg; +}; + +void func0() +{ + ssbo_a.data[gl_GlobalInvocationID.x] = ubo_c.data[gl_WorkGroupID.x] + reg; + ssbo_b.data[gl_GlobalInvocationID.x] = + ubo_d.data[gl_WorkGroupID.y] + ssbo_b_readonly.data[gl_GlobalInvocationID.x]; +} + +void func1() +{ + ssbo_as[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x] = ubo_cs[gl_WorkGroupID.x].data[0]; +} + +void func2() +{ + ssbo_bs[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x] = + ubo_ds[gl_WorkGroupID.x].data[0] + ssbo_bs_readonly[gl_WorkGroupID.x].data[gl_GlobalInvocationID.x]; +} + +void func3() +{ + ssbo_e.data[gl_GlobalInvocationID.x] = ubo_g.data[gl_WorkGroupID.x]; + ssbo_f.data[gl_GlobalInvocationID.x] = ubo_h.data[gl_WorkGroupID.y] + ssbo_i.data[gl_GlobalInvocationID.x]; +} + +void main() +{ + func0(); + func1(); + func2(); + func3(); +} diff --git a/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp b/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp new file mode 100644 index 00000000000..fba72ad0d2d --- /dev/null +++ b/shaders-msl/comp/ray-query.spv14.vk.ios.msl24..invalid.comp @@ -0,0 +1,58 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_tracing : require +#extension GL_EXT_ray_flags_primitive_culling : require +layout(primitive_culling); + +layout(set = 0, binding = 0) uniform accelerationStructureEXT AS0; +layout(set = 0, binding = 
1) uniform accelerationStructureEXT AS1; + +layout(set = 0, binding = 2) uniform Params +{ + uint ray_flags; + uint cull_mask; + vec3 origin; + float tmin; + vec3 dir; + float tmax; + float thit; +}; + +rayQueryEXT q2[2]; + +void main() +{ + rayQueryEXT q; + bool res; + uint type; + float fval; + vec3 fvals; + int ival; + mat4x3 matrices; + + rayQueryInitializeEXT(q, AS0, ray_flags, cull_mask, origin, tmin, dir, tmax); + rayQueryInitializeEXT(q2[1], AS1, ray_flags, cull_mask, origin, tmin, dir, tmax); + + res = rayQueryProceedEXT(q); + rayQueryTerminateEXT(q2[0]); + rayQueryGenerateIntersectionEXT(q, thit); + rayQueryConfirmIntersectionEXT(q2[1]); + fval = rayQueryGetRayTMinEXT(q); + fvals = rayQueryGetWorldRayDirectionEXT(q); + fvals = rayQueryGetWorldRayOriginEXT(q); + type = rayQueryGetIntersectionTypeEXT(q2[1], true); + type = rayQueryGetIntersectionTypeEXT(q2[0], false); + res = rayQueryGetIntersectionCandidateAABBOpaqueEXT(q2[1]); + fval = rayQueryGetIntersectionTEXT(q2[1], true); + fval = rayQueryGetIntersectionTEXT(q2[1], false); + ival = rayQueryGetIntersectionInstanceCustomIndexEXT(q, true); + ival = rayQueryGetIntersectionInstanceIdEXT(q2[0], false); + ival = rayQueryGetIntersectionGeometryIndexEXT(q2[1], false); + ival = rayQueryGetIntersectionPrimitiveIndexEXT(q, true); + fvals.xy = rayQueryGetIntersectionBarycentricsEXT(q2[0], false); + res = rayQueryGetIntersectionFrontFaceEXT(q, true); + fvals = rayQueryGetIntersectionObjectRayDirectionEXT(q, false); + fvals = rayQueryGetIntersectionObjectRayOriginEXT(q2[0], true); + matrices = rayQueryGetIntersectionObjectToWorldEXT(q, false); + matrices = rayQueryGetIntersectionWorldToObjectEXT(q2[1], true); +} diff --git a/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/shaders-msl/comp/scalar-std450-distance-length-normalize.comp index 37414737fd2..63546322981 100644 --- a/shaders-msl/comp/scalar-std450-distance-length-normalize.comp +++ 
b/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -8,6 +8,7 @@ layout(std430, set = 0, binding = 0) buffer SSBO float c; float d; float e; + float f; }; void main() @@ -15,4 +16,5 @@ void main() c = distance(a, b); d = length(a); e = normalize(a); + f = distance(a-1, b-2); } diff --git a/shaders-msl/comp/shared-matrix-array-of-array.comp b/shaders-msl/comp/shared-matrix-array-of-array.comp new file mode 100644 index 00000000000..3bbd4c0f0c3 --- /dev/null +++ b/shaders-msl/comp/shared-matrix-array-of-array.comp @@ -0,0 +1,65 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct S1 { + mediump mat4x3 a[2]; + lowp float b; + lowp vec2 c[3]; +}; +struct S2 { + highp ivec4 a; + bool b[3][1][3]; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec3 (highp vec3 a, highp vec3 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); } +bool compare_mat4x3 (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); } +bool compare_ivec4 (highp ivec4 a, highp ivec4 b) { return a == b; } +bool compare_bool (bool a, bool b) { return a == b; } + +shared S1 s1; +shared S2 s2; + +void main (void) { + s1.a[0] = mat4x3(0.0, 2.0, -8.0, 6.0, 7.0, 5.0, -6.0, 1.0, 9.0, -4.0, -3.0, 4.0); + s1.a[1] = mat4x3(4.0, 9.0, -9.0, -8.0, -9.0, 8.0, 0.0, 4.0, -4.0, 7.0, 2.0, -1.0); + s1.b = 7.0; + s1.c[0] = vec2(-5.0, -4.0); + s1.c[1] = vec2(3.0, -5.0); + s1.c[2] = vec2(-3.0, -1.0); + s2.a = ivec4(1, 0, -3, 1); + s2.b[0][0][0] = true; + s2.b[0][0][1] = false; + s2.b[0][0][2] = false; + s2.b[1][0][0] = true; + s2.b[1][0][1] = false; + s2.b[1][0][2] = true; + s2.b[2][0][0] = false; + s2.b[2][0][1] = true; + s2.b[2][0][2] = true; + + barrier(); + 
memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_mat4x3(mat4x3(0.0, 2.0, -8.0, 6.0, 7.0, 5.0, -6.0, 1.0, 9.0, -4.0, -3.0, 4.0), s1.a[0]); + allOk = allOk && compare_mat4x3(mat4x3(4.0, 9.0, -9.0, -8.0, -9.0, 8.0, 0.0, 4.0, -4.0, 7.0, 2.0, -1.0), s1.a[1]); + allOk = allOk && compare_float(7.0, s1.b); + allOk = allOk && compare_vec2(vec2(-5.0, -4.0), s1.c[0]); + allOk = allOk && compare_vec2(vec2(3.0, -5.0), s1.c[1]); + allOk = allOk && compare_vec2(vec2(-3.0, -1.0), s1.c[2]); + allOk = allOk && compare_ivec4(ivec4(1, 0, -3, 1), s2.a); + allOk = allOk && compare_bool(true, s2.b[0][0][0]); + allOk = allOk && compare_bool(false, s2.b[0][0][1]); + allOk = allOk && compare_bool(false, s2.b[0][0][2]); + allOk = allOk && compare_bool(true, s2.b[1][0][0]); + allOk = allOk && compare_bool(false, s2.b[1][0][1]); + allOk = allOk && compare_bool(true, s2.b[1][0][2]); + allOk = allOk && compare_bool(false, s2.b[2][0][0]); + allOk = allOk && compare_bool(true, s2.b[2][0][1]); + allOk = allOk && compare_bool(true, s2.b[2][0][2]); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/shared-matrix-cast.comp b/shaders-msl/comp/shared-matrix-cast.comp new file mode 100644 index 00000000000..7e46fed7ae2 --- /dev/null +++ b/shaders-msl/comp/shared-matrix-cast.comp @@ -0,0 +1,33 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct S1 { + mediump vec4 a; + highp mat3x2 b; + bvec4 c; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec4 (highp vec4 a, highp vec4 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); } +bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); } +bool compare_bvec4 (bvec4 
a, bvec4 b) { return a == b; } + +shared S1 s1; + +void main (void) { + s1.a = vec4(1.0, -5.0, -9.0, -5.0); + s1.b = mat3x2(1.0, -7.0, 1.0, 2.0, 8.0, 7.0); + s1.c = bvec4(false, true, false, false); + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_vec4(vec4(1.0, -5.0, -9.0, -5.0), s1.a); + allOk = allOk && compare_mat3x2(mat3x2(1.0, -7.0, 1.0, 2.0, 8.0, 7.0), s1.b); + allOk = allOk && compare_bvec4(bvec4(false, true, false, false), s1.c); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/shared-matrix-nested-struct-array.comp b/shaders-msl/comp/shared-matrix-nested-struct-array.comp new file mode 100644 index 00000000000..59ab24d8480 --- /dev/null +++ b/shaders-msl/comp/shared-matrix-nested-struct-array.comp @@ -0,0 +1,87 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct sA +{ + mediump mat2x3 mA; +}; +struct sB +{ + mediump mat2 mA; + mediump mat3x2 mB; + highp uvec3 mC; +}; +struct sC +{ + sA mA; + sB mB; +}; +struct sD +{ + sC mA; +}; +struct sE +{ + lowp mat3x2 mA; + lowp mat4x3 mB; +}; +struct sF +{ + sE mA; +}; +struct sG +{ + sF mA; +}; +struct sH +{ + bvec3 mA[2]; +}; +struct S1 { + sD a; + sG b; + sH c[2]; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec3 (highp vec3 a, highp vec3 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); } +bool compare_mat2 (highp mat2 a, highp mat2 b) { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); } +bool compare_mat2x3 (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1]); } +bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); } +bool compare_mat4x3 (highp mat4x3 a, highp mat4x3 
b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); } +bool compare_uvec3 (highp uvec3 a, highp uvec3 b) { return a == b; } +bool compare_bvec3 (bvec3 a, bvec3 b) { return a == b; } + +shared S1 s1; + +void main (void) { + s1.a.mA.mA.mA = mat2x3(6.0, 8.0, 8.0, 0.0, -4.0, -5.0); + s1.a.mA.mB.mA = mat2(9.0, -4.0, -6.0, -1.0); + s1.a.mA.mB.mB = mat3x2(-1.0, -2.0, 1.0, 6.0, 5.0, 7.0); + s1.a.mA.mB.mC = uvec3(3u, 1u, 5u); + s1.b.mA.mA.mA = mat3x2(8.0, 3.0, 0.0, 2.0, 1.0, 8.0); + s1.b.mA.mA.mB = mat4x3(0.0, 9.0, -1.0, -1.0, -7.0, 7.0, -4.0, -3.0, 1.0, -4.0, -9.0, 1.0); + s1.c[0].mA[0] = bvec3(true, false, false); + s1.c[0].mA[1] = bvec3(true, false, false); + s1.c[1].mA[0] = bvec3(false, false, false); + s1.c[1].mA[1] = bvec3(false, false, false); + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_mat2x3(mat2x3(6.0, 8.0, 8.0, 0.0, -4.0, -5.0), s1.a.mA.mA.mA); + allOk = allOk && compare_mat2(mat2(9.0, -4.0, -6.0, -1.0), s1.a.mA.mB.mA); + allOk = allOk && compare_mat3x2(mat3x2(-1.0, -2.0, 1.0, 6.0, 5.0, 7.0), s1.a.mA.mB.mB); + allOk = allOk && compare_uvec3(uvec3(3u, 1u, 5u), s1.a.mA.mB.mC); + allOk = allOk && compare_mat3x2(mat3x2(8.0, 3.0, 0.0, 2.0, 1.0, 8.0), s1.b.mA.mA.mA); + allOk = allOk && compare_mat4x3(mat4x3(0.0, 9.0, -1.0, -1.0, -7.0, 7.0, -4.0, -3.0, 1.0, -4.0, -9.0, 1.0), s1.b.mA.mA.mB); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s1.c[0].mA[0]); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s1.c[0].mA[1]); + allOk = allOk && compare_bvec3(bvec3(false, false, false), s1.c[1].mA[0]); + allOk = allOk && compare_bvec3(bvec3(false, false, false), s1.c[1].mA[1]); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/shared-matrix-nested-struct.comp b/shaders-msl/comp/shared-matrix-nested-struct.comp new file mode 100644 index 00000000000..c481f54a860 --- /dev/null +++ b/shaders-msl/comp/shared-matrix-nested-struct.comp @@ -0,0 +1,141 @@ 
+#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct sA +{ + highp mat4 mA; + bvec3 mB; + bvec4 mC; +}; +struct sB +{ + bvec2 mA; +}; +struct sC +{ + highp float mA; + mediump uvec4 mB; + mediump float mC; +}; +struct sD +{ + sA mA; + sB mB; + sC mC; +}; +struct sE +{ + sD mA; +}; +struct sF +{ + lowp uvec3 mA; + bool mB; +}; +struct sG +{ + sF mA; + highp mat3x2 mB; +}; +struct sH +{ + sG mA; + mediump vec2 mB; +}; +struct sI +{ + mediump mat2 mA; + bvec3 mB; + bvec4 mC; +}; +struct sJ +{ + sI mA; + bvec3 mB; +}; +struct sK +{ + bvec2 mA; + sJ mB; + mediump ivec2 mC; +}; +struct S1 { + lowp uint a; + mediump vec4 b; +}; +struct S2 { + sE a; + highp ivec3 b; + sH c; + sK d; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec4 (highp vec4 a, highp vec4 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); } +bool compare_mat2 (highp mat2 a, highp mat2 b) { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); } +bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); } +bool compare_mat4 (highp mat4 a, highp mat4 b) { return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2])&&compare_vec4(a[3], b[3]); } +bool compare_ivec2 (highp ivec2 a, highp ivec2 b) { return a == b; } +bool compare_ivec3 (highp ivec3 a, highp ivec3 b) { return a == b; } +bool compare_uint (highp uint a, highp uint b) { return a == b; } +bool compare_uvec3 (highp uvec3 a, highp uvec3 b) { return a == b; } +bool compare_uvec4 (highp uvec4 a, highp uvec4 b) { return a == b; } +bool compare_bool (bool a, bool b) { return a == b; } +bool compare_bvec2 (bvec2 a, bvec2 b) { return a == b; } +bool 
compare_bvec3 (bvec3 a, bvec3 b) { return a == b; } +bool compare_bvec4 (bvec4 a, bvec4 b) { return a == b; } + +shared S1 s1; +shared S2 s2; + +void main (void) { + s1.a = 0u; + s1.b = vec4(8.0, 8.0, 0.0, -4.0); + s2.a.mA.mA.mA = mat4(-5.0, 9.0, -4.0, -6.0, -1.0, -1.0, -2.0, 1.0, 6.0, 5.0, 7.0, -2.0, -4.0, -9.0, 8.0, 3.0); + s2.a.mA.mA.mB = bvec3(true, false, false); + s2.a.mA.mA.mC = bvec4(true, true, true, false); + s2.a.mA.mB.mA = bvec2(true, true); + s2.a.mA.mC.mA = 7.0; + s2.a.mA.mC.mB = uvec4(8u, 6u, 2u, 0u); + s2.a.mA.mC.mC = -9.0; + s2.b = ivec3(1, -4, 0); + s2.c.mA.mA.mA = uvec3(4u, 9u, 1u); + s2.c.mA.mA.mB = false; + s2.c.mA.mB = mat3x2(3.0, -5.0, -1.0, -5.0, -1.0, -9.0); + s2.c.mB = vec2(-6.0, -9.0); + s2.d.mA = bvec2(true, false); + s2.d.mB.mA.mA = mat2(-2.0, 3.0, 7.0, 2.0); + s2.d.mB.mA.mB = bvec3(false, false, false); + s2.d.mB.mA.mC = bvec4(false, false, false, true); + s2.d.mB.mB = bvec3(true, false, false); + s2.d.mC = ivec2(-9, 0); + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_uint(0u, s1.a); + allOk = allOk && compare_vec4(vec4(8.0, 8.0, 0.0, -4.0), s1.b); + allOk = allOk && compare_mat4(mat4(-5.0, 9.0, -4.0, -6.0, -1.0, -1.0, -2.0, 1.0, 6.0, 5.0, 7.0, -2.0, -4.0, -9.0, 8.0, 3.0), s2.a.mA.mA.mA); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s2.a.mA.mA.mB); + allOk = allOk && compare_bvec4(bvec4(true, true, true, false), s2.a.mA.mA.mC); + allOk = allOk && compare_bvec2(bvec2(true, true), s2.a.mA.mB.mA); + allOk = allOk && compare_float(7.0, s2.a.mA.mC.mA); + allOk = allOk && compare_uvec4(uvec4(8u, 6u, 2u, 0u), s2.a.mA.mC.mB); + allOk = allOk && compare_float(-9.0, s2.a.mA.mC.mC); + allOk = allOk && compare_ivec3(ivec3(1, -4, 0), s2.b); + allOk = allOk && compare_uvec3(uvec3(4u, 9u, 1u), s2.c.mA.mA.mA); + allOk = allOk && compare_bool(false, s2.c.mA.mA.mB); + allOk = allOk && compare_mat3x2(mat3x2(3.0, -5.0, -1.0, -5.0, -1.0, -9.0), s2.c.mA.mB); + allOk = allOk && compare_vec2(vec2(-6.0, -9.0), 
s2.c.mB); + allOk = allOk && compare_bvec2(bvec2(true, false), s2.d.mA); + allOk = allOk && compare_mat2(mat2(-2.0, 3.0, 7.0, 2.0), s2.d.mB.mA.mA); + allOk = allOk && compare_bvec3(bvec3(false, false, false), s2.d.mB.mA.mB); + allOk = allOk && compare_bvec4(bvec4(false, false, false, true), s2.d.mB.mA.mC); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s2.d.mB.mB); + allOk = allOk && compare_ivec2(ivec2(-9, 0), s2.d.mC); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/shared-struct-bool-cast.comp b/shaders-msl/comp/shared-struct-bool-cast.comp new file mode 100644 index 00000000000..d6479b3e446 --- /dev/null +++ b/shaders-msl/comp/shared-struct-bool-cast.comp @@ -0,0 +1,35 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct S1 { + mediump ivec3 a; + highp uvec2 b; + bvec4 c; + mediump uint d; +}; + +bool compare_ivec3 (highp ivec3 a, highp ivec3 b) { return a == b; } +bool compare_uint (highp uint a, highp uint b) { return a == b; } +bool compare_uvec2 (highp uvec2 a, highp uvec2 b) { return a == b; } +bool compare_bvec4 (bvec4 a, bvec4 b) { return a == b; } + +shared S1 s1; + +void main (void) { + s1.a = ivec3(6, 8, 8); + s1.b = uvec2(4u, 4u); + s1.c = bvec4(false, false, false, true); + s1.d = 6u; + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_ivec3(ivec3(6, 8, 8), s1.a); + allOk = allOk && compare_uvec2(uvec2(4u, 4u), s1.b); + allOk = allOk && compare_bvec4(bvec4(false, false, false, true), s1.c); + allOk = allOk && compare_uint(6u, s1.d); + if (allOk) + passed++; + +} diff --git a/shaders-msl/comp/threadgroup-boolean-workaround.comp b/shaders-msl/comp/threadgroup-boolean-workaround.comp new file mode 100644 index 00000000000..8dce77a252c --- /dev/null +++ b/shaders-msl/comp/threadgroup-boolean-workaround.comp @@ -0,0 +1,21 @@ +#version 450 +layout(local_size_x = 4) in; + +shared bvec4 foo[4]; + +layout(binding = 0) buffer SSBO +{ + 
vec4 values[]; +}; + +void in_function() +{ + foo[gl_LocalInvocationIndex] = notEqual(values[gl_GlobalInvocationID.x], vec4(10.0)); + barrier(); + values[gl_GlobalInvocationID.x] = mix(vec4(40.0), vec4(30.0), foo[gl_LocalInvocationIndex ^ 3]); +} + +void main() +{ + in_function(); +} diff --git a/shaders-msl/comp/type_casting_i64.msl22.comp b/shaders-msl/comp/type_casting_i64.msl22.comp new file mode 100644 index 00000000000..45e682e586a --- /dev/null +++ b/shaders-msl/comp/type_casting_i64.msl22.comp @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(constant_id = 0) const int base_val = 0; +layout(constant_id = 1) const int64_t shift_val = 0; +const int offset = base_val >> shift_val; + +layout(set = 0, binding = 0, std430) buffer src_buff_t +{ + int m0[]; +} src_buff; + +layout(set = 0, binding = 1, std430) buffer dst_buff_t +{ + int m0[]; +} dst_buff; + +void main() +{ + dst_buff.m0[gl_GlobalInvocationID.x] = src_buff.m0[gl_GlobalInvocationID.x] + offset; +} + diff --git a/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc b/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..a258afb367b --- /dev/null +++ b/shaders-msl/desktop-only/tesc/basic.desktop.sso.multi-patch.tesc @@ -0,0 +1,32 @@ +#version 450 +layout(vertices = 1) out; + +in gl_PerVertex +{ + vec4 gl_Position; +} gl_in[gl_MaxPatchVertices]; + +out gl_PerVertex +{ + vec4 gl_Position; +} gl_out[1]; + +layout(location = 0) patch out vec3 vFoo; + +void set_position() +{ + gl_out[gl_InvocationID].gl_Position = gl_in[0].gl_Position + gl_in[1].gl_Position; +} + +void main() +{ + gl_TessLevelInner[0] = 8.9; + gl_TessLevelInner[1] = 6.9; + gl_TessLevelOuter[0] = 8.9; + gl_TessLevelOuter[1] = 6.9; + gl_TessLevelOuter[2] = 3.9; + gl_TessLevelOuter[3] = 4.9; + vFoo = vec3(1.0); + + set_position(); +} diff --git 
a/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc b/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc new file mode 100644 index 00000000000..78d0d00cb68 --- /dev/null +++ b/shaders-msl/desktop-only/tesc/struct-copy.desktop.sso.multi-patch.tesc @@ -0,0 +1,22 @@ +#version 450 + +struct Boo +{ + vec3 a; + uvec3 b; +}; + +layout(vertices = 4) out; +layout(location = 0) out Boo vVertex[]; +layout(location = 0) in Boo vInput[]; + +void main() +{ + vVertex[gl_InvocationID] = vInput[gl_InvocationID]; + gl_TessLevelOuter[0] = 1.0; + gl_TessLevelOuter[1] = 2.0; + gl_TessLevelOuter[2] = 3.0; + gl_TessLevelOuter[3] = 4.0; + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 2.0; +} diff --git a/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert b/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert new file mode 100644 index 00000000000..9c0f1d5f369 --- /dev/null +++ b/shaders-msl/desktop-only/vert/clip-cull-distance..no-user-varying.desktop.vert @@ -0,0 +1,10 @@ +#version 450 + +void main() +{ + gl_Position = vec4(10.0); + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 4.0; + //gl_CullDistance[0] = 4.0; + //gl_CullDistance[1] = 9.0; +} diff --git a/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert b/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert new file mode 100644 index 00000000000..fadd1e73bfd --- /dev/null +++ b/shaders-msl/desktop-only/vert/shader-draw-parameters.desktop.for-tess.vert @@ -0,0 +1,11 @@ +#version 460 + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + gl_Position = vec4(gl_BaseVertex, gl_BaseInstance, 0, 1); +} diff --git a/shaders-msl/frag/array-component-io.frag b/shaders-msl/frag/array-component-io.frag new file mode 100644 index 00000000000..8d88249e78f --- /dev/null +++ b/shaders-msl/frag/array-component-io.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 1, component = 0) 
out float A[2]; +layout(location = 1, component = 2) out vec2 B[2]; +layout(location = 0, component = 1) out float C[3]; +layout(location = 0, component = 3) out float D; + +layout(location = 1, component = 0) flat in float InA[2]; +layout(location = 1, component = 2) flat in vec2 InB[2]; +layout(location = 0, component = 1) flat in float InC[3]; +layout(location = 3, component = 1) sample in float InD; +layout(location = 4, component = 2) noperspective in float InE; +layout(location = 5, component = 3) centroid in float InF; + +void main() +{ + A = InA; + B = InB; + C = InC; + D = InD + InE + InF; +} diff --git a/shaders-msl/frag/array-of-array-lut.frag b/shaders-msl/frag/array-of-array-lut.frag new file mode 100644 index 00000000000..c401a3fe372 --- /dev/null +++ b/shaders-msl/frag/array-of-array-lut.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out float vOutput; +layout(location = 0) flat in int vIndex1; +layout(location = 1) flat in int vIndex2; + +const float FOO[2][3] = float[][](float[](1.0, 2.0, 3.0), float[](4.0, 5.0, 6.0)); + +void main() +{ + vOutput = FOO[vIndex1][vIndex2]; +} diff --git a/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag b/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag new file mode 100644 index 00000000000..ec25ceb1c8f --- /dev/null +++ b/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.argument.discrete.swizzle.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2D uSampler[4]; +layout(set = 0, binding = 1) uniform sampler2D uSamp; + +layout(set = 0, binding = 2) uniform UBO +{ + uint index; +} uUBO; + +layout(set = 0, binding = 3) uniform UBO2 +{ + uint index2; +}; + +layout(location = 0) in vec2 vUV; + +layout(location = 0) out vec4 FragColor; + +vec4 sample_in_func() +{ + return texture(uSampler[uUBO.index], vUV); +} + +vec4 sample_single_in_func(sampler2D s) +{ + return 
texture(s, vUV); +} + +void main() +{ + FragColor = sample_in_func(); + FragColor += sample_single_in_func(uSampler[index2]); +} diff --git a/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag b/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag new file mode 100644 index 00000000000..ec25ceb1c8f --- /dev/null +++ b/shaders-msl/frag/array-of-texture-swizzle-nonconstant-uniform.msl2.swizzle.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2D uSampler[4]; +layout(set = 0, binding = 1) uniform sampler2D uSamp; + +layout(set = 0, binding = 2) uniform UBO +{ + uint index; +} uUBO; + +layout(set = 0, binding = 3) uniform UBO2 +{ + uint index2; +}; + +layout(location = 0) in vec2 vUV; + +layout(location = 0) out vec4 FragColor; + +vec4 sample_in_func() +{ + return texture(uSampler[uUBO.index], vUV); +} + +vec4 sample_single_in_func(sampler2D s) +{ + return texture(s, vUV); +} + +void main() +{ + FragColor = sample_in_func(); + FragColor += sample_single_in_func(uSampler[index2]); +} diff --git a/shaders-msl/frag/basic.force-sample.frag b/shaders-msl/frag/basic.force-sample.frag new file mode 100644 index 00000000000..dd9a8f85074 --- /dev/null +++ b/shaders-msl/frag/basic.force-sample.frag @@ -0,0 +1,13 @@ +#version 310 es +precision mediump float; + +layout(location = 0) in vec4 vColor; +layout(location = 1) in vec2 vTex; +layout(binding = 0) uniform sampler2D uTex; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vColor * texture(uTex, vTex); +} + diff --git a/shaders-msl/frag/bitcasting.1d-as-2d.frag b/shaders-msl/frag/bitcasting.1d-as-2d.frag new file mode 100644 index 00000000000..adaa749f754 --- /dev/null +++ b/shaders-msl/frag/bitcasting.1d-as-2d.frag @@ -0,0 +1,23 @@ +#version 450 + +layout(binding = 0) uniform sampler1D TextureBase; +layout(binding = 1) uniform sampler1D TextureDetail; + +layout(location = 0) in vec4 VertGeom; + +layout(location = 
0) out vec4 FragColor0; +layout(location = 1) out vec4 FragColor1; + +void main() +{ + vec4 texSample0 = texture(TextureBase, VertGeom.x); + vec4 texSample1 = textureOffset(TextureDetail, VertGeom.x, 3); + + ivec4 iResult0 = floatBitsToInt(texSample0); + ivec4 iResult1 = floatBitsToInt(texSample1); + FragColor0 = (intBitsToFloat(iResult0) * intBitsToFloat(iResult1)); + + uvec4 uResult0 = floatBitsToUint(texSample0); + uvec4 uResult1 = floatBitsToUint(texSample1); + FragColor1 = (uintBitsToFloat(uResult0) * uintBitsToFloat(uResult1)); +} diff --git a/shaders-msl/frag/clip-distance-varying.frag b/shaders-msl/frag/clip-distance-varying.frag new file mode 100644 index 00000000000..df49bd515c6 --- /dev/null +++ b/shaders-msl/frag/clip-distance-varying.frag @@ -0,0 +1,10 @@ +#version 450 + +in float gl_ClipDistance[2]; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0 - gl_ClipDistance[0] - gl_ClipDistance[1]); +} diff --git a/shaders-msl/frag/cull-distance-varying.frag b/shaders-msl/frag/cull-distance-varying.frag new file mode 100644 index 00000000000..8bade07e1dd --- /dev/null +++ b/shaders-msl/frag/cull-distance-varying.frag @@ -0,0 +1,10 @@ +#version 450 + +in float gl_CullDistance[2]; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0 - gl_CullDistance[0] - gl_CullDistance[1]); +} diff --git a/shaders-msl/frag/depth-out-early-frag-tests.frag b/shaders-msl/frag/depth-out-early-frag-tests.frag new file mode 100644 index 00000000000..4208d79a061 --- /dev/null +++ b/shaders-msl/frag/depth-out-early-frag-tests.frag @@ -0,0 +1,11 @@ +#version 430 +layout(depth_less) out float gl_FragDepth; +layout(early_fragment_tests) in; + +layout(location = 0) out vec4 color_out; + +void main() +{ + color_out = vec4(1.0, 0.0, 0.0, 1.0); + gl_FragDepth = 0.699999988079071044921875; +} diff --git a/shaders-msl/frag/depth-out-no-early-frag-tests.frag b/shaders-msl/frag/depth-out-no-early-frag-tests.frag new file mode 
100644 index 00000000000..84502079985 --- /dev/null +++ b/shaders-msl/frag/depth-out-no-early-frag-tests.frag @@ -0,0 +1,10 @@ +#version 430 +layout(depth_less) out float gl_FragDepth; + +layout(location = 0) out vec4 color_out; + +void main() +{ + color_out = vec4(1.0, 0.0, 0.0, 1.0); + gl_FragDepth = 0.699999988079071044921875; +} diff --git a/shaders-msl/frag/disable-frag-output.frag-output.frag b/shaders-msl/frag/disable-frag-output.frag-output.frag new file mode 100644 index 00000000000..7e149b86404 --- /dev/null +++ b/shaders-msl/frag/disable-frag-output.frag-output.frag @@ -0,0 +1,25 @@ +#version 450 +#extension GL_ARB_shader_stencil_export : require + +layout(location = 0) out vec4 buf0; +layout(location = 1) out vec4 buf1; +layout(location = 2) out vec4 buf2; +layout(location = 3) out vec4 buf3; +layout(location = 4) out vec4 buf4; +layout(location = 5) out vec4 buf5; +layout(location = 6) out vec4 buf6; +layout(location = 7) out vec4 buf7; + +void main() { + buf0 = vec4(0, 0, 0, 1); + buf1 = vec4(1, 0, 0, 1); + buf2 = vec4(0, 1, 0, 1); + buf3 = vec4(0, 0, 1, 1); + buf4 = vec4(1, 0, 1, 0.5); + buf5 = vec4(0.25, 0.25, 0.25, 0.25); + buf6 = vec4(0.75, 0.75, 0.75, 0.75); + buf7 = vec4(1, 1, 1, 1); + gl_FragDepth = 0.9; + gl_FragStencilRefARB = 127; +} + diff --git a/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag b/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..9799a1392f8 --- /dev/null +++ b/shaders-msl/frag/frag-demote-checks.discard-checks.msl23.frag @@ -0,0 +1,33 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : enable + +layout(set=0, binding=0, std430) buffer foo_t +{ + float x; + uint y; +} foo; + +layout(r32ui, set=0, binding=1) uniform uimage2D bar; + +layout(location=0) out vec4 fragColor; + +vec4 frag_body() { + foo.x = 1.0f; + atomicExchange(foo.y, 0); + if (int(gl_FragCoord.x) == 3) + demote; + imageStore(bar, ivec2(gl_FragCoord.xy), uvec4(1)); + atomicAdd(foo.y, 
42); + imageAtomicOr(bar, ivec2(gl_FragCoord.xy), 0x3e); + atomicAnd(foo.y, 0xffff); + atomicXor(foo.y, 0xffffff00); + atomicMin(foo.y, 1); + imageAtomicMax(bar, ivec2(gl_FragCoord.xy), 100); + imageAtomicCompSwap(bar, ivec2(gl_FragCoord.xy), 100, 42); + return vec4(1.0f, float(helperInvocationEXT()), 0.0f, 1.0f); +} + +void main() { + fragColor = frag_body(); +} + diff --git a/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag b/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag new file mode 100644 index 00000000000..a7f37a5a700 --- /dev/null +++ b/shaders-msl/frag/frag-discard-checks-continue-block.discard-checks.msl23.frag @@ -0,0 +1,17 @@ +#version 450 + +layout(binding=0, set=0, std430) buffer foo +{ + int x; +}; + +layout(location=0) out vec4 fragColor; + +void main(void) +{ + if (gl_FragCoord.y == 7) + discard; + for (x = 0; x < gl_FragCoord.x; ++x) + ; + fragColor = vec4(x, 0, 0, 1); +} diff --git a/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag b/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag new file mode 100644 index 00000000000..8b40e60c19d --- /dev/null +++ b/shaders-msl/frag/frag-discard-checks.discard-checks.msl23.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(set=0, binding=0, std430) buffer foo_t +{ + float x; + uint y; +} foo; + +layout(r32ui, set=0, binding=1) uniform uimage2D bar; + +layout(location=0) out vec4 fragColor; + +vec4 frag_body() { + foo.x = 1.0f; + atomicExchange(foo.y, 0); + if (int(gl_FragCoord.x) == 3) + discard; + imageStore(bar, ivec2(gl_FragCoord.xy), uvec4(1)); + atomicAdd(foo.y, 42); + imageAtomicOr(bar, ivec2(gl_FragCoord.xy), 0x3e); + atomicAnd(foo.y, 0xffff); + atomicXor(foo.y, 0xffffff00); + atomicMin(foo.y, 1); + imageAtomicMax(bar, ivec2(gl_FragCoord.xy), 100); + imageAtomicCompSwap(bar, ivec2(gl_FragCoord.xy), 100, 42); + return vec4(1.0f, 0.0f, 0.0f, 1.0f); +} + +void main() { + fragColor = frag_body(); +} + diff --git 
a/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag b/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag new file mode 100644 index 00000000000..28d2b4ae8df --- /dev/null +++ b/shaders-msl/frag/huge-argument-buffer.device-argument-buffer.argument.msl2.frag @@ -0,0 +1,26 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; +layout(set = 0, binding = 0) uniform sampler2D uSamplers[10000]; +layout(set = 2, binding = 0) uniform sampler2D uSampler; + +layout(set = 1, binding = 0) uniform UBO +{ + vec4 v; +} vs[10000]; + +vec4 samp_array() +{ + return texture(uSamplers[9999], vUV) + vs[5000].v; +} + +vec4 samp_single() +{ + return texture(uSampler, vUV); +} + +void main() +{ + FragColor = samp_array() + samp_single(); +} diff --git a/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag b/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag new file mode 100644 index 00000000000..a7529b18838 --- /dev/null +++ b/shaders-msl/frag/in_block_with_multiple_structs_of_same_type.frag @@ -0,0 +1,19 @@ +#version 450 + +struct Foo +{ + float a; + float b; +}; + +layout(location = 1) in Foo foos[4]; +layout(location = 10) in Foo bars[4]; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor.x = foos[0].a; + FragColor.y = foos[1].b; + FragColor.z = foos[2].a; + FragColor.w = bars[3].b.x; +} \ No newline at end of file diff --git a/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag b/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag new file mode 100644 index 00000000000..b3d44c94364 --- /dev/null +++ b/shaders-msl/frag/input-attachment-ms.arrayed-subpass.msl21.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform subpassInputMS uSubpass1; +layout(location = 0) out vec4 
FragColor; + +vec4 load_subpasses(mediump subpassInputMS uInput) +{ + return subpassLoad(uInput, gl_SampleID); +} + +void main() +{ + FragColor = subpassLoad(uSubpass0, 1) + subpassLoad(uSubpass1, 2) + load_subpasses(uSubpass0); +} diff --git a/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag b/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag new file mode 100644 index 00000000000..b3d44c94364 --- /dev/null +++ b/shaders-msl/frag/input-attachment-ms.multiview.msl21.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform subpassInputMS uSubpass1; +layout(location = 0) out vec4 FragColor; + +vec4 load_subpasses(mediump subpassInputMS uInput) +{ + return subpassLoad(uInput, gl_SampleID); +} + +void main() +{ + FragColor = subpassLoad(uSubpass0, 1) + subpassLoad(uSubpass1, 2) + load_subpasses(uSubpass0); +} diff --git a/shaders-msl/frag/input-attachment.arrayed-subpass.frag b/shaders-msl/frag/input-attachment.arrayed-subpass.frag new file mode 100644 index 00000000000..877d0525a48 --- /dev/null +++ b/shaders-msl/frag/input-attachment.arrayed-subpass.frag @@ -0,0 +1,16 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec4 FragColor; + +vec4 load_subpasses(mediump subpassInput uInput) +{ + return subpassLoad(uInput); +} + +void main() +{ + FragColor = subpassLoad(uSubpass0) + load_subpasses(uSubpass1); +} diff --git a/shaders-msl/frag/input-attachment.multiview.frag b/shaders-msl/frag/input-attachment.multiview.frag new file mode 100644 index 00000000000..877d0525a48 --- /dev/null +++ b/shaders-msl/frag/input-attachment.multiview.frag @@ -0,0 +1,16 @@ +#version 310 es +precision mediump float; + 
+layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec4 FragColor; + +vec4 load_subpasses(mediump subpassInput uInput) +{ + return subpassLoad(uInput); +} + +void main() +{ + FragColor = subpassLoad(uSubpass0) + load_subpasses(uSubpass1); +} diff --git a/shaders-msl/frag/modf-access-tracking-function.frag b/shaders-msl/frag/modf-access-tracking-function.frag new file mode 100644 index 00000000000..c1f1a1266f1 --- /dev/null +++ b/shaders-msl/frag/modf-access-tracking-function.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +vec4 modf_inner() +{ + return modf(v, vo1); +} + +void main() +{ + vo0 = modf_inner(); +} diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 00000000000..ceac8cc50e4 --- /dev/null +++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. 
+ baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 00000000000..ceac8cc50e4 --- /dev/null +++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. 
+ baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/shaders-msl/frag/post-depth-coverage.ios.msl2.frag b/shaders-msl/frag/post-depth-coverage.ios.msl2.frag new file mode 100644 index 00000000000..4f134b4f3bb --- /dev/null +++ b/shaders-msl/frag/post-depth-coverage.ios.msl2.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_ARB_post_depth_coverage : require + +layout(post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(gl_SampleMaskIn[0]); +} diff --git a/shaders-msl/frag/post-depth-coverage.msl23.frag b/shaders-msl/frag/post-depth-coverage.msl23.frag new file mode 100644 index 00000000000..4f134b4f3bb --- /dev/null +++ b/shaders-msl/frag/post-depth-coverage.msl23.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_ARB_post_depth_coverage : require + +layout(post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(gl_SampleMaskIn[0]); +} diff --git a/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag b/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag new file mode 100644 index 00000000000..d2d94fcbd95 --- /dev/null +++ b/shaders-msl/frag/ray-query-object-in-function.spv14.vk.msl24.frag @@ -0,0 +1,31 @@ +#version 460 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_ray_query : enable + +layout(location = 0) in vec4 inPos; +layout(location = 0) out vec4 outColor; + +layout(binding = 0) uniform accelerationStructureEXT topLevelAS; + +uint doRay(vec3 rayOrigin, vec3 rayDirection, float rayDistance) { + rayQueryEXT rayQuery; + rayQueryInitializeEXT(rayQuery, topLevelAS, gl_RayFlagsTerminateOnFirstHitEXT, 0xFF, + rayOrigin, 0.001, rayDirection, rayDistance); + + 
while(rayQueryProceedEXT(rayQuery)) + ; + + return rayQueryGetIntersectionTypeEXT(rayQuery, true); +} + +void main() { + vec3 rayOrigin = vec3(inPos.xy*4.0-vec2(2.0),1.0); + vec3 rayDirection = vec3(0,0,-1); + float rayDistance = 2.0; + + if(doRay(rayOrigin,rayDirection,rayDistance) == gl_RayQueryCommittedIntersectionNoneEXT) + discard; + + outColor = inPos; +} diff --git a/shaders-msl/frag/read-cull-clip-distance-in-function.frag b/shaders-msl/frag/read-cull-clip-distance-in-function.frag new file mode 100644 index 00000000000..0b82dc2df92 --- /dev/null +++ b/shaders-msl/frag/read-cull-clip-distance-in-function.frag @@ -0,0 +1,20 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +in float gl_CullDistance[2]; +in float gl_ClipDistance[2]; + +vec4 read_in_func() +{ + return vec4( + gl_CullDistance[0], + gl_CullDistance[1], + gl_ClipDistance[0], + gl_ClipDistance[1]); +} + +void main() +{ + FragColor = read_in_func(); +} diff --git a/shaders-msl/frag/return-value-after-discard-terminator.frag b/shaders-msl/frag/return-value-after-discard-terminator.frag new file mode 100644 index 00000000000..2ab410cb1b1 --- /dev/null +++ b/shaders-msl/frag/return-value-after-discard-terminator.frag @@ -0,0 +1,17 @@ +#version 450 + +layout(set = 0, binding = 0, std430) buffer buff_t +{ + int m0[1024]; +} buff; + +layout(location = 0) out vec4 frag_clr; + +void main() +{ + ivec2 frag_coord = ivec2(ivec4(gl_FragCoord).xy); + int buff_idx = (frag_coord.y * 32) + frag_coord.x; + frag_clr = vec4(0.0, 0.0, 1.0, 1.0); + buff.m0[buff_idx] = 1; + discard; +} diff --git a/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag b/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag new file mode 100644 index 00000000000..9a855ac7a80 --- /dev/null +++ b/shaders-msl/frag/sample-depth-propagate-state-from-resource.frag @@ -0,0 +1,29 @@ +#version 450 + +layout(set = 0, binding = 0) uniform texture2D uTexture; +layout(set = 0, binding = 1) uniform sampler uSampler; 
+layout(set = 0, binding = 2) uniform samplerShadow uSamplerShadow; + +layout(location = 0) out float FragColor; +layout(location = 0) in vec3 vUV; + +float sample_normal2(texture2D tex) +{ + return texture(sampler2D(tex, uSampler), vUV.xy).x; +} + +float sample_normal(texture2D tex) +{ + return sample_normal2(tex); +} + +float sample_comp(texture2D tex) +{ + return texture(sampler2DShadow(tex, uSamplerShadow), vUV); +} + +void main() +{ + FragColor = sample_normal(uTexture); + FragColor += sample_comp(uTexture); +} diff --git a/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag b/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag new file mode 100644 index 00000000000..b78ee61e81f --- /dev/null +++ b/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.force-sample.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = gl_SampleMaskIn[0]; +} + diff --git a/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag b/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag new file mode 100644 index 00000000000..b78ee61e81f --- /dev/null +++ b/shaders-msl/frag/sample-mask-in-and-out.fixed-sample-mask.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = gl_SampleMaskIn[0]; +} + diff --git a/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag b/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag new file mode 100644 index 00000000000..c3eaf5e19fb --- /dev/null +++ b/shaders-msl/frag/sample-mask-not-used.fixed-sample-mask.frag @@ -0,0 +1,8 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); +} diff --git a/shaders-msl/frag/sample-mask.fixed-sample-mask.frag b/shaders-msl/frag/sample-mask.fixed-sample-mask.frag new file mode 100644 index 
00000000000..33ff0b2e695 --- /dev/null +++ b/shaders-msl/frag/sample-mask.fixed-sample-mask.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(1.0); + gl_SampleMask[0] = 0; +} + diff --git a/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag b/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag new file mode 100644 index 00000000000..202dba0bdfa --- /dev/null +++ b/shaders-msl/frag/sample-rate-frag-coord-sample-id.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2DArray tex; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texture(tex, vec3(gl_FragCoord.xy, float(gl_SampleID))); +} diff --git a/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag b/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag new file mode 100644 index 00000000000..b131fb032c5 --- /dev/null +++ b/shaders-msl/frag/sample-rate-frag-coord-sample-input.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2DArray tex; + +layout(location = 0) sample in float foo; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texture(tex, vec3(gl_FragCoord.xy, foo)); +} diff --git a/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag b/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag new file mode 100644 index 00000000000..c8c3be96df6 --- /dev/null +++ b/shaders-msl/frag/sample-rate-frag-coord-sample-pos.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler2D tex; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texture(tex, gl_FragCoord.xy - gl_SamplePosition); +} diff --git a/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag b/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag new file mode 100644 index 00000000000..a6b47e4bbed --- /dev/null +++ b/shaders-msl/frag/sample-rate-frag-coord.force-sample.frag @@ -0,0 +1,10 @@ +#version 
450 + +layout(set = 0, binding = 0) uniform sampler2D tex; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = texture(tex, gl_FragCoord.xy); +} diff --git a/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag b/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag new file mode 100644 index 00000000000..f4526f39d06 --- /dev/null +++ b/shaders-msl/frag/sampler-1d-lod.1d-as-2d.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) flat in float vTex; +layout(binding = 0) uniform sampler1D uSampler; + +void main() +{ + FragColor += texture(uSampler, vTex, 2.0) + + textureLod(uSampler, vTex, 3.0) + + textureGrad(uSampler, vTex, 5.0, 8.0); +} diff --git a/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag b/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag new file mode 100644 index 00000000000..158c7600311 --- /dev/null +++ b/shaders-msl/frag/sampler-compare-bias.msl23.1d-as-2d.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(binding = 0) uniform texture1DArray uTex; +layout(binding = 1) uniform samplerShadow uShadow; +layout(location = 0) in vec3 vUV; +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = texture(sampler1DArrayShadow(uTex, uShadow), vUV, 1.0); +} diff --git a/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag b/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag new file mode 100644 index 00000000000..5c1c8937fb2 --- /dev/null +++ b/shaders-msl/frag/sampler-compare-cascade-gradient.msl23.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(binding = 0) uniform texture2DArray uTex; +layout(binding = 1) uniform samplerShadow uShadow; +layout(location = 0) in vec4 vUV; +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = textureGrad(sampler2DArrayShadow(uTex, uShadow), vUV, vec2(0.0), vec2(0.0)) + textureGrad(sampler2DArrayShadow(uTex, uShadow), vUV, vec2(1.0), vec2(1.0)); +} diff --git 
a/shaders-msl/frag/subgroup-globals-extract.msl22.frag b/shaders-msl/frag/subgroup-globals-extract.msl22.frag new file mode 100644 index 00000000000..f763163dc2e --- /dev/null +++ b/shaders-msl/frag/subgroup-globals-extract.msl22.frag @@ -0,0 +1,30 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require + +layout(location = 0) out uvec2 FragColor; + +uint sub1() { + return subgroupBallotFindLSB(uvec4(1,2,3,4)); +} + +uint sub2() { + return subgroupBallotFindMSB(uvec4(1,2,3,4)); +} + +uint sub3() { + return subgroupBallotBitCount(uvec4(1,2,3,4)); +} + +uint sub4() { + return subgroupBallotInclusiveBitCount(uvec4(1,2,3,4)); +} + +uint sub5() { + return subgroupBallotExclusiveBitCount(uvec4(1,2,3,4)); +} + +void main() +{ + FragColor.x = sub1() + sub2() + sub3() + sub4() + sub5(); +} diff --git a/shaders-msl/frag/switch-unreachable-break.frag b/shaders-msl/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..4be90f86b5f --- /dev/null +++ b/shaders-msl/frag/switch-unreachable-break.frag @@ -0,0 +1,30 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vInput; + +layout(set = 0, binding = 0) uniform UBO +{ + int cond; + int cond2; +}; + +void main() +{ + FragColor = vec4(10.0); + switch (cond) + { + case 1: + if (cond2 < 50) + break; + else + discard; + + break; + + default: + FragColor = vec4(20.0); + break; + } +} + diff --git a/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag b/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag new file mode 100644 index 00000000000..8e15e36e354 --- /dev/null +++ b/shaders-msl/frag/texel-fetch-offset.1d-as-2d.frag @@ -0,0 +1,10 @@ +#version 450 +layout(location = 0) out vec4 FragColor; +layout(binding = 0) uniform sampler2D uTexture; +layout(binding = 1) uniform sampler1D uTexture2; + +void main() +{ + FragColor = texelFetchOffset(uTexture, ivec2(gl_FragCoord.xy), 0, ivec2(1, 1)); + FragColor += 
texelFetchOffset(uTexture2, int(gl_FragCoord.x), 0, int(-1)); +} diff --git a/shaders-msl/frag/texture-cube-array.frag b/shaders-msl/frag/texture-cube-array.frag new file mode 100644 index 00000000000..91a55f933bc --- /dev/null +++ b/shaders-msl/frag/texture-cube-array.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(set = 0, binding = 0) uniform samplerCube cubeSampler; +layout(set = 0, binding = 1) uniform samplerCubeArray cubeArraySampler; +layout(set = 0, binding = 2) uniform sampler2DArray texArraySampler; + +layout(location = 0) in vec4 vUV; +layout(location = 0) out vec4 FragColor; + +void main() +{ + vec4 a = texture(cubeSampler, vUV.xyz); + vec4 b = texture(cubeArraySampler, vUV); + vec4 c = texture(texArraySampler, vUV.xyz); + FragColor = a + b + c; +} diff --git a/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag b/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag new file mode 100644 index 00000000000..91a55f933bc --- /dev/null +++ b/shaders-msl/frag/texture-cube-array.ios.emulate-cube-array.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(set = 0, binding = 0) uniform samplerCube cubeSampler; +layout(set = 0, binding = 1) uniform samplerCubeArray cubeArraySampler; +layout(set = 0, binding = 2) uniform sampler2DArray texArraySampler; + +layout(location = 0) in vec4 vUV; +layout(location = 0) out vec4 FragColor; + +void main() +{ + vec4 a = texture(cubeSampler, vUV.xyz); + vec4 b = texture(cubeArraySampler, vUV); + vec4 c = texture(texArraySampler, vUV.xyz); + FragColor = a + b + c; +} diff --git a/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag b/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag new file mode 100644 index 00000000000..900c5b006c9 --- /dev/null +++ b/shaders-msl/frag/vecsize-mismatch.shader-inputs.frag @@ -0,0 +1,17 @@ +#version 450 + +#extension GL_AMD_gpu_shader_int16 : require + +layout(location = 0) flat in int16_t a; +layout(location = 1) flat in ivec2 b; +layout(location = 2) flat in uint16_t c[2]; 
+layout(location = 4) flat in uvec4 e[2]; +layout(location = 6) in vec2 d; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(float(int(a)), float(b.x), vec2(uint(c[1]), float(e[0].w)) + d); +} + diff --git a/shaders-msl/intel/shader-integer-functions2.asm.comp b/shaders-msl/intel/shader-integer-functions2.asm.comp new file mode 100644 index 00000000000..9189794ef78 --- /dev/null +++ b/shaders-msl/intel/shader-integer-functions2.asm.comp @@ -0,0 +1,137 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 97 +; Schema: 0 + OpCapability Shader + OpCapability IntegerFunctions2INTEL + OpExtension "SPV_INTEL_shader_integer_functions2" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %foo "foo" + OpMemberName %foo 0 "a" + OpMemberName %foo 1 "b" + OpMemberName %foo 2 "c" + OpMemberName %foo 3 "d" + OpName %_ "" + OpMemberDecorate %foo 0 Offset 0 + OpMemberDecorate %foo 1 Offset 4 + OpMemberDecorate %foo 2 Offset 8 + OpMemberDecorate %foo 3 Offset 12 + OpDecorate %foo Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %6 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %foo = OpTypeStruct %uint %uint %int %int +%_ptr_StorageBuffer_foo = OpTypePointer StorageBuffer %foo + %_ = OpVariable %_ptr_StorageBuffer_foo StorageBuffer + %int_0 = OpConstant %int 0 +%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 +%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int + %int_3 = OpConstant %int 3 + %main = OpFunction %void None %6 + %15 = OpLabel + %16 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %17 = OpLoad %uint %16 + %18 = OpUCountLeadingZerosINTEL %uint %17 + %19 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %19 %18 + %20 = 
OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %21 = OpLoad %uint %20 + %22 = OpUCountTrailingZerosINTEL %uint %21 + %23 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %23 %22 + %24 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %25 = OpLoad %int %24 + %26 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %27 = OpLoad %int %26 + %28 = OpAbsISubINTEL %uint %25 %27 + %29 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %29 %28 + %30 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %31 = OpLoad %uint %30 + %32 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %33 = OpLoad %uint %32 + %34 = OpAbsUSubINTEL %uint %31 %33 + %35 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %35 %34 + %37 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %38 = OpLoad %int %37 + %39 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %40 = OpLoad %int %39 + %41 = OpIAddSatINTEL %int %38 %40 + %42 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %42 %41 + %43 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %44 = OpLoad %uint %43 + %45 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %46 = OpLoad %uint %45 + %47 = OpUAddSatINTEL %uint %44 %46 + %48 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %48 %47 + %49 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %50 = OpLoad %int %49 + %51 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %52 = OpLoad %int %51 + %53 = OpIAverageINTEL %int %50 %52 + %54 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %54 %53 + %55 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %56 = OpLoad %uint %55 + %57 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %58 = OpLoad %uint %57 + %59 = OpUAverageINTEL %uint %56 %58 + %60 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %62 = OpLoad %int %61 + %63 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %64 = OpLoad %int %63 + 
%65 = OpIAverageRoundedINTEL %int %62 %64 + %66 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %66 %65 + %67 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %68 = OpLoad %uint %67 + %69 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %70 = OpLoad %uint %69 + %71 = OpUAverageRoundedINTEL %uint %68 %70 + %72 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %72 %71 + %73 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %74 = OpLoad %int %73 + %75 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %76 = OpLoad %int %75 + %77 = OpISubSatINTEL %int %74 %76 + %78 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %78 %77 + %79 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %80 = OpLoad %uint %79 + %81 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %82 = OpLoad %uint %81 + %83 = OpUSubSatINTEL %uint %80 %82 + %84 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %84 %83 + %85 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + %86 = OpLoad %int %85 + %87 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_3 + %88 = OpLoad %int %87 + %89 = OpIMul32x16INTEL %int %86 %88 + %90 = OpAccessChain %_ptr_StorageBuffer_int %_ %int_2 + OpStore %90 %89 + %91 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + %92 = OpLoad %uint %91 + %93 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_1 + %94 = OpLoad %uint %93 + %95 = OpUMul32x16INTEL %uint %92 %94 + %96 = OpAccessChain %_ptr_StorageBuffer_uint %_ %int_0 + OpStore %96 %95 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc new file mode 100644 index 00000000000..e69a7a1697d --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 
pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..e69a7a1697d --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-0.msl2.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc new file mode 100644 index 00000000000..e69a7a1697d --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..e69a7a1697d --- /dev/null +++ b/shaders-msl/masking/copy-arrays.mask-location-1.msl2.tesc @@ -0,0 +1,17 @@ +#version 450 + +layout(vertices = 4) out; +layout(location = 0) out vec4 
Foo[][2]; +layout(location = 0) in vec4 iFoo[][2]; +layout(location = 2) patch out vec4 pFoo[2]; +layout(location = 2) in vec4 ipFoo[]; + +void main() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + Foo[gl_InvocationID] = iFoo[gl_InvocationID]; + if (gl_InvocationID == 0) + { + pFoo = vec4[](ipFoo[0], ipFoo[1]); + } +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert b/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert new file mode 100644 index 00000000000..2d8fdf19a22 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-0.for-tess.vert @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out V +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +void main() +{ + gl_Position = vec4(1.0); + a = vec4(2.0); + b = vec4(3.0); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..955f2c41872 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-0.msl2.tesc @@ -0,0 +1,28 @@ +#version 450 + +layout(vertices = 4) out; +patch out P +{ + layout(location = 0) float a; + layout(location = 2) float b; +}; + +out C +{ + layout(location = 1) float a; + layout(location = 3) float b; +} c[]; + +void write_in_function() +{ + a = 1.0; + b = 2.0; + c[gl_InvocationID].a = 3.0; + c[gl_InvocationID].b = 4.0; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + +void main() +{ + write_in_function(); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc new file mode 100644 index 00000000000..955f2c41872 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-0.multi-patch.msl2.tesc @@ -0,0 +1,28 @@ +#version 450 + +layout(vertices = 4) out; +patch out P +{ + layout(location = 0) float a; + layout(location = 2) float 
b; +}; + +out C +{ + layout(location = 1) float a; + layout(location = 3) float b; +} c[]; + +void write_in_function() +{ + a = 1.0; + b = 2.0; + c[gl_InvocationID].a = 3.0; + c[gl_InvocationID].b = 4.0; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + +void main() +{ + write_in_function(); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-0.vert b/shaders-msl/masking/write-outputs-block.mask-location-0.vert new file mode 100644 index 00000000000..2d8fdf19a22 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-0.vert @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out V +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +void main() +{ + gl_Position = vec4(1.0); + a = vec4(2.0); + b = vec4(3.0); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert b/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..2d8fdf19a22 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-1.for-tess.vert @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out V +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +void main() +{ + gl_Position = vec4(1.0); + a = vec4(2.0); + b = vec4(3.0); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..955f2c41872 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-1.msl2.tesc @@ -0,0 +1,28 @@ +#version 450 + +layout(vertices = 4) out; +patch out P +{ + layout(location = 0) float a; + layout(location = 2) float b; +}; + +out C +{ + layout(location = 1) float a; + layout(location = 3) float b; +} c[]; + +void write_in_function() +{ + a = 1.0; + b = 2.0; + c[gl_InvocationID].a = 3.0; + c[gl_InvocationID].b = 4.0; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + +void main() +{ + write_in_function(); +} diff --git 
a/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc b/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc new file mode 100644 index 00000000000..955f2c41872 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-1.multi-patch.msl2.tesc @@ -0,0 +1,28 @@ +#version 450 + +layout(vertices = 4) out; +patch out P +{ + layout(location = 0) float a; + layout(location = 2) float b; +}; + +out C +{ + layout(location = 1) float a; + layout(location = 3) float b; +} c[]; + +void write_in_function() +{ + a = 1.0; + b = 2.0; + c[gl_InvocationID].a = 3.0; + c[gl_InvocationID].b = 4.0; + gl_out[gl_InvocationID].gl_Position = vec4(1.0); +} + +void main() +{ + write_in_function(); +} diff --git a/shaders-msl/masking/write-outputs-block.mask-location-1.vert b/shaders-msl/masking/write-outputs-block.mask-location-1.vert new file mode 100644 index 00000000000..2d8fdf19a22 --- /dev/null +++ b/shaders-msl/masking/write-outputs-block.mask-location-1.vert @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out V +{ + vec4 a; + vec4 b; + vec4 c; + vec4 d; +}; + +void main() +{ + gl_Position = vec4(1.0); + a = vec4(2.0); + b = vec4(3.0); +} diff --git a/shaders-msl/masking/write-outputs.mask-clip-distance.vert b/shaders-msl/masking/write-outputs.mask-clip-distance.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-clip-distance.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert b/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert new file mode 100644 index 
00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.for-tess.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc b/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc new file mode 100644 index 00000000000..c291fef03a0 --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.msl2.tesc @@ -0,0 +1,26 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID][0] = 2.0; + if (gl_InvocationID == 0) + { + v1 = vec4(2.0); + v1[3] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = vec4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position[2] = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.multi-patch.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = 
vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.tesc b/shaders-msl/masking/write-outputs.mask-location-0.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-0.vert b/shaders-msl/masking/write-outputs.mask-location-0.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-0.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert b/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.for-tess.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float 
gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc b/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc new file mode 100644 index 00000000000..c291fef03a0 --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.msl2.tesc @@ -0,0 +1,26 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID][0] = 2.0; + if (gl_InvocationID == 0) + { + v1 = vec4(2.0); + v1[3] = 4.0; + } + gl_out[gl_InvocationID].gl_Position = vec4(3.0); + gl_out[gl_InvocationID].gl_PointSize = 4.0; + gl_out[gl_InvocationID].gl_Position[2] = 5.0; + gl_out[gl_InvocationID].gl_PointSize = 4.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.multi-patch.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.tesc 
b/shaders-msl/masking/write-outputs.mask-location-1.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-location-1.vert b/shaders-msl/masking/write-outputs.mask-location-1.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-location-1.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert b/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-point-size.for-tess.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + 
write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-point-size.multi-patch.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-point-size.tesc b/shaders-msl/masking/write-outputs.mask-point-size.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-point-size.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-point-size.vert b/shaders-msl/masking/write-outputs.mask-point-size.vert new file mode 100644 index 00000000000..35b462fdf8b --- /dev/null +++ 
b/shaders-msl/masking/write-outputs.mask-point-size.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) out vec4 v0; +layout(location = 1) out vec4 v1; + +out float gl_ClipDistance[2]; + +void write_in_func() +{ + v0 = vec4(1.0); + v1 = vec4(2.0); + gl_Position = vec4(3.0); + gl_PointSize = 4.0; + gl_ClipDistance[0] = 1.0; + gl_ClipDistance[1] = 0.5; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc b/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-position.multi-patch.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/masking/write-outputs.mask-position.tesc b/shaders-msl/masking/write-outputs.mask-position.tesc new file mode 100644 index 00000000000..9f3ca9fcffe --- /dev/null +++ b/shaders-msl/masking/write-outputs.mask-position.tesc @@ -0,0 +1,29 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) out vec4 v0[]; +layout(location = 1) patch out vec4 v1[2]; +layout(location = 3) patch out vec4 v3; + +void write_in_func() +{ + v0[gl_InvocationID] = vec4(1.0); + v0[gl_InvocationID].z = 3.0; + if (gl_InvocationID == 0) + { + v1[0] = vec4(2.0); + v1[0].x = 3.0; + v1[1] = vec4(2.0); + v1[1].x = 5.0; + } + v3 = vec4(5.0); + gl_out[gl_InvocationID].gl_Position = vec4(10.0); + 
gl_out[gl_InvocationID].gl_Position.z = 20.0; + gl_out[gl_InvocationID].gl_PointSize = 40.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc b/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc new file mode 100644 index 00000000000..0fc300d6886 --- /dev/null +++ b/shaders-msl/tesc/arrayed-block-io.multi-patch.tesc @@ -0,0 +1,64 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 5) out; + +layout(location = 0) patch out highp vec2 in_te_positionScale; +layout(location = 1) patch out highp vec2 in_te_positionOffset; + +struct S +{ + highp int x; + highp vec4 y; + highp float z[2]; +}; +layout(location = 2) patch out TheBlock +{ + highp float blockFa[3]; + S blockSa[2]; + highp float blockF; +} tcBlock[2]; + +layout(location = 0) in highp float in_tc_attr[]; + +void main (void) +{ + { + highp float v = 1.3; + + // Assign values to output tcBlock + for (int i0 = 0; i0 < 2; ++i0) + { + for (int i1 = 0; i1 < 3; ++i1) + { + tcBlock[i0].blockFa[i1] = v; + v += 0.4; + } + for (int i1 = 0; i1 < 2; ++i1) + { + tcBlock[i0].blockSa[i1].x = int(v); + v += 0.4; + tcBlock[i0].blockSa[i1].y = vec4(v, v+0.8, v+1.6, v+2.4); + v += 0.4; + for (int i2 = 0; i2 < 2; ++i2) + { + tcBlock[i0].blockSa[i1].z[i2] = v; + v += 0.4; + } + } + tcBlock[i0].blockF = v; + v += 0.4; + } + } + + gl_TessLevelInner[0] = in_tc_attr[0]; + gl_TessLevelInner[1] = in_tc_attr[1]; + + gl_TessLevelOuter[0] = in_tc_attr[2]; + gl_TessLevelOuter[1] = in_tc_attr[3]; + gl_TessLevelOuter[2] = in_tc_attr[4]; + gl_TessLevelOuter[3] = in_tc_attr[5]; + + in_te_positionScale = vec2(in_tc_attr[6], in_tc_attr[7]); + in_te_positionOffset = vec2(in_tc_attr[8], in_tc_attr[9]); +} diff --git a/shaders-msl/tesc/basic.multi-patch.tesc b/shaders-msl/tesc/basic.multi-patch.tesc new file mode 100644 index 00000000000..0a41f98c830 --- /dev/null +++ b/shaders-msl/tesc/basic.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 310 es +#extension 
GL_EXT_tessellation_shader : require + +layout(location = 0) patch out vec3 vFoo; + +layout(vertices = 1) out; + +void main() +{ + gl_TessLevelInner[0] = 8.9; + gl_TessLevelInner[1] = 6.9; + gl_TessLevelOuter[0] = 8.9; + gl_TessLevelOuter[1] = 6.9; + gl_TessLevelOuter[2] = 3.9; + gl_TessLevelOuter[3] = 4.9; + vFoo = vec3(1.0); +} diff --git a/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc b/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc new file mode 100644 index 00000000000..aec8db7fac8 --- /dev/null +++ b/shaders-msl/tesc/complex-control-point-inout-types.multi-patch.tesc @@ -0,0 +1,70 @@ +#version 450 +layout(vertices = 4) out; + +struct Meep +{ + float a; + float b; +}; + +layout(location = 0) out float a[][2]; +layout(location = 2) out float b[]; +layout(location = 3) out mat2 m[]; +layout(location = 5) out Meep meep[]; +layout(location = 7) out Meep meeps[][2]; + +layout(location = 11) out Block +{ + float a[2]; + float b; + mat2 m; + Meep meep; + Meep meeps[2]; +} B[]; + +layout(location = 0) in float in_a[][2]; +layout(location = 2) in float in_b[]; +layout(location = 3) in mat2 in_m[]; +layout(location = 5) in Meep in_meep[]; +layout(location = 7) in Meep in_meeps[][2]; + +layout(location = 11) in Block +{ + float a[2]; + float b; + mat2 m; + Meep meep; + Meep meeps[2]; +} in_B[]; + +void write_in_func() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + + a[gl_InvocationID][0] = in_a[gl_InvocationID][0]; + a[gl_InvocationID][1] = in_a[gl_InvocationID][1]; + b[gl_InvocationID] = in_b[gl_InvocationID]; + m[gl_InvocationID] = in_m[gl_InvocationID]; + meep[gl_InvocationID].a = in_meep[gl_InvocationID].a; + meep[gl_InvocationID].b = in_meep[gl_InvocationID].b; + meeps[gl_InvocationID][0].a = in_meeps[gl_InvocationID][0].a; + meeps[gl_InvocationID][0].b = in_meeps[gl_InvocationID][0].b; + meeps[gl_InvocationID][1].a = in_meeps[gl_InvocationID][1].a; + meeps[gl_InvocationID][1].b = in_meeps[gl_InvocationID][1].b; + 
+ B[gl_InvocationID].a[0] = in_B[gl_InvocationID].a[0]; + B[gl_InvocationID].a[1] = in_B[gl_InvocationID].a[1]; + B[gl_InvocationID].b = in_B[gl_InvocationID].b; + B[gl_InvocationID].m = in_B[gl_InvocationID].m; + B[gl_InvocationID].meep.a = in_B[gl_InvocationID].meep.a; + B[gl_InvocationID].meep.b = in_B[gl_InvocationID].meep.b; + B[gl_InvocationID].meeps[0].a = in_B[gl_InvocationID].meeps[0].a; + B[gl_InvocationID].meeps[0].b = in_B[gl_InvocationID].meeps[0].b; + B[gl_InvocationID].meeps[1].a = in_B[gl_InvocationID].meeps[1].a; + B[gl_InvocationID].meeps[1].b = in_B[gl_InvocationID].meeps[1].b; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/tesc/complex-control-point-inout-types.tesc b/shaders-msl/tesc/complex-control-point-inout-types.tesc new file mode 100644 index 00000000000..b7bb21d6b35 --- /dev/null +++ b/shaders-msl/tesc/complex-control-point-inout-types.tesc @@ -0,0 +1,68 @@ +#version 450 +layout(vertices = 4) out; + +struct Meep +{ + float a; + float b; +}; + +layout(location = 0) out float a[][2]; +layout(location = 2) out float b[]; +layout(location = 3) out mat2 m[]; +layout(location = 5) out Meep meep[]; +layout(location = 7) out Meep meeps[][2]; + +layout(location = 11) out Block +{ + float a[2]; + float b; + mat2 m; + Meep meep; + Meep meeps[2]; +} B[]; + +layout(location = 0) in float in_a[][2]; +layout(location = 2) in float in_b[]; +layout(location = 3) in mat2 in_m[]; +layout(location = 5) in Meep in_meep[]; + +layout(location = 11) in Block +{ + float a[2]; + float b; + mat2 m; + // Non-multi-patch path cannot support structs inside structs. 
+} in_B[]; + +void write_in_func() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + + a[gl_InvocationID][0] = in_a[gl_InvocationID][0]; + a[gl_InvocationID][1] = in_a[gl_InvocationID][1]; + b[gl_InvocationID] = in_b[gl_InvocationID]; + m[gl_InvocationID] = in_m[gl_InvocationID]; + meep[gl_InvocationID].a = in_meep[gl_InvocationID].a; + meep[gl_InvocationID].b = in_meep[gl_InvocationID].b; + meeps[gl_InvocationID][0].a = 1.0; + meeps[gl_InvocationID][0].b = 2.0; + meeps[gl_InvocationID][1].a = 3.0; + meeps[gl_InvocationID][1].b = 4.0; + + B[gl_InvocationID].a[0] = in_B[gl_InvocationID].a[0]; + B[gl_InvocationID].a[1] = in_B[gl_InvocationID].a[1]; + B[gl_InvocationID].b = in_B[gl_InvocationID].b; + B[gl_InvocationID].m = in_B[gl_InvocationID].m; + B[gl_InvocationID].meep.a = 10.0; + B[gl_InvocationID].meep.b = 20.0; + B[gl_InvocationID].meeps[0].a = 5.0; + B[gl_InvocationID].meeps[0].b = 6.0; + B[gl_InvocationID].meeps[1].a = 7.0; + B[gl_InvocationID].meeps[1].b = 8.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/tesc/complex-patch-out-types.tesc b/shaders-msl/tesc/complex-patch-out-types.tesc new file mode 100644 index 00000000000..fd56ae46bd0 --- /dev/null +++ b/shaders-msl/tesc/complex-patch-out-types.tesc @@ -0,0 +1,55 @@ +#version 450 +layout(vertices = 4) out; + +struct Meep +{ + float a; + float b; +}; + +layout(location = 0) patch out float a[2]; +layout(location = 2) patch out float b; +layout(location = 3) patch out mat2 m; +layout(location = 5) patch out Meep meep; +layout(location = 7) patch out Meep meeps[2]; + +layout(location = 11) patch out Block +{ + float a[2]; + float b; + mat2 m; + Meep meep; + Meep meeps[2]; +} B; + +void write_in_func() +{ + gl_out[gl_InvocationID].gl_Position = vec4(1.0); + + a[0] = 1.0; + a[1] = 2.0; + b = 3.0; + m = mat2(2.0); + meep.a = 4.0; + meep.b = 5.0; + meeps[0].a = 6.0; + meeps[0].b = 7.0; + meeps[1].a = 8.0; + meeps[1].b = 9.0; + + B.a[0] = 1.0; + B.a[1] = 2.0; + B.b = 3.0; + B.m = 
mat2(4.0); + B.meep.a = 4.0; + B.meep.b = 5.0; + B.meeps[0].a = 6.0; + B.meeps[0].b = 7.0; + B.meeps[1].a = 8.0; + B.meeps[1].b = 9.0; +} + +void main() +{ + write_in_func(); +} diff --git a/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc b/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc new file mode 100644 index 00000000000..36b16681e28 --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array-of-matrix.multi-patch.tesc @@ -0,0 +1,12 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) in mat4 vInputs[gl_MaxPatchVertices]; +layout(location = 0) out mat4 vOutputs[4]; + +void main() +{ + mat4 tmp[gl_MaxPatchVertices] = vInputs; + vOutputs[gl_InvocationID] = tmp[gl_InvocationID]; +} diff --git a/shaders-msl/tesc/load-control-point-array-of-matrix.tesc b/shaders-msl/tesc/load-control-point-array-of-matrix.tesc new file mode 100644 index 00000000000..36b16681e28 --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array-of-matrix.tesc @@ -0,0 +1,12 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) in mat4 vInputs[gl_MaxPatchVertices]; +layout(location = 0) out mat4 vOutputs[4]; + +void main() +{ + mat4 tmp[gl_MaxPatchVertices] = vInputs; + vOutputs[gl_InvocationID] = tmp[gl_InvocationID]; +} diff --git a/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc b/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc new file mode 100644 index 00000000000..4b4d5bfcd13 --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array-of-struct.multi-patch.tesc @@ -0,0 +1,21 @@ +#version 450 + +layout(vertices = 4) out; + +struct VertexData +{ + mat4 a; + vec4 b[2]; + vec4 c; +}; + +layout(location = 0) in VertexData vInputs[gl_MaxPatchVertices]; +layout(location = 0) out vec4 vOutputs[4]; + +void main() +{ + VertexData tmp[gl_MaxPatchVertices] = vInputs; + VertexData tmp_single = vInputs[gl_InvocationID ^ 1]; + + vOutputs[gl_InvocationID] = 
tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1] + tmp[gl_InvocationID].c + tmp_single.c; +} diff --git a/shaders-msl/tesc/load-control-point-array-of-struct.tesc b/shaders-msl/tesc/load-control-point-array-of-struct.tesc new file mode 100644 index 00000000000..4b4d5bfcd13 --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array-of-struct.tesc @@ -0,0 +1,21 @@ +#version 450 + +layout(vertices = 4) out; + +struct VertexData +{ + mat4 a; + vec4 b[2]; + vec4 c; +}; + +layout(location = 0) in VertexData vInputs[gl_MaxPatchVertices]; +layout(location = 0) out vec4 vOutputs[4]; + +void main() +{ + VertexData tmp[gl_MaxPatchVertices] = vInputs; + VertexData tmp_single = vInputs[gl_InvocationID ^ 1]; + + vOutputs[gl_InvocationID] = tmp[gl_InvocationID].a[1] + tmp[gl_InvocationID].b[1] + tmp[gl_InvocationID].c + tmp_single.c; +} diff --git a/shaders-msl/tesc/load-control-point-array.multi-patch.tesc b/shaders-msl/tesc/load-control-point-array.multi-patch.tesc new file mode 100644 index 00000000000..1a5924b895f --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array.multi-patch.tesc @@ -0,0 +1,12 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) in vec4 vInputs[gl_MaxPatchVertices]; +layout(location = 0) out vec4 vOutputs[4]; + +void main() +{ + vec4 tmp[gl_MaxPatchVertices] = vInputs; + vOutputs[gl_InvocationID] = tmp[gl_InvocationID]; +} diff --git a/shaders-msl/tesc/load-control-point-array.tesc b/shaders-msl/tesc/load-control-point-array.tesc new file mode 100644 index 00000000000..1a5924b895f --- /dev/null +++ b/shaders-msl/tesc/load-control-point-array.tesc @@ -0,0 +1,12 @@ +#version 450 + +layout(vertices = 4) out; + +layout(location = 0) in vec4 vInputs[gl_MaxPatchVertices]; +layout(location = 0) out vec4 vOutputs[4]; + +void main() +{ + vec4 tmp[gl_MaxPatchVertices] = vInputs; + vOutputs[gl_InvocationID] = tmp[gl_InvocationID]; +} diff --git a/shaders-msl/tesc/matrix-output.multi-patch.tesc 
b/shaders-msl/tesc/matrix-output.multi-patch.tesc new file mode 100644 index 00000000000..0d23861980c --- /dev/null +++ b/shaders-msl/tesc/matrix-output.multi-patch.tesc @@ -0,0 +1,28 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 3) out; + +layout(location = 0) in highp float in_tc_attr[]; +layout(location = 0) out highp float in_te_attr[]; + +layout(location = 1) out mediump mat4x3 in_te_data0[]; +layout(location = 5) out mediump mat4x3 in_te_data1[]; + +void main (void) +{ + mat4x3 d = mat4x3(gl_InvocationID); + in_te_data0[gl_InvocationID] = d; + barrier(); + in_te_data1[gl_InvocationID] = d + in_te_data0[(gl_InvocationID + 1) % 3]; + + in_te_attr[gl_InvocationID] = in_tc_attr[gl_InvocationID]; + + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 1.0; + + gl_TessLevelOuter[0] = 1.0; + gl_TessLevelOuter[1] = 1.0; + gl_TessLevelOuter[2] = 1.0; + gl_TessLevelOuter[3] = 1.0; +} diff --git a/shaders-msl/tesc/reload-tess-level.multi-patch.tesc b/shaders-msl/tesc/reload-tess-level.multi-patch.tesc new file mode 100644 index 00000000000..c3f0195cc76 --- /dev/null +++ b/shaders-msl/tesc/reload-tess-level.multi-patch.tesc @@ -0,0 +1,17 @@ +#version 450 +layout(vertices = 4) out; + +void main() +{ + if (gl_InvocationID == 0) + { + gl_TessLevelOuter[0] = 2.0; + gl_TessLevelOuter[1] = 3.0; + gl_TessLevelOuter[2] = 4.0; + gl_TessLevelOuter[3] = 5.0; + gl_TessLevelInner[0] = mix(gl_TessLevelOuter[0], gl_TessLevelOuter[3], 0.5); + gl_TessLevelInner[1] = mix(gl_TessLevelOuter[2], gl_TessLevelOuter[1], 0.5); + } + + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} diff --git a/shaders-msl/tesc/reload-tess-level.tesc b/shaders-msl/tesc/reload-tess-level.tesc new file mode 100644 index 00000000000..c3f0195cc76 --- /dev/null +++ b/shaders-msl/tesc/reload-tess-level.tesc @@ -0,0 +1,17 @@ +#version 450 +layout(vertices = 4) out; + +void main() +{ + if (gl_InvocationID == 0) + { + gl_TessLevelOuter[0] = 2.0; + 
gl_TessLevelOuter[1] = 3.0; + gl_TessLevelOuter[2] = 4.0; + gl_TessLevelOuter[3] = 5.0; + gl_TessLevelInner[0] = mix(gl_TessLevelOuter[0], gl_TessLevelOuter[3], 0.5); + gl_TessLevelInner[1] = mix(gl_TessLevelOuter[2], gl_TessLevelOuter[1], 0.5); + } + + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; +} diff --git a/shaders-msl/tesc/struct-output.multi-patch.tesc b/shaders-msl/tesc/struct-output.multi-patch.tesc new file mode 100644 index 00000000000..a1511a475a3 --- /dev/null +++ b/shaders-msl/tesc/struct-output.multi-patch.tesc @@ -0,0 +1,36 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 3) out; + +layout(location = 0) in highp float in_tc_attr[]; +layout(location = 0) out highp float in_te_attr[]; + +struct te_data +{ + mediump float a; + mediump float b; + mediump uint c; +}; + +layout(location = 1) out te_data in_te_data0[]; +layout(location = 4) out te_data in_te_data1[]; + +void main (void) +{ + te_data d = te_data(float(gl_InvocationID), float(gl_InvocationID + 1), uint(gl_InvocationID)); + in_te_data0[gl_InvocationID] = d; + barrier(); + te_data e = in_te_data0[(gl_InvocationID + 1) % 3]; + in_te_data1[gl_InvocationID] = te_data(d.a + e.a, d.b + e.b, d.c + e.c); + + in_te_attr[gl_InvocationID] = in_tc_attr[gl_InvocationID]; + + gl_TessLevelInner[0] = 1.0; + gl_TessLevelInner[1] = 1.0; + + gl_TessLevelOuter[0] = 1.0; + gl_TessLevelOuter[1] = 1.0; + gl_TessLevelOuter[2] = 1.0; + gl_TessLevelOuter[3] = 1.0; +} diff --git a/shaders-msl/tesc/water_tess.multi-patch.tesc b/shaders-msl/tesc/water_tess.multi-patch.tesc new file mode 100644 index 00000000000..3ecdc3d1a96 --- /dev/null +++ b/shaders-msl/tesc/water_tess.multi-patch.tesc @@ -0,0 +1,115 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require + +layout(vertices = 1) out; +layout(location = 0) in vec2 vPatchPosBase[]; + +layout(std140) uniform UBO +{ + vec4 uScale; + highp vec3 uCamPos; + vec2 uPatchSize; + vec2 
uMaxTessLevel; + float uDistanceMod; + vec4 uFrustum[6]; +}; + +layout(location = 1) patch out vec2 vOutPatchPosBase; +layout(location = 2) patch out vec4 vPatchLods; + +float lod_factor(vec2 pos_) +{ + vec2 pos = pos_ * uScale.xy; + vec3 dist_to_cam = uCamPos - vec3(pos.x, 0.0, pos.y); + float level = log2((length(dist_to_cam) + 0.0001) * uDistanceMod); + return clamp(level, 0.0, uMaxTessLevel.x); +} + +float tess_level(float lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +vec4 tess_level(vec4 lod) +{ + return uMaxTessLevel.y * exp2(-lod); +} + +// Guard band for vertex displacement. +#define GUARD_BAND 10.0 +bool frustum_cull(vec2 p0) +{ + vec2 min_xz = (p0 - GUARD_BAND) * uScale.xy; + vec2 max_xz = (p0 + uPatchSize + GUARD_BAND) * uScale.xy; + + vec3 bb_min = vec3(min_xz.x, -GUARD_BAND, min_xz.y); + vec3 bb_max = vec3(max_xz.x, +GUARD_BAND, max_xz.y); + vec3 center = 0.5 * (bb_min + bb_max); + float radius = 0.5 * length(bb_max - bb_min); + + vec3 f0 = vec3( + dot(uFrustum[0], vec4(center, 1.0)), + dot(uFrustum[1], vec4(center, 1.0)), + dot(uFrustum[2], vec4(center, 1.0))); + + vec3 f1 = vec3( + dot(uFrustum[3], vec4(center, 1.0)), + dot(uFrustum[4], vec4(center, 1.0)), + dot(uFrustum[5], vec4(center, 1.0))); + + return !(any(lessThanEqual(f0, vec3(-radius))) || any(lessThanEqual(f1, vec3(-radius)))); +} + +void compute_tess_levels(vec2 p0) +{ + vOutPatchPosBase = p0; + + float l00 = lod_factor(p0 + vec2(-0.5, -0.5) * uPatchSize); + float l10 = lod_factor(p0 + vec2(+0.5, -0.5) * uPatchSize); + float l20 = lod_factor(p0 + vec2(+1.5, -0.5) * uPatchSize); + float l01 = lod_factor(p0 + vec2(-0.5, +0.5) * uPatchSize); + float l11 = lod_factor(p0 + vec2(+0.5, +0.5) * uPatchSize); + float l21 = lod_factor(p0 + vec2(+1.5, +0.5) * uPatchSize); + float l02 = lod_factor(p0 + vec2(-0.5, +1.5) * uPatchSize); + float l12 = lod_factor(p0 + vec2(+0.5, +1.5) * uPatchSize); + float l22 = lod_factor(p0 + vec2(+1.5, +1.5) * uPatchSize); + + vec4 lods = vec4( + dot(vec4(l01, 
l11, l02, l12), vec4(0.25)), + dot(vec4(l00, l10, l01, l11), vec4(0.25)), + dot(vec4(l10, l20, l11, l21), vec4(0.25)), + dot(vec4(l11, l21, l12, l22), vec4(0.25))); + + vPatchLods = lods; + + vec4 outer_lods = min(lods.xyzw, lods.yzwx); + vec4 levels = tess_level(outer_lods); + gl_TessLevelOuter[0] = levels.x; + gl_TessLevelOuter[1] = levels.y; + gl_TessLevelOuter[2] = levels.z; + gl_TessLevelOuter[3] = levels.w; + + float min_lod = min(min(lods.x, lods.y), min(lods.z, lods.w)); + float inner = tess_level(min(min_lod, l11)); + gl_TessLevelInner[0] = inner; + gl_TessLevelInner[1] = inner; +} + +void main() +{ + vec2 p0 = vPatchPosBase[0]; + if (!frustum_cull(p0)) + { + gl_TessLevelOuter[0] = -1.0; + gl_TessLevelOuter[1] = -1.0; + gl_TessLevelOuter[2] = -1.0; + gl_TessLevelOuter[3] = -1.0; + gl_TessLevelInner[0] = -1.0; + gl_TessLevelInner[1] = -1.0; + } + else + { + compute_tess_levels(p0); + } +} + diff --git a/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese b/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese new file mode 100644 index 00000000000..a32c13096a5 --- /dev/null +++ b/shaders-msl/tese/in-array-of-struct.raw-tess-in.tese @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_tessellation_shader : require + +layout(triangles) in; +layout(location = 0) in struct { + float dummy; + vec4 variableInStruct; +} testStructArray[][3]; +layout(location = 0) out float outResult; +void main(void) +{ + gl_Position = vec4(gl_TessCoord.xy * 2.0 - 1.0, 0.0, 1.0); + float result; + result = float(abs(testStructArray[0][2].variableInStruct.x - -4.0) < 0.001) * + float(abs(testStructArray[0][2].variableInStruct.y - -9.0) < 0.001) * + float(abs(testStructArray[0][2].variableInStruct.z - 3.0) < 0.001) * + float(abs(testStructArray[0][2].variableInStruct.w - 7.0) < 0.001); + outResult = result; +} diff --git a/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese b/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese new file mode 100644 index 
00000000000..629415a8736 --- /dev/null +++ b/shaders-msl/tese/in-block-with-nested-struct.raw-tess-in.tese @@ -0,0 +1,31 @@ +#version 450 +layout(triangles, ccw, equal_spacing) in; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +struct t35 +{ + vec2 m0; + vec4 m1; +}; + +layout(location = 0) in t36 +{ + vec2 m0; + t35 m1; +} v40[32]; + +layout(location = 0) out float v80; + +void main() +{ + gl_Position = vec4((gl_TessCoord.xy * 2.0) - vec2(1.0), 0.0, 1.0); + float v34 = ((float(abs(v40[0].m1.m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(v40[0].m1.m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(v40[0].m1.m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(v40[0].m1.m1.w - 7.0) < 0.001000000047497451305389404296875); + v80 = v34; +} diff --git a/shaders-msl/tese/in-block-with-nested-struct.tese b/shaders-msl/tese/in-block-with-nested-struct.tese new file mode 100644 index 00000000000..629415a8736 --- /dev/null +++ b/shaders-msl/tese/in-block-with-nested-struct.tese @@ -0,0 +1,31 @@ +#version 450 +layout(triangles, ccw, equal_spacing) in; + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +struct t35 +{ + vec2 m0; + vec4 m1; +}; + +layout(location = 0) in t36 +{ + vec2 m0; + t35 m1; +} v40[32]; + +layout(location = 0) out float v80; + +void main() +{ + gl_Position = vec4((gl_TessCoord.xy * 2.0) - vec2(1.0), 0.0, 1.0); + float v34 = ((float(abs(v40[0].m1.m1.x - (-4.0)) < 0.001000000047497451305389404296875) * float(abs(v40[0].m1.m1.y - (-9.0)) < 0.001000000047497451305389404296875)) * float(abs(v40[0].m1.m1.z - 3.0) < 0.001000000047497451305389404296875)) * float(abs(v40[0].m1.m1.w - 7.0) < 0.001000000047497451305389404296875); + v80 = v34; +} diff --git a/shaders-msl/tese/input-types.raw-tess-in.tese b/shaders-msl/tese/input-types.raw-tess-in.tese new file mode 
100644 index 00000000000..3157953fdd5 --- /dev/null +++ b/shaders-msl/tese/input-types.raw-tess-in.tese @@ -0,0 +1,75 @@ +#version 450 + +layout(ccw, quads, fractional_even_spacing) in; + +// Try to use the whole taxonomy of input methods. + +// Per-vertex vector. +layout(location = 0) in vec4 vColor[]; +// Per-patch vector. +layout(location = 1) patch in vec4 vColors; +// Per-patch vector array. +layout(location = 2) patch in vec4 vColorsArray[2]; + +// I/O blocks, per patch and per control point. +layout(location = 4) in Block +{ + vec4 a; + vec4 b; +} blocks[]; + +layout(location = 6) patch in PatchBlock +{ + vec4 a; + vec4 b; +} patch_block; + +// Composites. +struct Foo +{ + vec4 a; + vec4 b; +}; +layout(location = 8) patch in Foo vFoo; +//layout(location = 10) patch in Foo vFooArray[2]; // FIXME: Handling of array-of-struct input is broken! + +// Per-control point struct. +layout(location = 14) in Foo vFoos[]; + +void set_from_function() +{ + gl_Position = blocks[0].a; + gl_Position += blocks[0].b; + gl_Position += blocks[1].a; + gl_Position += blocks[1].b; + gl_Position += patch_block.a; + gl_Position += patch_block.b; + gl_Position += vColor[0]; + gl_Position += vColor[1]; + gl_Position += vColors; + + Foo foo = vFoo; + gl_Position += foo.a; + gl_Position += foo.b; + + /*foo = vFooArray[0]; + gl_Position += foo.a; + gl_Position += foo.b; + + foo = vFooArray[1]; + gl_Position += foo.a; + gl_Position += foo.b;*/ + + foo = vFoos[0]; + gl_Position += foo.a; + gl_Position += foo.b; + + foo = vFoos[1]; + gl_Position += foo.a; + gl_Position += foo.b; +} + +void main() +{ + set_from_function(); +} diff --git a/shaders-msl/tese/load-control-point-array-of-matrix.tese b/shaders-msl/tese/load-control-point-array-of-matrix.tese new file mode 100644 index 00000000000..479b3e651b7 --- /dev/null +++ b/shaders-msl/tese/load-control-point-array-of-matrix.tese @@ -0,0 +1,13 @@ +#version 450 + +layout(cw, quads) in; +layout(location = 0) in mat4 vInputs[gl_MaxPatchVertices]; 
+layout(location = 4) patch in vec4 vBoo[4]; +layout(location = 8) patch in int vIndex; + +void main() +{ + mat4 tmp[gl_MaxPatchVertices] = vInputs; + gl_Position = tmp[0][vIndex] + tmp[1][vIndex] + vBoo[vIndex]; + +} diff --git a/shaders-msl/tese/load-control-point-array.tese b/shaders-msl/tese/load-control-point-array.tese new file mode 100644 index 00000000000..4fa0bb1242a --- /dev/null +++ b/shaders-msl/tese/load-control-point-array.tese @@ -0,0 +1,13 @@ +#version 450 + +layout(cw, quads) in; +layout(location = 0) in vec4 vInputs[gl_MaxPatchVertices]; +layout(location = 1) patch in vec4 vBoo[4]; +layout(location = 5) patch in int vIndex; + +void main() +{ + vec4 tmp[gl_MaxPatchVertices] = vInputs; + gl_Position = tmp[0] + tmp[1] + vBoo[vIndex]; + +} diff --git a/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese b/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese new file mode 100644 index 00000000000..0c289ac5bd2 --- /dev/null +++ b/shaders-msl/tese/read-tess-level-in-func-quad.msl2.tese @@ -0,0 +1,17 @@ +#version 450 +layout(quads) in; + +vec4 read_tess_levels() +{ + return vec4( + gl_TessLevelOuter[0], + gl_TessLevelOuter[1], + gl_TessLevelOuter[2], + gl_TessLevelOuter[3]) + + vec2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +void main() +{ + gl_Position = read_tess_levels(); +} diff --git a/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese b/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..0c289ac5bd2 --- /dev/null +++ b/shaders-msl/tese/read-tess-level-in-func-quad.raw-tess-in.msl2.tese @@ -0,0 +1,17 @@ +#version 450 +layout(quads) in; + +vec4 read_tess_levels() +{ + return vec4( + gl_TessLevelOuter[0], + gl_TessLevelOuter[1], + gl_TessLevelOuter[2], + gl_TessLevelOuter[3]) + + vec2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +void main() +{ + gl_Position = read_tess_levels(); +} diff --git a/shaders-msl/tese/read-tess-level-in-func.msl2.tese 
b/shaders-msl/tese/read-tess-level-in-func.msl2.tese new file mode 100644 index 00000000000..8cf1f1a8f75 --- /dev/null +++ b/shaders-msl/tese/read-tess-level-in-func.msl2.tese @@ -0,0 +1,17 @@ +#version 450 +layout(triangles) in; + +vec4 read_tess_levels() +{ + return vec4( + gl_TessLevelOuter[0], + gl_TessLevelOuter[1], + gl_TessLevelOuter[2], + gl_TessLevelOuter[3]) + + vec2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +void main() +{ + gl_Position = read_tess_levels(); +} diff --git a/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese b/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese new file mode 100644 index 00000000000..8cf1f1a8f75 --- /dev/null +++ b/shaders-msl/tese/read-tess-level-in-func.raw-tess-in.msl2.tese @@ -0,0 +1,17 @@ +#version 450 +layout(triangles) in; + +vec4 read_tess_levels() +{ + return vec4( + gl_TessLevelOuter[0], + gl_TessLevelOuter[1], + gl_TessLevelOuter[2], + gl_TessLevelOuter[3]) + + vec2(gl_TessLevelInner[0], gl_TessLevelInner[1]).xyxy; +} + +void main() +{ + gl_Position = read_tess_levels(); +} diff --git a/shaders-msl/tese/water_tess.raw-tess-in.tese b/shaders-msl/tese/water_tess.raw-tess-in.tese new file mode 100644 index 00000000000..32d6bc9391b --- /dev/null +++ b/shaders-msl/tese/water_tess.raw-tess-in.tese @@ -0,0 +1,65 @@ +#version 310 es +#extension GL_EXT_tessellation_shader : require +precision highp int; + +layout(cw, quads, fractional_even_spacing) in; + +layout(location = 0) patch in vec2 vOutPatchPosBase; +layout(location = 1) patch in vec4 vPatchLods; + +layout(binding = 1, std140) uniform UBO +{ + mat4 uMVP; + vec4 uScale; + vec2 uInvScale; + vec3 uCamPos; + vec2 uPatchSize; + vec2 uInvHeightmapSize; +}; +layout(binding = 0) uniform mediump sampler2D uHeightmapDisplacement; + +layout(location = 0) highp out vec3 vWorld; +layout(location = 1) highp out vec4 vGradNormalTex; + +vec2 lerp_vertex(vec2 tess_coord) +{ + return vOutPatchPosBase + tess_coord * uPatchSize; +} + +mediump 
vec2 lod_factor(vec2 tess_coord) +{ + mediump vec2 x = mix(vPatchLods.yx, vPatchLods.zw, tess_coord.x); + mediump float level = mix(x.x, x.y, tess_coord.y); + mediump float floor_level = floor(level); + mediump float fract_level = level - floor_level; + return vec2(floor_level, fract_level); +} + +mediump vec3 sample_height_displacement(vec2 uv, vec2 off, mediump vec2 lod) +{ + return mix( + textureLod(uHeightmapDisplacement, uv + 0.5 * off, lod.x).xyz, + textureLod(uHeightmapDisplacement, uv + 1.0 * off, lod.x + 1.0).xyz, + lod.y); +} + +void main() +{ + vec2 tess_coord = gl_TessCoord.xy; + vec2 pos = lerp_vertex(tess_coord); + mediump vec2 lod = lod_factor(tess_coord); + + vec2 tex = pos * uInvHeightmapSize.xy; + pos *= uScale.xy; + + mediump float delta_mod = exp2(lod.x); + vec2 off = uInvHeightmapSize.xy * delta_mod; + + vGradNormalTex = vec4(tex + 0.5 * uInvHeightmapSize.xy, tex * uScale.zw); + vec3 height_displacement = sample_height_displacement(tex, off, lod); + + pos += height_displacement.yz; + vWorld = vec3(pos.x, height_displacement.x, pos.y); + gl_Position = uMVP * vec4(vWorld, 1.0); +} + diff --git a/shaders-msl/vert/array-component-io.for-tess.vert b/shaders-msl/vert/array-component-io.for-tess.vert new file mode 100644 index 00000000000..257ac848d16 --- /dev/null +++ b/shaders-msl/vert/array-component-io.for-tess.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 1, component = 0) out float A[2]; +layout(location = 1, component = 2) out vec2 B[2]; +layout(location = 0, component = 1) out float C[3]; +layout(location = 0, component = 3) out float D; + +layout(location = 1, component = 0) in float InA[2]; +layout(location = 1, component = 2) in vec2 InB[2]; +layout(location = 0, component = 1) in float InC[3]; +layout(location = 0, component = 3) in float InD; +layout(location = 4) in vec4 Pos; + +void main() +{ + gl_Position = Pos; + A = InA; + B = InB; + C = InC; + D = InD; +} diff --git a/shaders-msl/vert/array-component-io.vert 
b/shaders-msl/vert/array-component-io.vert new file mode 100644 index 00000000000..257ac848d16 --- /dev/null +++ b/shaders-msl/vert/array-component-io.vert @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 1, component = 0) out float A[2]; +layout(location = 1, component = 2) out vec2 B[2]; +layout(location = 0, component = 1) out float C[3]; +layout(location = 0, component = 3) out float D; + +layout(location = 1, component = 0) in float InA[2]; +layout(location = 1, component = 2) in vec2 InB[2]; +layout(location = 0, component = 1) in float InC[3]; +layout(location = 0, component = 3) in float InD; +layout(location = 4) in vec4 Pos; + +void main() +{ + gl_Position = Pos; + A = InA; + B = InB; + C = InC; + D = InD; +} diff --git a/shaders-msl/vert/basic.for-tess.vert b/shaders-msl/vert/basic.for-tess.vert new file mode 100644 index 00000000000..8191dc2d0fc --- /dev/null +++ b/shaders-msl/vert/basic.for-tess.vert @@ -0,0 +1,17 @@ +#version 310 es + +layout(std140) uniform UBO +{ + uniform mat4 uMVP; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; + +layout(location = 0) out vec3 vNormal; + +void main() +{ + gl_Position = uMVP * aVertex; + vNormal = aNormal; +} diff --git a/shaders-msl/vert/buffer_device_address.msl2.vert b/shaders-msl/vert/buffer_device_address.msl2.vert new file mode 100644 index 00000000000..ffc88713060 --- /dev/null +++ b/shaders-msl/vert/buffer_device_address.msl2.vert @@ -0,0 +1,83 @@ +/* Copyright (c) 2021, Arm Limited and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +// Allows buffer_reference. +#extension GL_EXT_buffer_reference : require + +// Since we did not enable vertexPipelineStoresAndAtomics, we must mark everything readonly. +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer Position +{ + vec2 positions[]; +}; + +layout(std430, buffer_reference, buffer_reference_align = 8) readonly buffer PositionReferences +{ + // Represents an array of pointers, where each pointer points to its own VBO (Position). + // The size of a pointer (VkDeviceAddress) is always 8 in Vulkan. + Position buffers[]; +}; + +layout(push_constant) uniform Registers +{ + mat4 view_projection; + + // This is a pointer to an array of pointers, essentially: + // const VBO * const *vbos + PositionReferences references; +} registers; + +// Flat shading looks a little cooler here :) +layout(location = 0) flat out vec4 out_color; + +void main() +{ + int slice = gl_InstanceIndex; + + // One VBO per instance, load the VBO pointer. + // The cool thing here is that a compute shader could hypothetically + // write the pointer list where vertices are stored. + // With vertex attributes we do not have the luxury to modify VBO bindings on the GPU. + // The best we can do is to just modify the vertexOffset in an indirect draw call, + // but that's not always flexible enough, and enforces a very specific engine design to work. + // We can even modify the attribute layout per slice here, since we can just cast the pointer + // to something else if we want. 
+ restrict Position positions = registers.references.buffers[slice]; + + // Load the vertex based on VertexIndex instead of an attribute. Fully flexible. + // Only downside is that we do not get format conversion for free like we do with normal vertex attributes. + vec2 pos = positions.positions[gl_VertexIndex] * 2.5; + + // Place the quad meshes on screen and center it. + pos += 3.0 * (vec2(slice % 8, slice / 8) - 3.5); + + // Normal projection. + gl_Position = registers.view_projection * vec4(pos, 0.0, 1.0); + + // Color the vertex. Use a combination of a wave and checkerboard, completely arbitrary. + int index_x = gl_VertexIndex % 16; + int index_y = gl_VertexIndex / 16; + + float r = 0.5 + 0.3 * sin(float(index_x)); + float g = 0.5 + 0.3 * sin(float(index_y)); + + int checkerboard = (index_x ^ index_y) & 1; + r *= float(checkerboard) * 0.8 + 0.2; + g *= float(checkerboard) * 0.8 + 0.2; + + out_color = vec4(r, g, 0.15, 1.0); +} diff --git a/shaders-msl/vert/clip-distance-block.no-user-varying.vert b/shaders-msl/vert/clip-distance-block.no-user-varying.vert new file mode 100644 index 00000000000..93ed31150c8 --- /dev/null +++ b/shaders-msl/vert/clip-distance-block.no-user-varying.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 Position; +out gl_PerVertex +{ + vec4 gl_Position; + float gl_ClipDistance[2]; +}; + +void main() +{ + gl_Position = Position; + gl_ClipDistance[0] = Position.x; + gl_ClipDistance[1] = Position.y; +} diff --git a/shaders-msl/vert/clip-distance-block.vert b/shaders-msl/vert/clip-distance-block.vert new file mode 100644 index 00000000000..93ed31150c8 --- /dev/null +++ b/shaders-msl/vert/clip-distance-block.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 Position; +out gl_PerVertex +{ + vec4 gl_Position; + float gl_ClipDistance[2]; +}; + +void main() +{ + gl_Position = Position; + gl_ClipDistance[0] = Position.x; + gl_ClipDistance[1] = Position.y; +} diff --git 
a/shaders-msl/vert/float-math.invariant-float-math.vert b/shaders-msl/vert/float-math.invariant-float-math.vert new file mode 100644 index 00000000000..caa8639a895 --- /dev/null +++ b/shaders-msl/vert/float-math.invariant-float-math.vert @@ -0,0 +1,25 @@ +#version 450 + +layout(set = 0, binding = 0) uniform Matrices +{ + mat4 vpMatrix; + mat4 wMatrix; + mat4x3 wMatrix4x3; + mat3x4 wMatrix3x4; +}; + +layout(location = 0) in vec3 InPos; +layout(location = 1) in vec3 InNormal; + +layout(location = 0) out vec3 OutNormal; +layout(location = 1) out vec4 OutWorldPos[4]; + +void main() +{ + gl_Position = vpMatrix * wMatrix * vec4(InPos, 1); + OutWorldPos[0] = wMatrix * vec4(InPos, 1); + OutWorldPos[1] = vec4(InPos, 1) * wMatrix; + OutWorldPos[2] = wMatrix3x4 * InPos; + OutWorldPos[3] = InPos * wMatrix4x3; + OutNormal = (wMatrix * vec4(InNormal, 0)).xyz; +} diff --git a/shaders-msl/vert/float-math.vert b/shaders-msl/vert/float-math.vert new file mode 100644 index 00000000000..caa8639a895 --- /dev/null +++ b/shaders-msl/vert/float-math.vert @@ -0,0 +1,25 @@ +#version 450 + +layout(set = 0, binding = 0) uniform Matrices +{ + mat4 vpMatrix; + mat4 wMatrix; + mat4x3 wMatrix4x3; + mat3x4 wMatrix3x4; +}; + +layout(location = 0) in vec3 InPos; +layout(location = 1) in vec3 InNormal; + +layout(location = 0) out vec3 OutNormal; +layout(location = 1) out vec4 OutWorldPos[4]; + +void main() +{ + gl_Position = vpMatrix * wMatrix * vec4(InPos, 1); + OutWorldPos[0] = wMatrix * vec4(InPos, 1); + OutWorldPos[1] = vec4(InPos, 1) * wMatrix; + OutWorldPos[2] = wMatrix3x4 * InPos; + OutWorldPos[3] = InPos * wMatrix4x3; + OutNormal = (wMatrix * vec4(InNormal, 0)).xyz; +} diff --git a/shaders-msl/vert/implicit-position-1.vert b/shaders-msl/vert/implicit-position-1.vert new file mode 100644 index 00000000000..54300d8daa7 --- /dev/null +++ b/shaders-msl/vert/implicit-position-1.vert @@ -0,0 +1,6 @@ +#version 450 +layout(location = 0) out vec4 V; +void main() +{ + V = vec4(1.0); +} diff --git 
a/shaders-msl/vert/implicit-position-2.vert b/shaders-msl/vert/implicit-position-2.vert new file mode 100644 index 00000000000..9996ddaad21 --- /dev/null +++ b/shaders-msl/vert/implicit-position-2.vert @@ -0,0 +1,4 @@ +#version 450 +void main() +{ +} diff --git a/shaders-msl/vert/interface-block-single-element-array.vert b/shaders-msl/vert/interface-block-single-element-array.vert new file mode 100644 index 00000000000..993484fba82 --- /dev/null +++ b/shaders-msl/vert/interface-block-single-element-array.vert @@ -0,0 +1,17 @@ +#version 460 + +layout(location = 0) out TDPickVertex +{ +vec4 c; +vec3 uv[1]; +} oTDVert; + +layout(location = 0) in vec3 P; +layout(location = 1) in vec3 uv[1]; + +void main() +{ +gl_Position = vec4(P, 1.0); +oTDVert.uv[0] = uv[0]; +oTDVert.c = vec4(1.); +} \ No newline at end of file diff --git a/shaders-msl/vert/leaf-function.for-tess.vert b/shaders-msl/vert/leaf-function.for-tess.vert new file mode 100644 index 00000000000..cdb60fae31c --- /dev/null +++ b/shaders-msl/vert/leaf-function.for-tess.vert @@ -0,0 +1,22 @@ +#version 310 es + +layout(std140) uniform UBO +{ + uniform mat4 uMVP; +}; + +layout(location = 0) in vec4 aVertex; +layout(location = 1) in vec3 aNormal; + +layout(location = 0) out vec3 vNormal; + +void set_output() +{ + gl_Position = uMVP * aVertex; + vNormal = aNormal; +} + +void main() +{ + set_output(); +} diff --git a/shaders-msl/vert/no-contraction.vert b/shaders-msl/vert/no-contraction.vert new file mode 100644 index 00000000000..206fbf0de80 --- /dev/null +++ b/shaders-msl/vert/no-contraction.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 mul = vA * vB; + precise vec4 add = vA + vB; + precise vec4 sub = vA - vB; + precise vec4 mad = vA * vB + vC; + precise vec4 summed = mul + add + sub + mad; + gl_Position = summed; +} diff --git a/shaders-msl/vert/no-disable-vertex-out.frag-output.vert 
b/shaders-msl/vert/no-disable-vertex-out.frag-output.vert new file mode 100644 index 00000000000..7ea3790a02a --- /dev/null +++ b/shaders-msl/vert/no-disable-vertex-out.frag-output.vert @@ -0,0 +1,16 @@ +#version 400 +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable +layout(std140, binding = 0) uniform buf { + mat4 MVP; + vec4 position[12*3]; + vec4 attr[12*3]; +} ubuf; +layout (location = 0) out vec4 texcoord; +layout (location = 1) out vec3 frag_pos; +void main() +{ + texcoord = ubuf.attr[gl_VertexIndex]; + gl_Position = ubuf.MVP * ubuf.position[gl_VertexIndex]; + frag_pos = gl_Position.xyz; +} diff --git a/shaders-msl/vert/no_stage_out.for-tess.vert b/shaders-msl/vert/no_stage_out.for-tess.vert new file mode 100644 index 00000000000..3c2573a628d --- /dev/null +++ b/shaders-msl/vert/no_stage_out.for-tess.vert @@ -0,0 +1,14 @@ +#version 450 + +layout(binding = 0, std430) writeonly buffer _10_12 +{ + uvec4 _m0[1024]; +} _12; + +layout(location = 0) in uvec4 _19; + +void main() +{ + _12._m0[gl_VertexIndex] = _19; +} + diff --git a/shaders-msl/vert/out-block-with-nested-struct-array.vert b/shaders-msl/vert/out-block-with-nested-struct-array.vert new file mode 100644 index 00000000000..444e7ca9730 --- /dev/null +++ b/shaders-msl/vert/out-block-with-nested-struct-array.vert @@ -0,0 +1,28 @@ +#version 450 + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +struct t21 +{ + vec4 m0; + vec4 m1; +}; + +layout(location = 0) in vec4 v17; +layout(location = 0) out t24 +{ + t21 m0[3]; +} v26; + + +void main() +{ + gl_Position = v17; + v26.m0[1].m1 = vec4(-4.0, -9.0, 3.0, 7.0); +} diff --git a/shaders-msl/vert/out-block-with-struct-array.vert b/shaders-msl/vert/out-block-with-struct-array.vert new file mode 100644 index 00000000000..2cb13b30959 --- /dev/null +++ b/shaders-msl/vert/out-block-with-struct-array.vert @@ -0,0 +1,24 @@ +#version 450 + +out 
gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +struct t21 +{ + float m0; + vec4 m1; +}; + +layout(location = 0) in vec4 v17; +layout(location = 0) out t21 v25[3]; + +void main() +{ + gl_Position = v17; + v25[2].m1 = vec4(-4.0, -9.0, 3.0, 7.0); +} diff --git a/shaders-msl/vert/packed-bool-to-uint.vert b/shaders-msl/vert/packed-bool-to-uint.vert new file mode 100644 index 00000000000..933a15e621d --- /dev/null +++ b/shaders-msl/vert/packed-bool-to-uint.vert @@ -0,0 +1,22 @@ +#version 450 core + +struct Struct +{ + bool flags[1]; +}; + +layout(set=0, binding=0, std140) uniform defaultUniformsVS +{ + Struct flags; + vec2 uquad[4]; + mat4 umatrix; +}; + +layout (location = 0) in vec4 a_position; + +void main() +{ + gl_Position = umatrix * vec4(uquad[gl_VertexIndex], a_position.z, a_position.w); + if (flags.flags[0]) + gl_Position.z = 0.0; +} diff --git a/shaders-msl/vert/packed-bool2-to-packed_uint2.vert b/shaders-msl/vert/packed-bool2-to-packed_uint2.vert new file mode 100644 index 00000000000..e3939a4519d --- /dev/null +++ b/shaders-msl/vert/packed-bool2-to-packed_uint2.vert @@ -0,0 +1,22 @@ +#version 450 core + +struct Struct +{ + bvec2 flags[1]; +}; + +layout(set=0, binding=0, std140) uniform defaultUniformsVS +{ + Struct flags; + vec2 uquad[4]; + mat4 umatrix; +}; + +layout (location = 0) in vec4 a_position; + +void main() +{ + gl_Position = umatrix * vec4(uquad[gl_VertexIndex], a_position.z, a_position.w); + if (flags.flags[0].x) + gl_Position.z = 0.0; +} diff --git a/shaders-msl/vert/return-array.force-native-array.vert b/shaders-msl/vert/return-array.force-native-array.vert new file mode 100644 index 00000000000..708460114e5 --- /dev/null +++ b/shaders-msl/vert/return-array.force-native-array.vert @@ -0,0 +1,22 @@ +#version 310 es + +layout(location = 0) in vec4 vInput0; +layout(location = 1) in vec4 vInput1; + +vec4[2] test() +{ + return vec4[](vec4(10.0), vec4(20.0)); +} + +vec4[2] 
test2() +{ + vec4 foobar[2]; + foobar[0] = vInput0; + foobar[1] = vInput1; + return foobar; +} + +void main() +{ + gl_Position = test()[0] + test2()[1]; +} diff --git a/shaders-msl/vert/signedness-mismatch.shader-inputs.vert b/shaders-msl/vert/signedness-mismatch.shader-inputs.vert new file mode 100644 index 00000000000..dc0f7e6b52e --- /dev/null +++ b/shaders-msl/vert/signedness-mismatch.shader-inputs.vert @@ -0,0 +1,14 @@ +#version 450 + +#extension GL_AMD_gpu_shader_int16 : require + +layout(location = 0) in int16_t a; +layout(location = 1) in ivec2 b; +layout(location = 2) in uint16_t c[2]; +layout(location = 4) in uvec4 d[2]; + +void main() +{ + gl_Position = vec4(float(int(a)), float(b.x), float(uint(c[1])), float(d[0].w)); +} + diff --git a/shaders-msl/vert/uniform-struct-out-of-order-offests.vert b/shaders-msl/vert/uniform-struct-out-of-order-offests.vert new file mode 100644 index 00000000000..21234f94164 --- /dev/null +++ b/shaders-msl/vert/uniform-struct-out-of-order-offests.vert @@ -0,0 +1,31 @@ +#version 450 + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[1]; +}; + +layout(set = 0, binding = 0, std140) uniform data_u_t +{ + layout(offset = 80) mediump int m0[8]; + layout(offset = 0) mediump ivec4 m1[3]; + layout(offset = 64) uvec3 m2; + layout(offset = 48) mediump uint m3; +} data_u; + +layout(location = 0) in vec4 vtx_posn; +layout(location = 0) out mediump float foo; + +void main() +{ + gl_Position = vtx_posn; + ivec4 a = data_u.m1[1]; + uvec3 b = data_u.m2; + int c = data_u.m0[4]; + foo = (a.xyz + b).y * c; +} + + diff --git a/shaders-msl/vert/uniform-struct-packing-nested.vert b/shaders-msl/vert/uniform-struct-packing-nested.vert new file mode 100644 index 00000000000..6744b783622 --- /dev/null +++ b/shaders-msl/vert/uniform-struct-packing-nested.vert @@ -0,0 +1,50 @@ +#version 450 + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float 
gl_CullDistance[1]; +}; + +struct s0 +{ + mediump mat2x3 m0; + ivec4 m1; + mat4 m2; + uvec2 m3; +}; + +struct s1 +{ + mediump mat3x4 m0; + mediump int m1; + uvec3 m2; + s0 m3; +}; + +layout(set = 0, binding = 0, std140) uniform data_u_t +{ + layout(row_major, offset = 368) mediump mat2x3 m0; + layout(offset = 0) vec2 m1[5]; + layout(row_major, offset = 128) s1 m2; + layout(row_major, offset = 80) mediump mat4x2 m3; + layout(offset = 112) ivec4 m4; +} data_u; + +layout(location = 0) in vec4 vtx_posn; +layout(location = 0) out mediump float foo; + +void main() +{ + gl_Position = vtx_posn; + vec2 a = data_u.m1[3]; + ivec4 b = data_u.m4; + mat2x3 c = data_u.m0; + mat3x4 d = data_u.m2.m0; + mat4 e = data_u.m2.m3.m2; + foo = (a.y + b.z) * c[1][2] * d[2][3] * e[3][3]; +} + + diff --git a/shaders-msl/vert/unused-position.vert b/shaders-msl/vert/unused-position.vert new file mode 100644 index 00000000000..61e30b431ce --- /dev/null +++ b/shaders-msl/vert/unused-position.vert @@ -0,0 +1,13 @@ +#version 450 + +out gl_PerVertex +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; +}; + +void main() +{ + gl_PointSize = 1.0; +} diff --git a/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag b/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag new file mode 100644 index 00000000000..963493b871d --- /dev/null +++ b/shaders-msl/vulkan/frag/basic.multiview.no-layered.nocompat.vk.frag @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_multiview : require +precision mediump float; + +layout(location = 0) in vec4 vColor; +layout(location = 1) in vec2 vTex[4]; +layout(binding = 0) uniform sampler2D uTex; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vColor * texture(uTex, vTex[gl_ViewIndex]); +} + diff --git a/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag b/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag new file mode 100644 index 
00000000000..ba57b8c5afa --- /dev/null +++ b/shaders-msl/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.msl23.frag @@ -0,0 +1,41 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 19 +; Schema: 0 + OpCapability Shader + OpCapability DemoteToHelperInvocationEXT + OpExtension "SPV_EXT_demote_to_helper_invocation" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_demote_to_helper_invocation" + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %19 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %9 = OpIsHelperInvocationEXT %bool + OpDemoteToHelperInvocationEXT + %10 = OpLogicalNot %bool %9 + OpSelectionMerge %12 None + OpBranchConditional %10 %11 %12 + %11 = OpLabel + OpStore %FragColor %19 + OpBranch %12 + %12 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag b/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag new file mode 100644 index 00000000000..18407988db1 --- /dev/null +++ b/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.frag @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void foo() +{ + demote; +} + +void bar() +{ + bool helper = helperInvocationEXT(); +} + +void main() +{ + foo(); + bar(); +} diff --git 
a/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag b/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag new file mode 100644 index 00000000000..8b8bb61ff7b --- /dev/null +++ b/shaders-msl/vulkan/frag/demote-to-helper.vk.nocompat.msl23.ios.frag @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; + bool helper = helperInvocationEXT(); +} diff --git a/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert b/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert new file mode 100644 index 00000000000..d54931a6de9 --- /dev/null +++ b/shaders-msl/vulkan/vert/device-group.multiview.viewfromdev.nocompat.vk.vert @@ -0,0 +1,8 @@ +#version 450 core +#extension GL_EXT_device_group : require +#extension GL_EXT_multiview : require + +void main() +{ + gl_Position = vec4(gl_DeviceIndex, gl_ViewIndex, 0.0, 1.0); +} diff --git a/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert b/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert new file mode 100644 index 00000000000..16ed51b15ef --- /dev/null +++ b/shaders-msl/vulkan/vert/device-group.nocompat.vk.vert @@ -0,0 +1,7 @@ +#version 450 core +#extension GL_EXT_device_group : require + +void main() +{ + gl_Position = vec4(gl_DeviceIndex); +} diff --git a/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert b/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert new file mode 100644 index 00000000000..eb1bc766f2d --- /dev/null +++ b/shaders-msl/vulkan/vert/multiview.multiview.no-layered.nocompat.vk.vert @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_multiview : require + +layout(std140, binding = 0) uniform MVPs +{ + mat4 MVP[2]; +}; + +layout(location = 0) in vec4 Position; + +void main() +{ + gl_Position = MVP[gl_ViewIndex] * Position; +} diff --git a/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp 
b/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp new file mode 100644 index 00000000000..c11d4cdd0a9 --- /dev/null +++ b/shaders-no-opt/asm/comp/access-tracking-function-call-result.asm.comp @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 25 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 460 + OpName %main "main" + OpName %foo_ "foo(" + OpName %Output "Output" + OpMemberName %Output 0 "myout" + OpName %_ "" + OpMemberDecorate %Output 0 Offset 0 + OpDecorate %Output BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %9 = OpTypeFunction %int + %int_12 = OpConstant %int 12 + %bool = OpTypeBool + %true = OpConstantTrue %bool + %Output = OpTypeStruct %int +%_ptr_Uniform_Output = OpTypePointer Uniform %Output + %_ = OpVariable %_ptr_Uniform_Output Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_int = OpTypePointer Uniform %int + %main = OpFunction %void None %7 + %16 = OpLabel + %17 = OpFunctionCall %int %foo_ + OpBranch %18 + %18 = OpLabel + OpLoopMerge %19 %20 None + OpBranchConditional %true %21 %19 + %21 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + OpStore %22 %17 + OpReturn + %20 = OpLabel + OpBranch %18 + %19 = OpLabel + %23 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + OpStore %23 %17 + OpReturn + OpFunctionEnd + %foo_ = OpFunction %int None %9 + %24 = OpLabel + OpReturnValue %int_12 + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 00000000000..87aee2db54f --- /dev/null +++ b/shaders-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; 
Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %T "T" + OpMemberName %T 0 "a" + OpName %v "v" + OpName %T_0 "T" + OpMemberName %T_0 0 "b" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "foo" + OpName %_ "" + OpName %T_1 "T" + OpMemberName %T_1 0 "c" + OpName %SSBO2 "SSBO2" + OpMemberName %SSBO2 0 "bar" + OpName %__0 "" + OpMemberDecorate %T_0 0 Offset 0 + OpDecorate %_runtimearr_T_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %T_1 0 Offset 0 + OpDecorate %_runtimearr_T_1 ArrayStride 16 + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %T = OpTypeStruct %float +%_ptr_Function_T = OpTypePointer Function %T + %float_40 = OpConstant %float 40 + %11 = OpConstantComposite %T %float_40 + %T_0 = OpTypeStruct %float +%_runtimearr_T_0 = OpTypeRuntimeArray %T_0 + %SSBO1 = OpTypeStruct %_runtimearr_T_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_T_0 = OpTypePointer Uniform %T_0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %T_1 = OpTypeStruct %float +%_runtimearr_T_1 = OpTypeRuntimeArray %T_1 + %SSBO2 = OpTypeStruct %_runtimearr_T_1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %__0 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int_30 = OpConstant %int 30 +%_ptr_Uniform_T_1 = OpTypePointer Uniform %T_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable 
%_ptr_Function_T Function + OpStore %v %11 + %20 = OpLoad %T %v + %22 = OpAccessChain %_ptr_Uniform_T_0 %_ %int_0 %int_10 + %23 = OpCompositeExtract %float %20 0 + %25 = OpAccessChain %_ptr_Uniform_float %22 %int_0 + OpStore %25 %23 + %32 = OpLoad %T %v + %34 = OpAccessChain %_ptr_Uniform_T_1 %__0 %int_0 %int_30 + %35 = OpCompositeExtract %float %32 0 + %36 = OpAccessChain %_ptr_Uniform_float %34 %int_0 + OpStore %36 %35 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp b/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp index 0e1ce235d21..504a9546c5a 100644 --- a/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp +++ b/shaders-no-opt/asm/comp/arithmetic-conversion-signs.asm.nocompat.vk.comp @@ -111,11 +111,23 @@ OpStore %ptr_f32 %s16_to_f32_signed %u16_to_f32_signed = OpConvertSToF %float %u16 OpStore %ptr_f32 %u16_to_f32_signed + + %s32_to_f32_signed = OpConvertSToF %float %s32 + OpStore %ptr_f32 %s32_to_f32_signed + %u32_to_f32_signed = OpConvertSToF %float %u32 + OpStore %ptr_f32 %u32_to_f32_signed + + ; UToF %s16_to_f32_unsigned = OpConvertUToF %float %s16 OpStore %ptr_f32 %s16_to_f32_unsigned %u16_to_f32_unsigned = OpConvertUToF %float %u16 OpStore %ptr_f32 %u16_to_f32_unsigned + %s32_to_f32_unsigned = OpConvertUToF %float %s32 + OpStore %ptr_f32 %s32_to_f32_unsigned + %u32_to_f32_unsigned = OpConvertUToF %float %u32 + OpStore %ptr_f32 %u32_to_f32_unsigned + ; FToS %f32_to_s16_signed = OpConvertFToS %short %f32 OpStore %ptr_s16 %f32_to_s16_signed diff --git a/shaders-no-opt/asm/comp/atomic-load-store.asm.comp b/shaders-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 00000000000..3f2d141a1f5 --- /dev/null +++ b/shaders-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport 
"GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/basic.spv16.asm.comp b/shaders-no-opt/asm/comp/basic.spv16.asm.comp new file mode 100644 index 00000000000..4675c50e134 --- /dev/null +++ b/shaders-no-opt/asm/comp/basic.spv16.asm.comp @@ -0,0 +1,48 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %_ %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_float 
ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_runtimearr_float = OpTypeRuntimeArray %float + %SSBO = OpTypeStruct %_runtimearr_float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %20 = OpLoad %uint %19 + %23 = OpAccessChain %_ptr_StorageBuffer_float %_ %int_0 %20 + %24 = OpLoad %float %23 + %25 = OpFAdd %float %24 %float_2 + %26 = OpAccessChain %_ptr_StorageBuffer_float %_ %int_0 %20 + OpStore %26 %25 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp b/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp new file mode 100644 index 00000000000..3651a4de527 --- /dev/null +++ b/shaders-no-opt/asm/comp/bitcast-fp16-fp32.asm.vk.comp @@ -0,0 +1,63 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 33 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpCapability StorageBuffer16BitAccess + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute 
%main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpMemberName %SSBO 2 "c" + OpMemberName %SSBO 3 "d" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpMemberDecorate %SSBO 3 Offset 12 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %float = OpTypeFloat 32 + %SSBO = OpTypeStruct %v2half %float %float %v2half +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %uint = OpTypeInt 32 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_3 = OpConstant %int 3 + %int_2 = OpConstant %int 2 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_v2half %_ %int_0 + %17 = OpLoad %v2half %16 + %20 = OpBitcast %float %17 + %22 = OpAccessChain %_ptr_Uniform_float %_ %int_1 + OpStore %22 %20 + %25 = OpAccessChain %_ptr_Uniform_float %_ %int_2 + %26 = OpLoad %float %25 + %28 = OpBitcast %v2half %26 + %29 = OpAccessChain %_ptr_Uniform_v2half %_ %int_3 + OpStore %29 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 00000000000..435fa322215 --- /dev/null +++ b/shaders-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,97 @@ +; 
SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "ints" + OpMemberName %SSBO 1 "uints" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + + %int_1 = OpConstant %int 1 + %uint_11 = OpConstant %uint 11 + + %SSBO = OpTypeStruct %v4int %v4uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %ints_ptr = OpAccessChain %_ptr_Uniform_v4int %_ %int_0 + %uints_ptr = OpAccessChain %_ptr_Uniform_v4uint %_ %int_1 + %ints = OpLoad %v4int %ints_ptr + %uints = OpLoad %v4uint %uints_ptr + + %ints_alt = OpVectorShuffle %v4int %ints %ints 3 2 1 0 + %uints_alt = OpVectorShuffle %v4uint %uints %uints 3 2 1 0 + + %int_to_int_popcount = OpBitCount %v4int %ints + %int_to_uint_popcount = OpBitCount %v4uint %ints + %uint_to_int_popcount = OpBitCount %v4int %uints + %uint_to_uint_popcount = OpBitCount %v4uint %uints + + ; BitReverse must have matching types w.r.t. sign, yay. + %int_to_int_reverse = OpBitReverse %v4int %ints + ;%int_to_uint_reverse = OpBitReverse %v4uint %ints + ;%uint_to_int_reverse = OpBitReverse %v4int %uints + %uint_to_uint_reverse = OpBitReverse %v4uint %uints + + ; Base and Result must match. 
+ %int_to_int_sbit = OpBitFieldSExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_sbit = OpBitFieldSExtract %v4uint %ints %offset %count + ;%uint_to_int_sbit = OpBitFieldSExtract %v4int %uints %offset %count + %uint_to_uint_sbit = OpBitFieldSExtract %v4uint %uints %uint_11 %int_1 + + ; Base and Result must match. + %int_to_int_ubit = OpBitFieldUExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_ubit = OpBitFieldUExtract %v4uint %ints %offset %count + ;%uint_to_int_ubit = OpBitFieldUExtract %v4int %uints %offset %count + %uint_to_uint_ubit = OpBitFieldUExtract %v4uint %uints %uint_11 %int_1 + + %int_to_int_insert = OpBitFieldInsert %v4int %ints %ints_alt %int_1 %uint_11 + %uint_to_uint_insert = OpBitFieldInsert %v4uint %uints %uints_alt %uint_11 %int_1 + + OpStore %ints_ptr %int_to_int_popcount + OpStore %uints_ptr %int_to_uint_popcount + OpStore %ints_ptr %uint_to_int_popcount + OpStore %uints_ptr %uint_to_uint_popcount + + OpStore %ints_ptr %int_to_int_reverse + ;OpStore %uints_ptr %int_to_uint_reverse + ;OpStore %ints_ptr %uint_to_int_reverse + OpStore %uints_ptr %uint_to_uint_reverse + + OpStore %ints_ptr %int_to_int_sbit + ;OpStore %uints_ptr %int_to_uint_sbit + ;OpStore %ints_ptr %uint_to_int_sbit + OpStore %uints_ptr %uint_to_uint_sbit + + OpStore %ints_ptr %int_to_int_ubit + ;OpStore %uints_ptr %int_to_uint_ubit + ;OpStore %ints_ptr %uint_to_int_ubit + OpStore %uints_ptr %uint_to_uint_ubit + + OpStore %ints_ptr %int_to_int_insert + OpStore %uints_ptr %uint_to_uint_insert + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/bitscan.asm.comp b/shaders-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 00000000000..e3b785cd52b --- /dev/null +++ b/shaders-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,72 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" 
+ OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "u" + OpMemberName %SSBO 1 "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + %SSBO = OpTypeStruct %uvec4 %ivec4 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uvec4 = OpTypePointer Uniform %uvec4 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_ivec4 = OpTypePointer Uniform %ivec4 + %main = OpFunction %void None %3 + %5 = OpLabel + %uptr = OpAccessChain %_ptr_Uniform_uvec4 %_ %int_0 + %iptr = OpAccessChain %_ptr_Uniform_ivec4 %_ %int_1 + %uvalue = OpLoad %uvec4 %uptr + %ivalue = OpLoad %ivec4 %iptr + + %lsb_uint_to_uint = OpExtInst %uvec4 %1 FindILsb %uvalue + %lsb_uint_to_int = OpExtInst %ivec4 %1 FindILsb %uvalue + %lsb_int_to_uint = OpExtInst %uvec4 %1 FindILsb %ivalue + %lsb_int_to_int = OpExtInst %ivec4 %1 FindILsb %ivalue + + %umsb_uint_to_uint = OpExtInst %uvec4 %1 FindUMsb %uvalue + %umsb_uint_to_int = OpExtInst %ivec4 %1 FindUMsb %uvalue + %umsb_int_to_uint = OpExtInst %uvec4 %1 FindUMsb %ivalue + %umsb_int_to_int = OpExtInst %ivec4 %1 FindUMsb %ivalue + + %smsb_uint_to_uint = OpExtInst %uvec4 %1 FindSMsb %uvalue + %smsb_uint_to_int = OpExtInst %ivec4 %1 FindSMsb %uvalue + %smsb_int_to_uint = OpExtInst %uvec4 %1 FindSMsb %ivalue + %smsb_int_to_int = OpExtInst %ivec4 %1 FindSMsb %ivalue + + OpStore %uptr %lsb_uint_to_uint + OpStore %iptr %lsb_uint_to_int + OpStore %uptr %lsb_int_to_uint + OpStore %iptr %lsb_int_to_int + + OpStore %uptr %umsb_uint_to_uint + OpStore %iptr %umsb_uint_to_int + OpStore %uptr %umsb_int_to_uint + OpStore %iptr %umsb_int_to_int + + OpStore %uptr 
%smsb_uint_to_uint + OpStore %iptr %smsb_uint_to_int + OpStore %uptr %smsb_int_to_uint + OpStore %iptr %smsb_int_to_int + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp new file mode 100644 index 00000000000..132f38bf72d --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-atomic-nonuniform.vk.nocompat.asm.comp @@ -0,0 +1,53 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageBufferArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbos "ssbos" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 0 + OpDecorate %ssbos Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %22 NonUniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %SSBO = OpTypeStruct %uint +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_uint = OpTypePointer Input %uint + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = 
OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %17 = OpLoad %uint %16 + %18 = OpCopyObject %uint %17 + %22 = OpAccessChain %_ptr_Uniform_uint %ssbos %18 %int_0 + %25 = OpAtomicIAdd %uint %22 %uint_1 %uint_0 %uint_1 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp new file mode 100644 index 00000000000..ed8d0ba6f5e --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-device-address-ptr-casting.vk.nocompat.asm.comp @@ -0,0 +1,106 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 62 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_ARB_gpu_shader_int64" + OpSourceExtension "GL_EXT_buffer_reference" + OpSourceExtension "GL_EXT_buffer_reference_uvec2" + OpName %main "main" + OpName %SomeBuffer "SomeBuffer" + OpMemberName %SomeBuffer 0 "v" + OpMemberName %SomeBuffer 1 "a" + OpMemberName %SomeBuffer 2 "b" + OpName %Registers "Registers" + OpMemberName %Registers 0 "address" + OpMemberName %Registers 1 "address2" + OpName %registers "registers" + OpName %a "a" + OpName %b "b" + OpMemberDecorate %SomeBuffer 0 Offset 0 + OpMemberDecorate %SomeBuffer 1 Offset 16 + OpMemberDecorate %SomeBuffer 2 Offset 24 + OpDecorate %SomeBuffer Block + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_SomeBuffer 
PhysicalStorageBuffer + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %ulong = OpTypeInt 64 0 + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %SomeBuffer = OpTypeStruct %v4float %ulong %v2uint +%_ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer PhysicalStorageBuffer %SomeBuffer +%_ptr_Function__ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer Function %_ptr_PhysicalStorageBuffer_SomeBuffer + %Registers = OpTypeStruct %ulong %v2uint +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_PushConstant_ulong = OpTypePointer PushConstant %ulong + %int_1 = OpConstant %int 1 +%_ptr_PushConstant_v2uint = OpTypePointer PushConstant %v2uint + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %35 = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_4 +%_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float + %float_5 = OpConstant %float 5 + %float_6 = OpConstant %float 6 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %43 = OpConstantComposite %v4float %float_5 %float_6 %float_7 %float_8 +%_ptr_Function_ulong = OpTypePointer Function %ulong +%_ptr_Function_v2uint = OpTypePointer Function %v2uint +%_ptr_PhysicalStorageBuffer_ulong = OpTypePointer PhysicalStorageBuffer %ulong + %int_2 = OpConstant %int 2 +%_ptr_PhysicalStorageBuffer_v2uint = OpTypePointer PhysicalStorageBuffer %v2uint + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function_ulong Function + %b = OpVariable %_ptr_Function_v2uint Function + %21 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0 + %27 = OpAccessChain %_ptr_PushConstant_v2uint %registers %int_1 + %uint_ptr0 = OpLoad %ulong %21 + %uint_ptr1 = OpLoad %v2uint %27 + + ; ConvertUToPtr and vice versa do not 
accept vectors. + %ulong_ptr0 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %ulong_ptr1 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %uvec2_ptr0 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr1 + + %vec4_write0 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr0 %int_0 + %vec4_write1 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr1 %int_0 + %vec4_write2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %uvec2_ptr0 %int_0 + + OpStore %vec4_write0 %35 Aligned 16 + OpStore %vec4_write1 %35 Aligned 16 + OpStore %vec4_write2 %35 Aligned 16 + + %ulong_from_ptr0 = OpConvertPtrToU %ulong %ulong_ptr0 + %ulong_from_ptr1 = OpBitcast %ulong %ulong_ptr1 + %uvec2_from_ptr0 = OpBitcast %v2uint %uvec2_ptr0 + + %ptr0 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr0 %int_1 + %ptr1 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr1 %int_1 + %ptr2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v2uint %uvec2_ptr0 %int_2 + + OpStore %ptr0 %ulong_from_ptr0 Aligned 8 + OpStore %ptr1 %ulong_from_ptr1 Aligned 8 + OpStore %ptr2 %uvec2_from_ptr0 Aligned 8 + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp b/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp new file mode 100644 index 00000000000..816985a108e --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-reference-aliased-block-name.nocompat.vk.asm.comp @@ -0,0 +1,110 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpSourceExtension 
"GL_EXT_buffer_reference" + OpName %main "main" + OpName %Registers "Registers" + OpMemberName %Registers 0 "ro" + OpMemberName %Registers 1 "rw" + OpMemberName %Registers 2 "wo" + OpName %RO "Alias" + OpMemberName %RO 0 "v" + OpName %RW "Alias" + OpMemberName %RW 0 "v" + OpName %WO "Alias" + OpMemberName %WO 0 "v" + OpName %registers "registers" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpMemberDecorate %Registers 2 Offset 16 + OpDecorate %Registers Block + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %RO 0 NonWritable + OpMemberDecorate %RO 0 Offset 0 + OpDecorate %RO Block + OpDecorate %_runtimearr_v4float_0 ArrayStride 16 + OpMemberDecorate %RW 0 Restrict + OpMemberDecorate %RW 0 Offset 0 + OpDecorate %RW Block + OpDecorate %_runtimearr_v4float_1 ArrayStride 16 + OpMemberDecorate %WO 0 Coherent + OpMemberDecorate %WO 0 NonReadable + OpMemberDecorate %WO 0 Offset 0 + OpDecorate %WO Block + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_RO PhysicalStorageBuffer + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_RW PhysicalStorageBuffer + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_WO PhysicalStorageBuffer + %Registers = OpTypeStruct %_ptr_PhysicalStorageBuffer_RO %_ptr_PhysicalStorageBuffer_RW %_ptr_PhysicalStorageBuffer_WO + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %RO = OpTypeStruct %_runtimearr_v4float +%_ptr_PhysicalStorageBuffer_RO = OpTypePointer PhysicalStorageBuffer %RO +%_runtimearr_v4float_0 = OpTypeRuntimeArray %v4float + %RW = OpTypeStruct %_runtimearr_v4float_0 +%_ptr_PhysicalStorageBuffer_RW = OpTypePointer PhysicalStorageBuffer %RW +%_runtimearr_v4float_1 = OpTypeRuntimeArray %v4float + %WO = 
OpTypeStruct %_runtimearr_v4float_1 +%_ptr_PhysicalStorageBuffer_WO = OpTypePointer PhysicalStorageBuffer %WO +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_PushConstant__ptr_PhysicalStorageBuffer_RW = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_RW + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_PushConstant__ptr_PhysicalStorageBuffer_RO = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_RO +%_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float + %int_2 = OpConstant %int 2 +%_ptr_PushConstant__ptr_PhysicalStorageBuffer_WO = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_WO + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %23 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_RW %registers %int_1 + %24 = OpLoad %_ptr_PhysicalStorageBuffer_RW %23 + %32 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %33 = OpLoad %uint %32 + %35 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_RO %registers %int_0 + %36 = OpLoad %_ptr_PhysicalStorageBuffer_RO %35 + %37 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %38 = OpLoad %uint %37 + %40 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %36 %int_0 %38 + %41 = OpLoad %v4float %40 Aligned 16 + %42 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %24 %int_0 %33 + OpStore %42 %41 Aligned 16 + %45 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_WO %registers %int_2 + %46 = OpLoad 
%_ptr_PhysicalStorageBuffer_WO %45 + %47 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %48 = OpLoad %uint %47 + %49 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_RO %registers %int_0 + %50 = OpLoad %_ptr_PhysicalStorageBuffer_RO %49 + %51 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %52 = OpLoad %uint %51 + %53 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %50 %int_0 %52 + %54 = OpLoad %v4float %53 Aligned 16 + %55 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %46 %int_0 %48 + OpStore %55 %54 Aligned 16 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp b/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp new file mode 100644 index 00000000000..8fda30e109e --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-reference-pointer-to-pod-in-buffer.asm.nocompat.vk.comp @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 25 +; Schema: 0 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_buffer_reference" + OpName %main "main" + OpName %Push "Push" + OpMemberName %Push 0 "ptr" + OpName %_ "" + OpMemberDecorate %Push 0 Offset 0 + OpDecorate %Push Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 +%_ptr_PhysicalStorageBuffer_uintPtr = OpTypePointer PhysicalStorageBuffer %v4uint + %Push = OpTypeStruct %_ptr_PhysicalStorageBuffer_uintPtr +%_ptr_PushConstant_Push = OpTypePointer PushConstant %Push + %_ = OpVariable %_ptr_PushConstant_Push PushConstant + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 
+%_ptr_PushConstant__ptr_PhysicalStorageBuffer_uintPtr = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_uintPtr + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 + %22 = OpConstantComposite %v4uint %uint_1 %uint_2 %uint_3 %uint_4 +%_ptr_PhysicalStorageBuffer_v4uint = OpTypePointer PhysicalStorageBuffer %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uintPtr %_ %int_0 + %17 = OpLoad %_ptr_PhysicalStorageBuffer_uintPtr %16 + OpStore %17 %22 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp b/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp new file mode 100644 index 00000000000..d7ca03f6639 --- /dev/null +++ b/shaders-no-opt/asm/comp/buffer-reference-pointer-to-unused-pod-in-buffer.asm.nocompat.vk.comp @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 25 +; Schema: 0 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_buffer_reference" + OpName %main "main" + OpName %Push "Push" + OpMemberName %Push 0 "ptr" + OpName %_ "" + OpMemberDecorate %Push 0 Offset 0 + OpDecorate %Push Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 +%_ptr_PhysicalStorageBuffer_uintPtr = OpTypePointer PhysicalStorageBuffer %v4uint + %Push = OpTypeStruct %_ptr_PhysicalStorageBuffer_uintPtr +%_ptr_PushConstant_Push = OpTypePointer PushConstant %Push + %_ = OpVariable %_ptr_PushConstant_Push PushConstant + %int = 
OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_PushConstant__ptr_PhysicalStorageBuffer_uintPtr = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_uintPtr + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 + %22 = OpConstantComposite %v4uint %uint_1 %uint_2 %uint_3 %uint_4 +%_ptr_PhysicalStorageBuffer_v4uint = OpTypePointer PhysicalStorageBuffer %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + ;%16 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_uintPtr %_ %int_0 + ;%17 = OpLoad %_ptr_PhysicalStorageBuffer_uintPtr %16 + ; OpStore %17 %22 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 00000000000..e1dcb0ef8e2 --- /dev/null +++ b/shaders-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 49 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %gl_LocalInvocationID + OpExecutionMode %main LocalSize 4 4 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %25 "indexable" + OpName %38 "indexable" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + 
%3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_4 = OpConstant %uint 4 +%_arr_int_uint_4 = OpTypeArray %int %uint_4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %25 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_2 %int_3 +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4 +%_ptr_Function_int = OpTypePointer Function %int + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %38 = OpConstantComposite %_arr_int_uint_4 %int_4 %int_5 %int_6 %int_7 + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_4 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_int_uint_4 Function +%indexable_0 = OpVariable %_ptr_Function__arr_int_uint_4 Function + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %27 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %28 = OpLoad %uint %27 + OpStore %indexable %25 + %32 = OpAccessChain %_ptr_Function_int %indexable %28 + %33 = OpLoad %int %32 + %40 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_1 + %41 = OpLoad %uint %40 + OpStore %indexable_0 %38 + %43 = OpAccessChain %_ptr_Function_int %indexable_0 %41 + %44 = OpLoad %int %43 + %45 = OpIAdd %int %33 %44 + %47 = OpAccessChain 
%_ptr_Uniform_int %_ %int_0 %19 + OpStore %47 %45 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp b/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 00000000000..20fa0b099b8 --- /dev/null +++ b/shaders-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 48 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %ssbo + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %B1 "B1" + OpName %A "A" + OpName %C "C" + OpName %B2 "B2" + OpMemberName %A 0 "a" + OpMemberName %A 1 "b1" + OpMemberName %A 2 "b1_array" + OpMemberName %C 0 "c" + OpMemberName %C 1 "b2" + OpMemberName %C 2 "b2_array" + OpMemberName %B1 0 "elem1" + OpMemberName %B2 0 "elem2" + OpMemberName %SSBO 0 "a_block" + OpMemberName %SSBO 1 "c_block" + OpDecorate %B1Array ArrayStride 16 + OpDecorate %B2Array ArrayStride 16 + OpMemberDecorate %B1 0 Offset 0 + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 16 + OpMemberDecorate %A 2 Offset 32 + OpMemberDecorate %B2 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %C 1 Offset 16 + OpMemberDecorate %C 2 Offset 32 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 96 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %v4float = OpTypeVector %float 4 + %B2 = OpTypeStruct %v4float + %B2Array = OpTypeArray %B2 %uint_4 + %C = OpTypeStruct %v4float %B2 %B2Array + %B1 = OpTypeStruct %v4float + %B1Array = OpTypeArray %B1 %uint_4 + %A = OpTypeStruct %v4float %B1 %B1Array + %SSBO = OpTypeStruct %A %C +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable 
%_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_C = OpTypePointer StorageBuffer %C + %int_0 = OpConstant %int 0 +%_ptr_Uniform_A = OpTypePointer StorageBuffer %A + %main = OpFunction %void None %3 + %5 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1 + %39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0 + %23 = OpLoad %C %22 + %24 = OpCopyLogical %A %23 + OpStore %39 %24 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 00000000000..73f3ceee1ad --- /dev/null +++ b/shaders-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 26 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + ;OpEntryPoint GLCompute %main "main" %Samp %ubo %ssbo %v %w + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Samp "Samp" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %ubo "ubo" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbo "ssbo" + OpName %v "v" + OpName %w "w" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %Samp DescriptorSet 0 + OpDecorate %Samp Binding 0 + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %ubo DescriptorSet 0 + OpDecorate %ubo Binding 1 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 
%uint_1 %uint_1 + %float = OpTypeFloat 32 + %12 = OpTypeImage %float 2D 0 0 0 1 Unknown + %13 = OpTypeSampledImage %12 +%_ptr_UniformConstant_13 = OpTypePointer UniformConstant %13 + %Samp = OpVariable %_ptr_UniformConstant_13 UniformConstant + %UBO = OpTypeStruct %float +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %ubo = OpVariable %_ptr_Uniform_UBO Uniform + %SSBO = OpTypeStruct %float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer +%_ptr_Private_float = OpTypePointer Private %float + %v = OpVariable %_ptr_Private_float Private +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %w = OpVariable %_ptr_Workgroup_float Workgroup + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp b/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp new file mode 100644 index 00000000000..e5ca1cbb508 --- /dev/null +++ b/shaders-no-opt/asm/comp/expression-nesting-limits.asm.comp @@ -0,0 +1,288 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 10117 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %gl_GlobalInvocationID + OpExecutionMode %1 LocalSize 1 1 1 + OpSource GLSL 430 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_uint_int_16 ArrayStride 4 + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_struct_4 BufferBlock + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 0 + OpDecorate %6 DescriptorSet 0 + OpDecorate %6 Binding 1 + OpDecorate %7 DescriptorSet 0 + OpDecorate %7 Binding 2 + %void = OpTypeVoid + %bool = OpTypeBool + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v2int = OpTypeVector %int 2 + %v2uint = OpTypeVector %uint 2 + %v2float = OpTypeVector %float 2 + %v3int = OpTypeVector %int 3 + %v3uint = OpTypeVector %uint 3 + 
%v3float = OpTypeVector %float 3 + %v4int = OpTypeVector %int 4 + %v4uint = OpTypeVector %uint 4 + %v4float = OpTypeVector %float 4 + %v4bool = OpTypeVector %bool 4 + %23 = OpTypeFunction %v4float %v4float + %24 = OpTypeFunction %bool + %25 = OpTypeFunction %void +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Input_int = OpTypePointer Input %int +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v2int = OpTypePointer Input %v2int +%_ptr_Input_v2uint = OpTypePointer Input %v2uint +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v4int = OpTypePointer Input %v4int +%_ptr_Input_v4uint = OpTypePointer Input %v4uint +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_int = OpTypePointer Output %int +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v2int = OpTypePointer Output %v2int +%_ptr_Output_v2uint = OpTypePointer Output %v2uint +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output_v4int = OpTypePointer Output %v4int +%_ptr_Output_v4uint = OpTypePointer Output %v4uint +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Function_v4float = OpTypePointer Function %v4float + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %float_0_5 = OpConstant %float 0.5 + %float_n1 = OpConstant %float -1 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_32 = OpConstant %uint 32 + %uint_4 = OpConstant %uint 4 +%uint_2147483647 = OpConstant %uint 2147483647 + %66 = OpConstantComposite %v4float 
%float_1 %float_1 %float_1 %float_1 + %67 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %68 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_arr_v4float_uint_32 = OpTypeArray %v4float %uint_32 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_ptr_Input__arr_v4float_uint_32 = OpTypePointer Input %_arr_v4float_uint_32 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %int_16 = OpConstant %int 16 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_arr_uint_int_16 = OpTypeArray %uint %int_16 + %_struct_4 = OpTypeStruct %_arr_uint_int_16 +%_ptr_Uniform__struct_4 = OpTypePointer Uniform %_struct_4 + %5 = OpVariable %_ptr_Uniform__struct_4 Uniform + %6 = OpVariable %_ptr_Uniform__struct_4 Uniform + %7 = OpVariable %_ptr_Uniform__struct_4 Uniform + %1 = OpFunction %void None %25 + %83 = OpLabel + %84 = OpLoad %v3uint %gl_GlobalInvocationID + %85 = OpCompositeConstruct %v4uint %84 %uint_0 + %86 = OpConvertUToF %v4float %85 + %87 = OpFunctionCall %v4float %88 %86 + OpReturn + OpFunctionEnd + %88 = OpFunction %v4float None %23 + %89 = OpFunctionParameter %v4float + %92 = OpLabel + %93 = OpVariable %_ptr_Function_int Function + OpStore %93 %int_0 + OpBranch %94 + %94 = OpLabel + %95 = OpLoad %int %93 + %96 = OpSLessThan %bool %95 %int_16 + OpLoopMerge %97 %10100 None + OpBranchConditional %96 %10101 %97 + %10101 = OpLabel + %10102 = OpLoad %int %93 + %90 = OpAccessChain %_ptr_Uniform_uint %6 %int_0 %10102 + %91 = 
OpLoad %uint %90 + %98 = OpAccessChain %_ptr_Uniform_uint %5 %int_0 %10102 + %99 = OpLoad %uint %98 + %100 = OpIAdd %uint %91 %99 + %101 = OpIAdd %uint %91 %100 + %102 = OpIAdd %uint %91 %101 + %103 = OpIAdd %uint %91 %102 + %104 = OpIAdd %uint %91 %103 + %105 = OpIAdd %uint %91 %104 + %106 = OpIAdd %uint %91 %105 + %107 = OpIAdd %uint %91 %106 + %108 = OpIAdd %uint %91 %107 + %109 = OpIAdd %uint %91 %108 + %110 = OpIAdd %uint %91 %109 + %111 = OpIAdd %uint %91 %110 + %112 = OpIAdd %uint %91 %111 + %113 = OpIAdd %uint %91 %112 + %114 = OpIAdd %uint %91 %113 + %115 = OpIAdd %uint %91 %114 + %116 = OpIAdd %uint %91 %115 + %117 = OpIAdd %uint %91 %116 + %118 = OpIAdd %uint %91 %117 + %119 = OpIAdd %uint %91 %118 + %120 = OpIAdd %uint %91 %119 + %121 = OpIAdd %uint %91 %120 + %122 = OpIAdd %uint %91 %121 + %123 = OpIAdd %uint %91 %122 + %124 = OpIAdd %uint %91 %123 + %125 = OpIAdd %uint %91 %124 + %126 = OpIAdd %uint %91 %125 + %127 = OpIAdd %uint %91 %126 + %128 = OpIAdd %uint %91 %127 + %129 = OpIAdd %uint %91 %128 + %130 = OpIAdd %uint %91 %129 + %131 = OpIAdd %uint %91 %130 + %132 = OpIAdd %uint %91 %131 + %133 = OpIAdd %uint %91 %132 + %134 = OpIAdd %uint %91 %133 + %135 = OpIAdd %uint %91 %134 + %136 = OpIAdd %uint %91 %135 + %137 = OpIAdd %uint %91 %136 + %138 = OpIAdd %uint %91 %137 + %139 = OpIAdd %uint %91 %138 + %140 = OpIAdd %uint %91 %139 + %141 = OpIAdd %uint %91 %140 + %142 = OpIAdd %uint %91 %141 + %143 = OpIAdd %uint %91 %142 + %144 = OpIAdd %uint %91 %143 + %145 = OpIAdd %uint %91 %144 + %146 = OpIAdd %uint %91 %145 + %147 = OpIAdd %uint %91 %146 + %148 = OpIAdd %uint %91 %147 + %149 = OpIAdd %uint %91 %148 + %150 = OpIAdd %uint %91 %149 + %151 = OpIAdd %uint %91 %150 + %152 = OpIAdd %uint %91 %151 + %153 = OpIAdd %uint %91 %152 + %154 = OpIAdd %uint %91 %153 + %155 = OpIAdd %uint %91 %154 + %156 = OpIAdd %uint %91 %155 + %157 = OpIAdd %uint %91 %156 + %158 = OpIAdd %uint %91 %157 + %159 = OpIAdd %uint %91 %158 + %160 = OpIAdd %uint %91 %159 + %161 = 
OpIAdd %uint %91 %160 + %162 = OpIAdd %uint %91 %161 + %163 = OpIAdd %uint %91 %162 + %164 = OpIAdd %uint %91 %163 + %165 = OpIAdd %uint %91 %164 + %166 = OpIAdd %uint %91 %165 + %167 = OpIAdd %uint %91 %166 + %168 = OpIAdd %uint %91 %167 + %169 = OpIAdd %uint %91 %168 + %170 = OpIAdd %uint %91 %169 + %171 = OpIAdd %uint %91 %170 + %172 = OpIAdd %uint %91 %171 + %173 = OpIAdd %uint %91 %172 + %174 = OpIAdd %uint %91 %173 + %175 = OpIAdd %uint %91 %174 + %176 = OpIAdd %uint %91 %175 + %177 = OpIAdd %uint %91 %176 + %178 = OpIAdd %uint %91 %177 + %179 = OpIAdd %uint %91 %178 + %180 = OpIAdd %uint %91 %179 + %181 = OpIAdd %uint %91 %180 + %182 = OpIAdd %uint %91 %181 + %183 = OpIAdd %uint %91 %182 + %184 = OpIAdd %uint %91 %183 + %185 = OpIAdd %uint %91 %184 + %186 = OpIAdd %uint %91 %185 + %187 = OpIAdd %uint %91 %186 + %188 = OpIAdd %uint %91 %187 + %189 = OpIAdd %uint %91 %188 + %190 = OpIAdd %uint %91 %189 + %191 = OpIAdd %uint %91 %190 + %192 = OpIAdd %uint %91 %191 + %193 = OpIAdd %uint %91 %192 + %194 = OpIAdd %uint %91 %193 + %195 = OpIAdd %uint %91 %194 + %196 = OpIAdd %uint %91 %195 + %197 = OpIAdd %uint %91 %196 + %198 = OpIAdd %uint %91 %197 + %199 = OpIAdd %uint %91 %198 + %200 = OpIAdd %uint %91 %199 + %201 = OpIAdd %uint %91 %200 + %202 = OpIAdd %uint %91 %201 + %203 = OpIAdd %uint %91 %202 + %204 = OpIAdd %uint %91 %203 + %205 = OpIAdd %uint %91 %204 + %206 = OpIAdd %uint %91 %205 + %207 = OpIAdd %uint %91 %206 + %208 = OpIAdd %uint %91 %207 + %209 = OpIAdd %uint %91 %208 + %210 = OpIAdd %uint %91 %209 + %211 = OpIAdd %uint %91 %210 + %212 = OpIAdd %uint %91 %211 + %213 = OpIAdd %uint %91 %212 + %214 = OpIAdd %uint %91 %213 + %215 = OpIAdd %uint %91 %214 + %216 = OpIAdd %uint %91 %215 + %217 = OpIAdd %uint %91 %216 + %218 = OpIAdd %uint %91 %217 + %219 = OpIAdd %uint %91 %218 + %220 = OpIAdd %uint %91 %219 + %221 = OpIAdd %uint %91 %220 + %222 = OpIAdd %uint %91 %221 + %223 = OpIAdd %uint %91 %222 + %224 = OpIAdd %uint %91 %223 + %225 = OpIAdd %uint 
%91 %224 + %226 = OpIAdd %uint %91 %225 + %227 = OpIAdd %uint %91 %226 + %228 = OpIAdd %uint %91 %227 + %229 = OpIAdd %uint %91 %228 + %230 = OpIAdd %uint %91 %229 + %231 = OpIAdd %uint %91 %230 + %232 = OpIAdd %uint %91 %231 + %233 = OpIAdd %uint %91 %232 + %234 = OpIAdd %uint %91 %233 + %235 = OpIAdd %uint %91 %234 + %236 = OpIAdd %uint %91 %235 + %result = OpIAdd %uint %91 %236 + %10103 = OpAccessChain %_ptr_Uniform_uint %7 %int_0 %10102 + OpStore %10103 %result + OpBranch %10100 + %10100 = OpLabel + %10104 = OpLoad %int %93 + %10105 = OpIAdd %int %10104 %int_1 + OpStore %93 %10105 + OpBranch %94 + %97 = OpLabel + OpReturnValue %89 + OpFunctionEnd + %10106 = OpFunction %bool None %24 + %10107 = OpLabel + %10108 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %10109 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1 + %10110 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %10111 = OpLoad %uint %10108 + %10112 = OpLoad %uint %10109 + %10113 = OpLoad %uint %10110 + %10114 = OpBitwiseOr %uint %10111 %10112 + %10115 = OpBitwiseOr %uint %10113 %10114 + %10116 = OpIEqual %bool %10115 %uint_0 + OpReturnValue %10116 + OpFunctionEnd diff --git a/shaders/asm/extended-debug-extinst.invalid.asm.comp b/shaders-no-opt/asm/comp/extended-debug-extinst.invalid.asm.comp similarity index 100% rename from shaders/asm/extended-debug-extinst.invalid.asm.comp rename to shaders-no-opt/asm/comp/extended-debug-extinst.invalid.asm.comp diff --git a/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp b/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp new file mode 100644 index 00000000000..e1efd564cb8 --- /dev/null +++ b/shaders-no-opt/asm/comp/fuzz-collapse-degenerate-loop.asm.comp @@ -0,0 +1,118 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 71 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" + OpExecutionMode %1 LocalSize 1 
1 1 + OpDecorate %_struct_2 BufferBlock + OpMemberDecorate %_struct_2 0 Offset 0 + OpDecorate %_arr_uint_uint_2 ArrayStride 4 + OpDecorate %_struct_4 BufferBlock + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_arr_uint_uint_3 ArrayStride 4 + OpDecorate %_struct_6 BufferBlock + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_arr_uint_uint_11 ArrayStride 4 + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpDecorate %9 DescriptorSet 0 + OpDecorate %9 Binding 1 + OpDecorate %10 DescriptorSet 0 + OpDecorate %10 Binding 2 + %void = OpTypeVoid + %12 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %true = OpConstantTrue %bool + %uint_0 = OpConstant %uint 0 + %uint_0_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_8 = OpConstant %uint 8 + %uint_9 = OpConstant %uint 9 + %uint_10 = OpConstant %uint 10 + %uint_11 = OpConstant %uint 11 + %uint_12 = OpConstant %uint 12 + %uint_13 = OpConstant %uint 13 + %uint_14 = OpConstant %uint 14 +%_arr_uint_uint_2 = OpTypeArray %uint %uint_2 + %_struct_2 = OpTypeStruct %_arr_uint_uint_2 +%_ptr_Uniform__struct_2 = OpTypePointer Uniform %_struct_2 + %9 = OpVariable %_ptr_Uniform__struct_2 Uniform +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 + %_struct_4 = OpTypeStruct %_arr_uint_uint_3 +%_ptr_Uniform__struct_4 = OpTypePointer Uniform %_struct_4 + %8 = OpVariable %_ptr_Uniform__struct_4 Uniform +%_arr_uint_uint_11 = OpTypeArray %uint %uint_11 + %_struct_6 = OpTypeStruct %_arr_uint_uint_11 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %10 = OpVariable %_ptr_Uniform__struct_6 Uniform +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %1 = OpFunction %void None %12 + %33 = OpLabel + %34 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %35 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %36 = OpVariable %_ptr_Function_uint Function %uint_0_0 + 
%37 = OpLoad %uint %34 + %38 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %37 + OpStore %38 %uint_8 + %39 = OpIAdd %uint %37 %uint_1 + OpStore %34 %39 + OpBranch %40 + %40 = OpLabel + %41 = OpLoad %uint %34 + %42 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %41 + OpStore %42 %uint_9 + %43 = OpIAdd %uint %41 %uint_1 + OpStore %34 %43 + %44 = OpLoad %uint %35 + %45 = OpAccessChain %_ptr_Uniform_uint %8 %uint_0_0 %44 + %46 = OpLoad %uint %45 + %47 = OpIEqual %bool %46 %uint_1 + %48 = OpIAdd %uint %44 %uint_1 + OpStore %35 %48 + OpLoopMerge %49 %50 None + OpBranchConditional %47 %51 %49 + %51 = OpLabel + %52 = OpLoad %uint %34 + %53 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %52 + OpStore %53 %uint_12 + %54 = OpIAdd %uint %52 %uint_1 + OpStore %34 %54 + %55 = OpLoad %uint %36 + %56 = OpAccessChain %_ptr_Uniform_uint %9 %uint_0_0 %55 + %57 = OpLoad %uint %56 + %58 = OpIEqual %bool %57 %uint_1 + %59 = OpIAdd %uint %55 %uint_1 + OpStore %36 %59 + OpLoopMerge %60 %61 None + OpBranchConditional %58 %60 %60 + %49 = OpLabel + %62 = OpLoad %uint %34 + %63 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %62 + OpStore %63 %uint_10 + %64 = OpIAdd %uint %62 %uint_1 + OpStore %34 %64 + OpReturn + %60 = OpLabel + %65 = OpLoad %uint %34 + %66 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %65 + OpStore %66 %uint_13 + %67 = OpIAdd %uint %65 %uint_1 + OpStore %34 %67 + OpBranch %50 + %61 = OpLabel + OpBranch %51 + %50 = OpLabel + %68 = OpLoad %uint %34 + %69 = OpAccessChain %_ptr_Uniform_uint %10 %uint_0_0 %68 + OpStore %69 %uint_11 + %70 = OpIAdd %uint %68 %uint_1 + OpStore %34 %70 + OpBranch %40 + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp b/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp new file mode 100644 index 00000000000..7fb41ed3f81 --- /dev/null +++ 
b/shaders-no-opt/asm/comp/fuzz-loop-variable-dominator-with-unreachable-continue-block.asm.comp @@ -0,0 +1,376 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 257 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %gl_LocalInvocationIndex %gl_WorkGroupID + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %_struct_4 BufferBlock + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_arr_uint_uint_1 ArrayStride 4 + OpDecorate %_struct_6 BufferBlock + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_arr_uint_uint_2 ArrayStride 4 + OpDecorate %_struct_8 BufferBlock + OpMemberDecorate %_struct_8 0 Offset 0 + OpDecorate %_arr_uint_uint_3 ArrayStride 4 + OpDecorate %_struct_10 BufferBlock + OpMemberDecorate %_struct_10 0 Offset 0 + OpDecorate %_arr_uint_uint_37 ArrayStride 4 + OpDecorate %12 DescriptorSet 0 + OpDecorate %12 Binding 0 + OpDecorate %13 DescriptorSet 0 + OpDecorate %13 Binding 1 + OpDecorate %14 DescriptorSet 0 + OpDecorate %14 Binding 2 + OpDecorate %15 DescriptorSet 0 + OpDecorate %15 Binding 3 + OpDecorate %16 DescriptorSet 0 + OpDecorate %16 Binding 4 + OpDecorate %17 DescriptorSet 0 + OpDecorate %17 Binding 5 + OpDecorate %18 DescriptorSet 0 + OpDecorate %18 Binding 6 + OpDecorate %19 DescriptorSet 0 + OpDecorate %19 Binding 7 + OpDecorate %20 DescriptorSet 0 + OpDecorate %20 Binding 8 + OpDecorate %21 DescriptorSet 0 + OpDecorate %21 Binding 9 + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_WorkGroupID BuiltIn WorkgroupId + %void = OpTypeVoid + %23 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %true = OpConstantTrue %bool + %uint_0 = OpConstant %uint 0 + %uint_666 = OpConstant %uint 666 + %uint_0_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_8 = OpConstant %uint 8 + %uint_9 = OpConstant %uint 9 + %uint_10 = OpConstant %uint 
10 + %uint_11 = OpConstant %uint 11 + %uint_12 = OpConstant %uint 12 + %uint_13 = OpConstant %uint 13 + %uint_14 = OpConstant %uint 14 + %uint_15 = OpConstant %uint 15 + %uint_16 = OpConstant %uint 16 + %uint_17 = OpConstant %uint 17 + %uint_18 = OpConstant %uint 18 + %uint_19 = OpConstant %uint 19 + %uint_20 = OpConstant %uint 20 + %uint_21 = OpConstant %uint 21 + %uint_22 = OpConstant %uint 22 + %uint_23 = OpConstant %uint 23 + %uint_24 = OpConstant %uint 24 + %uint_25 = OpConstant %uint 25 + %uint_26 = OpConstant %uint 26 + %uint_27 = OpConstant %uint 27 + %uint_28 = OpConstant %uint 28 + %uint_29 = OpConstant %uint 29 + %uint_30 = OpConstant %uint 30 + %uint_31 = OpConstant %uint 31 + %uint_32 = OpConstant %uint 32 + %uint_33 = OpConstant %uint 33 + %uint_37 = OpConstant %uint 37 +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 + %_struct_4 = OpTypeStruct %_arr_uint_uint_1 +%_ptr_Uniform__struct_4 = OpTypePointer Uniform %_struct_4 + %12 = OpVariable %_ptr_Uniform__struct_4 Uniform + %13 = OpVariable %_ptr_Uniform__struct_4 Uniform + %19 = OpVariable %_ptr_Uniform__struct_4 Uniform +%_arr_uint_uint_2 = OpTypeArray %uint %uint_2 + %_struct_6 = OpTypeStruct %_arr_uint_uint_2 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %14 = OpVariable %_ptr_Uniform__struct_6 Uniform + %15 = OpVariable %_ptr_Uniform__struct_6 Uniform + %16 = OpVariable %_ptr_Uniform__struct_6 Uniform + %17 = OpVariable %_ptr_Uniform__struct_6 Uniform + %18 = OpVariable %_ptr_Uniform__struct_6 Uniform +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 + %_struct_8 = OpTypeStruct %_arr_uint_uint_3 +%_ptr_Uniform__struct_8 = OpTypePointer Uniform %_struct_8 + %20 = OpVariable %_ptr_Uniform__struct_8 Uniform +%_arr_uint_uint_37 = OpTypeArray %uint %uint_37 + %_struct_10 = OpTypeStruct %_arr_uint_uint_37 +%_ptr_Uniform__struct_10 = OpTypePointer Uniform %_struct_10 + %21 = OpVariable %_ptr_Uniform__struct_10 Uniform +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Uniform_uint = 
OpTypePointer Uniform %uint +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_WorkGroupID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %23 + %69 = OpLabel + %70 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %71 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %72 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %73 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %74 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %75 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %76 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %77 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %78 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %79 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %80 = OpLoad %uint %gl_LocalInvocationIndex + %81 = OpAccessChain %_ptr_Input_uint %gl_WorkGroupID %uint_0_0 + %82 = OpAccessChain %_ptr_Input_uint %gl_WorkGroupID %uint_1 + %83 = OpAccessChain %_ptr_Input_uint %gl_WorkGroupID %uint_2 + %84 = OpLoad %uint %81 + %85 = OpLoad %uint %82 + %86 = OpLoad %uint %83 + %87 = OpIMul %uint %86 %uint_1 + %88 = OpIMul %uint %85 %uint_1 + %89 = OpIAdd %uint %88 %87 + %90 = OpIAdd %uint %89 %84 + %91 = OpIMul %uint %80 %uint_1 + %92 = OpIMul %uint %80 %uint_1 + %93 = OpIMul %uint %80 %uint_2 + %94 = OpIMul %uint %80 %uint_2 + %95 = OpIMul %uint %80 %uint_2 + %96 = OpIMul %uint %80 %uint_2 + %97 = OpIMul %uint %80 %uint_2 + %98 = OpIMul %uint %80 %uint_1 + %99 = OpIMul %uint %80 %uint_3 + %100 = OpIMul %uint %90 %uint_1 + %101 = OpIMul %uint %90 %uint_1 + %102 = OpIMul %uint %90 %uint_2 + %103 = OpIMul %uint %90 %uint_2 + %104 = OpIMul %uint %90 %uint_2 + %105 = OpIMul %uint %90 %uint_2 + %106 = OpIMul %uint %90 %uint_2 + %107 = OpIMul %uint %90 %uint_1 + %108 = OpIMul %uint %90 %uint_3 + %109 = OpIAdd %uint %100 %91 + %110 = OpIAdd %uint %101 
%92 + %111 = OpIAdd %uint %102 %93 + %112 = OpIAdd %uint %103 %94 + %113 = OpIAdd %uint %104 %95 + %114 = OpIAdd %uint %105 %96 + %115 = OpIAdd %uint %106 %97 + %116 = OpIAdd %uint %107 %98 + %117 = OpIAdd %uint %108 %99 + %118 = OpIMul %uint %80 %uint_37 + %119 = OpIMul %uint %90 %uint_37 + %120 = OpIAdd %uint %119 %118 + OpStore %71 %109 + OpStore %72 %110 + OpStore %73 %111 + OpStore %74 %112 + OpStore %75 %113 + OpStore %76 %114 + OpStore %77 %115 + OpStore %78 %116 + OpStore %79 %117 + OpStore %70 %120 + %121 = OpLoad %uint %70 + %122 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %121 + OpStore %122 %uint_8 + %123 = OpIAdd %uint %121 %uint_1 + %124 = OpLoad %uint %71 + %125 = OpAccessChain %_ptr_Uniform_uint %12 %uint_0_0 %124 + %126 = OpLoad %uint %125 + %127 = OpIAdd %uint %124 %uint_1 + OpStore %71 %127 + OpSelectionMerge %128 None + OpSwitch %126 %128 + %128 = OpLabel + %129 = OpPhi %uint %130 %131 %123 %69 + %132 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %129 + OpStore %132 %uint_9 + %133 = OpIAdd %uint %129 %uint_1 + OpLoopMerge %134 %131 None + OpBranch %135 + %135 = OpLabel + %136 = OpPhi %uint %uint_666 %137 %133 %128 + %138 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %136 + OpStore %138 %uint_12 + %139 = OpIAdd %uint %136 %uint_1 + OpLoopMerge %140 %137 None + OpBranch %140 + %140 = OpLabel + %141 = OpPhi %uint %139 %135 %uint_666 %142 + %143 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %141 + OpStore %143 %uint_13 + %144 = OpIAdd %uint %141 %uint_1 + %145 = OpLoad %uint %75 + OpLoopMerge %146 %142 None + OpBranch %147 + %137 = OpLabel + OpBranch %135 + %147 = OpLabel + %148 = OpPhi %uint %144 %140 + %149 = OpPhi %uint %145 %140 + %150 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %148 + OpStore %150 %uint_17 + %151 = OpIAdd %uint %148 %uint_1 + %152 = OpAccessChain %_ptr_Uniform_uint %16 %uint_0_0 %149 + %153 = OpLoad %uint %152 + %154 = OpIEqual %bool %153 %uint_1 + %155 = OpIAdd %uint %149 %uint_1 + OpStore %75 %155 + 
OpSelectionMerge %156 None + OpBranchConditional %154 %157 %156 + %157 = OpLabel + %158 = OpPhi %uint %151 %147 + %159 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %158 + OpStore %159 %uint_19 + %160 = OpIAdd %uint %158 %uint_1 + %161 = OpLoad %uint %74 + OpBranch %146 + %156 = OpLabel + OpBranch %142 + %142 = OpLabel + OpBranchConditional %true %140 %146 + %146 = OpLabel + %162 = OpPhi %uint %160 %157 %uint_666 %142 + %163 = OpPhi %uint %161 %157 %uint_666 %142 + %164 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %162 + OpStore %164 %uint_15 + %165 = OpIAdd %uint %162 %uint_1 + %166 = OpAccessChain %_ptr_Uniform_uint %15 %uint_0_0 %163 + %167 = OpLoad %uint %166 + %168 = OpIEqual %bool %167 %uint_1 + %169 = OpIAdd %uint %163 %uint_1 + OpStore %74 %169 + %170 = OpLoad %uint %76 + OpSelectionMerge %171 None + OpBranchConditional %168 %172 %173 + %173 = OpLabel + %174 = OpPhi %uint %165 %146 + %175 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %174 + OpStore %175 %uint_22 + %176 = OpIAdd %uint %174 %uint_1 + %177 = OpLoad %uint %76 + OpBranch %172 + %172 = OpLabel + %178 = OpPhi %uint %176 %173 %165 %146 + %179 = OpPhi %uint %177 %173 %170 %146 + %180 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %178 + OpStore %180 %uint_21 + %181 = OpIAdd %uint %178 %uint_1 + %182 = OpAccessChain %_ptr_Uniform_uint %17 %uint_0_0 %179 + %183 = OpLoad %uint %182 + %184 = OpIAdd %uint %179 %uint_1 + OpStore %76 %184 + OpSelectionMerge %185 None + OpSwitch %183 %185 + %185 = OpLabel + %186 = OpPhi %uint %uint_666 %187 %181 %172 + %188 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %186 + OpStore %188 %uint_23 + %189 = OpIAdd %uint %186 %uint_1 + OpLoopMerge %190 %187 None + OpBranch %190 + %190 = OpLabel + %191 = OpPhi %uint %189 %185 %192 %193 + %194 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %191 + OpStore %194 %uint_24 + %195 = OpIAdd %uint %191 %uint_1 + %196 = OpLoad %uint %79 + OpLoopMerge %197 %193 None + OpBranch %198 + %187 = OpLabel + OpBranch %185 + %198 
= OpLabel + %199 = OpPhi %uint %195 %190 + %200 = OpPhi %uint %196 %190 + %201 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %199 + OpStore %201 %uint_28 + %202 = OpIAdd %uint %199 %uint_1 + %203 = OpAccessChain %_ptr_Uniform_uint %20 %uint_0_0 %200 + %204 = OpLoad %uint %203 + %205 = OpIAdd %uint %200 %uint_1 + OpStore %79 %205 + OpSelectionMerge %206 None + OpSwitch %204 %207 1 %206 + %207 = OpLabel + %208 = OpPhi %uint %202 %198 + %209 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %208 + OpStore %209 %uint_30 + %210 = OpIAdd %uint %208 %uint_1 + %211 = OpLoad %uint %77 + OpBranch %197 + %206 = OpLabel + %212 = OpPhi %uint %202 %198 + %213 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %212 + OpStore %213 %uint_29 + %214 = OpIAdd %uint %212 %uint_1 + %215 = OpLoad %uint %78 + OpBranch %193 + %193 = OpLabel + %216 = OpPhi %uint %214 %206 + %217 = OpPhi %uint %215 %206 + %218 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %216 + OpStore %218 %uint_27 + %192 = OpIAdd %uint %216 %uint_1 + %219 = OpAccessChain %_ptr_Uniform_uint %19 %uint_0_0 %217 + %220 = OpLoad %uint %219 + %221 = OpIEqual %bool %220 %uint_1 + %222 = OpIAdd %uint %217 %uint_1 + OpStore %78 %222 + %223 = OpLoad %uint %77 + OpBranchConditional %221 %190 %197 + %197 = OpLabel + %224 = OpPhi %uint %210 %207 %192 %193 + %225 = OpPhi %uint %211 %207 %223 %193 + %226 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %224 + OpStore %226 %uint_26 + %227 = OpIAdd %uint %224 %uint_1 + %228 = OpAccessChain %_ptr_Uniform_uint %18 %uint_0_0 %225 + %229 = OpLoad %uint %228 + %230 = OpIEqual %bool %229 %uint_1 + %231 = OpIAdd %uint %225 %uint_1 + OpStore %77 %231 + %232 = OpLoad %uint %73 + OpBranchConditional %230 %131 %171 + %171 = OpLabel + OpBranch %131 + %131 = OpLabel + %233 = OpPhi %uint %uint_666 %171 %227 %197 + %234 = OpPhi %uint %uint_666 %171 %232 %197 + %235 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %233 + OpStore %235 %uint_11 + %130 = OpIAdd %uint %233 %uint_1 + %236 = OpAccessChain 
%_ptr_Uniform_uint %14 %uint_0_0 %234 + %237 = OpLoad %uint %236 + %238 = OpIEqual %bool %237 %uint_1 + %239 = OpIAdd %uint %234 %uint_1 + OpStore %73 %239 + %240 = OpLoad %uint %72 + OpBranchConditional %238 %128 %134 + %134 = OpLabel + %241 = OpPhi %uint %130 %131 + %242 = OpPhi %uint %240 %131 + %243 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %241 + OpStore %243 %uint_10 + %244 = OpIAdd %uint %241 %uint_1 + %245 = OpAccessChain %_ptr_Uniform_uint %13 %uint_0_0 %242 + %246 = OpLoad %uint %245 + %247 = OpIAdd %uint %242 %uint_1 + OpStore %72 %247 + OpSelectionMerge %248 None + OpSwitch %246 %249 1 %250 + %249 = OpLabel + %251 = OpPhi %uint %244 %134 + %252 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %251 + OpStore %252 %uint_32 + %253 = OpIAdd %uint %251 %uint_1 + OpBranch %248 + %250 = OpLabel + OpBranch %248 + %248 = OpLabel + %254 = OpPhi %uint %253 %249 %uint_666 %250 + %255 = OpAccessChain %_ptr_Uniform_uint %21 %uint_0_0 %254 + OpStore %255 %uint_31 + %256 = OpIAdd %uint %254 %uint_2 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp b/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 00000000000..30db11d45bc --- /dev/null +++ b/shaders-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %ResTypeMod = OpTypeStruct %float %float +%_ptr_Function_ResTypeMod = OpTypePointer Function %ResTypeMod + %int = OpTypeInt 32 1 
+ %int_0 = OpConstant %int 0 + %float_20 = OpConstant %float 20 + %int_1 = OpConstant %int 1 +%_ptr_Function_float = OpTypePointer Function %float +%ResTypeFrexp = OpTypeStruct %float %int +%_ptr_Function_ResTypeFrexp = OpTypePointer Function %ResTypeFrexp + %float_40 = OpConstant %float 40 +%_ptr_Function_int = OpTypePointer Function %int + %SSBO = OpTypeStruct %float %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_int = OpTypePointer Uniform %int + %main = OpFunction %void None %3 + %5 = OpLabel + %modres = OpExtInst %ResTypeMod %1 ModfStruct %float_20 + %frexpres = OpExtInst %ResTypeFrexp %1 FrexpStruct %float_40 + + %modres_f = OpCompositeExtract %float %modres 0 + %modres_i = OpCompositeExtract %float %modres 1 + %frexpres_f = OpCompositeExtract %float %frexpres 0 + %frexpres_i = OpCompositeExtract %int %frexpres 1 + + %float_ptr = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %int_ptr = OpAccessChain %_ptr_Uniform_int %_ %int_1 + + OpStore %float_ptr %modres_f + OpStore %float_ptr %modres_i + OpStore %float_ptr %frexpres_f + OpStore %int_ptr %frexpres_i + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp new file mode 100644 index 00000000000..5dad9dd5ed8 --- /dev/null +++ b/shaders-no-opt/asm/comp/image-atomic-nonuniform.vk.nocompat.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 32 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageImageArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + 
OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %uImage "uImage" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %uImage DescriptorSet 0 + OpDecorate %uImage Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %30 NonUniform + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %7 = OpTypeImage %uint 2D 0 0 0 2 R32ui +%_runtimearr_7 = OpTypeRuntimeArray %7 +%_ptr_UniformConstant__runtimearr_7 = OpTypePointer UniformConstant %_runtimearr_7 + %uImage = OpVariable %_ptr_UniformConstant__runtimearr_7 UniformConstant + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_2 = OpConstant %uint 2 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 + %v2uint = OpTypeVector %uint 2 + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%_ptr_Image_uint = OpTypePointer Image %uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %17 = OpLoad %uint %16 + %18 = OpCopyObject %uint %17 + %20 = OpAccessChain %_ptr_UniformConstant_7 %uImage %18 + %22 = OpLoad %v3uint %gl_GlobalInvocationID + %23 = OpVectorShuffle %v2uint %22 %22 0 1 + %26 = OpBitcast %v2int %23 + %30 = OpImageTexelPointer %_ptr_Image_uint %20 %26 %uint_0 + %31 = OpAtomicIAdd %uint %30 %uint_1 %uint_0 %uint_1 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp b/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp new file mode 100644 index 00000000000..2eaef4bdbee --- /dev/null +++ b/shaders-no-opt/asm/comp/local-size-id-override.vk.asm.comp @@ -0,0 +1,60 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + 
OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + %spec_1 = OpSpecConstant %uint 11 + %spec_2 = OpSpecConstant %uint 12 + %spec_3 = OpSpecConstant %uint 13 + %spec_4 = OpSpecConstant %uint 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %uint_3 %spec_1 %spec_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 
%float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %27 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp b/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp new file mode 100644 index 00000000000..3031f4bb8af --- /dev/null +++ b/shaders-no-opt/asm/comp/local-size-id.vk.asm.comp @@ -0,0 +1,76 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + ; Test that we can declare the spec constant as signed. 
+ ; Needs implicit bitcast since WorkGroupSize is uint. + %spec_1 = OpSpecConstant %int 11 + %spec_2 = OpSpecConstant %int 12 + %spec_3 = OpSpecConstant %int 13 + %spec_4 = OpSpecConstant %int 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + ; Test that we can build spec constant composites out of local size id values. + ; Needs special case handling. + %spec_3_op = OpSpecConstantOp %uint IAdd %spec_3 %uint_3 +%WorkGroupSize = OpSpecConstantComposite %v3uint %spec_3_op %spec_4 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %wg_f = OpConvertUToF %v3float %WorkGroupSize + %wg_f4 = OpVectorShuffle %v4float %wg_f %wg_f 0 1 2 2 + ; Test that we can use the spec constants directly which needs to translate to gl_WorkGroupSize.elem. + ; Needs special case handling. 
+ %res = OpFAdd %v4float %27 %wg_f4 + %f0 = OpConvertSToF %float %spec_3 + %f1 = OpConvertSToF %float %spec_4 + %f2 = OpConvertSToF %float %uint_2 + %res1 = OpVectorTimesScalar %v4float %res %f0 + %res2 = OpVectorTimesScalar %v4float %res1 %f1 + %res3 = OpVectorTimesScalar %v4float %res2 %f2 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %res3 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp b/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp new file mode 100644 index 00000000000..b928099db85 --- /dev/null +++ b/shaders-no-opt/asm/comp/loop-variable-early-read-with-initializer.asm.comp @@ -0,0 +1,185 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 114 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %7 "main" + OpExecutionMode %7 LocalSize 1 1 1 + OpDecorate %_struct_21 BufferBlock + OpMemberDecorate %_struct_21 0 Offset 0 + OpDecorate %_arr_uint_uint_1 ArrayStride 4 + OpDecorate %_struct_23 BufferBlock + OpMemberDecorate %_struct_23 0 Offset 0 + OpDecorate %_arr_uint_uint_2 ArrayStride 4 + OpDecorate %_struct_25 BufferBlock + OpMemberDecorate %_struct_25 0 Offset 0 + OpDecorate %_arr_uint_uint_11 ArrayStride 4 + OpDecorate %27 DescriptorSet 0 + OpDecorate %27 Binding 0 + OpDecorate %28 DescriptorSet 0 + OpDecorate %28 Binding 1 + OpDecorate %29 DescriptorSet 0 + OpDecorate %29 Binding 2 + OpDecorate %30 DescriptorSet 0 + OpDecorate %30 Binding 3 + OpDecorate %31 DescriptorSet 0 + OpDecorate %31 Binding 4 + OpDecorate %32 DescriptorSet 0 + OpDecorate %32 Binding 5 + %void = OpTypeVoid + %2 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %true = OpConstantTrue %bool + %uint_0 = OpConstant %uint 0 + %uint_0_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_8 = OpConstant %uint 8 + %uint_9 = OpConstant %uint 
9 + %uint_10 = OpConstant %uint 10 + %uint_11 = OpConstant %uint 11 + %uint_12 = OpConstant %uint 12 + %uint_13 = OpConstant %uint 13 + %uint_14 = OpConstant %uint 14 + %uint_15 = OpConstant %uint 15 + %uint_16 = OpConstant %uint 16 + %uint_17 = OpConstant %uint 17 + %uint_18 = OpConstant %uint 18 + %uint_19 = OpConstant %uint 19 + %uint_20 = OpConstant %uint 20 +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 + %_struct_21 = OpTypeStruct %_arr_uint_uint_1 +%_ptr_Uniform__struct_21 = OpTypePointer Uniform %_struct_21 + %31 = OpVariable %_ptr_Uniform__struct_21 Uniform + %28 = OpVariable %_ptr_Uniform__struct_21 Uniform + %29 = OpVariable %_ptr_Uniform__struct_21 Uniform + %30 = OpVariable %_ptr_Uniform__struct_21 Uniform +%_arr_uint_uint_2 = OpTypeArray %uint %uint_2 + %_struct_23 = OpTypeStruct %_arr_uint_uint_2 +%_ptr_Uniform__struct_23 = OpTypePointer Uniform %_struct_23 + %27 = OpVariable %_ptr_Uniform__struct_23 Uniform +%_arr_uint_uint_11 = OpTypeArray %uint %uint_11 + %_struct_25 = OpTypeStruct %_arr_uint_uint_11 +%_ptr_Uniform__struct_25 = OpTypePointer Uniform %_struct_25 + %32 = OpVariable %_ptr_Uniform__struct_25 Uniform +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %7 = OpFunction %void None %2 + %8 = OpLabel + %54 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %55 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %56 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %57 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %58 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %59 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %60 = OpLoad %uint %54 + %61 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %60 + OpStore %61 %uint_8 + %62 = OpIAdd %uint %60 %uint_1 + OpStore %54 %62 + OpBranch %9 + %9 = OpLabel + %63 = OpLoad %uint %54 + %64 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %63 + OpStore %64 %uint_9 + %65 = OpIAdd %uint %63 %uint_1 + OpStore %54 %65 + %66 = 
OpLoad %uint %55 + %67 = OpAccessChain %_ptr_Uniform_uint %27 %uint_0_0 %66 + %68 = OpLoad %uint %67 + %69 = OpIEqual %bool %68 %uint_1 + %70 = OpIAdd %uint %66 %uint_1 + OpStore %55 %70 + OpLoopMerge %10 %11 None + OpBranchConditional %69 %12 %13 + %12 = OpLabel + %71 = OpLoad %uint %54 + %72 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %71 + OpStore %72 %uint_12 + %73 = OpIAdd %uint %71 %uint_1 + OpStore %54 %73 + OpReturn + %13 = OpLabel + %74 = OpLoad %uint %54 + %75 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %74 + OpStore %75 %uint_13 + %76 = OpIAdd %uint %74 %uint_1 + OpStore %54 %76 + %77 = OpLoad %uint %56 + %78 = OpAccessChain %_ptr_Uniform_uint %28 %uint_0_0 %77 + %79 = OpLoad %uint %78 + %80 = OpIEqual %bool %79 %uint_1 + %81 = OpIAdd %uint %77 %uint_1 + OpStore %56 %81 + OpBranchConditional %80 %11 %10 + %11 = OpLabel + %82 = OpLoad %uint %54 + %83 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %82 + OpStore %83 %uint_11 + %84 = OpIAdd %uint %82 %uint_1 + OpStore %54 %84 + OpBranch %14 + %14 = OpLabel + %85 = OpLoad %uint %54 + %86 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %85 + OpStore %86 %uint_14 + %87 = OpIAdd %uint %85 %uint_1 + OpStore %54 %87 + %88 = OpLoad %uint %57 + %89 = OpAccessChain %_ptr_Uniform_uint %29 %uint_0_0 %88 + %90 = OpLoad %uint %89 + %91 = OpIAdd %uint %88 %uint_1 + OpStore %57 %91 + OpSelectionMerge %15 None + OpSwitch %90 %16 + %16 = OpLabel + %92 = OpLoad %uint %54 + %93 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %92 + OpStore %93 %uint_16 + %94 = OpIAdd %uint %92 %uint_1 + OpStore %54 %94 + OpBranch %15 + %15 = OpLabel + %95 = OpLoad %uint %54 + %96 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %95 + OpStore %96 %uint_15 + %97 = OpIAdd %uint %95 %uint_1 + OpStore %54 %97 + %98 = OpLoad %uint %58 + %99 = OpAccessChain %_ptr_Uniform_uint %30 %uint_0_0 %98 + %100 = OpLoad %uint %99 + %101 = OpIEqual %bool %100 %uint_1 + %102 = OpIAdd %uint %98 %uint_1 + OpStore %58 %102 + OpSelectionMerge %17 None + 
OpBranchConditional %101 %18 %19 + %18 = OpLabel + OpBranch %17 + %19 = OpLabel + %103 = OpLoad %uint %54 + %104 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %103 + OpStore %104 %uint_19 + %105 = OpIAdd %uint %103 %uint_1 + OpStore %54 %105 + OpBranch %17 + %17 = OpLabel + %106 = OpLoad %uint %54 + %107 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %106 + OpStore %107 %uint_17 + %108 = OpIAdd %uint %106 %uint_1 + OpStore %54 %108 + %109 = OpLoad %uint %59 + %110 = OpAccessChain %_ptr_Uniform_uint %31 %uint_0_0 %109 + %111 = OpLoad %uint %110 + %112 = OpIEqual %bool %111 %uint_1 + %113 = OpIAdd %uint %109 %uint_1 + OpStore %59 %113 + OpBranchConditional %112 %9 %10 + %10 = OpLabel + OpBranch %20 + %20 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp b/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp new file mode 100644 index 00000000000..ebee277b7f1 --- /dev/null +++ b/shaders-no-opt/asm/comp/loop-variable-early-read-with-undef.asm.comp @@ -0,0 +1,185 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 114 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %7 "main" + OpExecutionMode %7 LocalSize 1 1 1 + OpDecorate %_struct_21 BufferBlock + OpMemberDecorate %_struct_21 0 Offset 0 + OpDecorate %_arr_uint_uint_1 ArrayStride 4 + OpDecorate %_struct_23 BufferBlock + OpMemberDecorate %_struct_23 0 Offset 0 + OpDecorate %_arr_uint_uint_2 ArrayStride 4 + OpDecorate %_struct_25 BufferBlock + OpMemberDecorate %_struct_25 0 Offset 0 + OpDecorate %_arr_uint_uint_11 ArrayStride 4 + OpDecorate %27 DescriptorSet 0 + OpDecorate %27 Binding 0 + OpDecorate %28 DescriptorSet 0 + OpDecorate %28 Binding 1 + OpDecorate %29 DescriptorSet 0 + OpDecorate %29 Binding 2 + OpDecorate %30 DescriptorSet 0 + OpDecorate %30 Binding 3 + OpDecorate %31 DescriptorSet 0 + OpDecorate %31 Binding 4 + OpDecorate %32 DescriptorSet 0 
+ OpDecorate %32 Binding 5 + %void = OpTypeVoid + %2 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %true = OpConstantTrue %bool + %uint_0 = OpConstant %uint 0 + %uint_0_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_8 = OpConstant %uint 8 + %uint_9 = OpConstant %uint 9 + %uint_10 = OpConstant %uint 10 + %uint_11 = OpConstant %uint 11 + %uint_12 = OpConstant %uint 12 + %uint_13 = OpConstant %uint 13 + %uint_14 = OpConstant %uint 14 + %uint_15 = OpConstant %uint 15 + %uint_16 = OpConstant %uint 16 + %uint_17 = OpConstant %uint 17 + %uint_18 = OpConstant %uint 18 + %uint_19 = OpConstant %uint 19 + %uint_20 = OpConstant %uint 20 +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 + %_struct_21 = OpTypeStruct %_arr_uint_uint_1 +%_ptr_Uniform__struct_21 = OpTypePointer Uniform %_struct_21 + %31 = OpVariable %_ptr_Uniform__struct_21 Uniform + %28 = OpVariable %_ptr_Uniform__struct_21 Uniform + %29 = OpVariable %_ptr_Uniform__struct_21 Uniform + %30 = OpVariable %_ptr_Uniform__struct_21 Uniform +%_arr_uint_uint_2 = OpTypeArray %uint %uint_2 + %_struct_23 = OpTypeStruct %_arr_uint_uint_2 +%_ptr_Uniform__struct_23 = OpTypePointer Uniform %_struct_23 + %27 = OpVariable %_ptr_Uniform__struct_23 Uniform +%_arr_uint_uint_11 = OpTypeArray %uint %uint_11 + %_struct_25 = OpTypeStruct %_arr_uint_uint_11 +%_ptr_Uniform__struct_25 = OpTypePointer Uniform %_struct_25 + %32 = OpVariable %_ptr_Uniform__struct_25 Uniform +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %7 = OpFunction %void None %2 + %8 = OpLabel + %54 = OpVariable %_ptr_Function_uint Function + %55 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %56 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %57 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %58 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %59 = OpVariable %_ptr_Function_uint Function %uint_0_0 + %60 = OpLoad %uint %54 
+ %61 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %60 + OpStore %61 %uint_8 + %62 = OpIAdd %uint %60 %uint_1 + OpStore %54 %62 + OpBranch %9 + %9 = OpLabel + %63 = OpLoad %uint %54 + %64 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %63 + OpStore %64 %uint_9 + %65 = OpIAdd %uint %63 %uint_1 + OpStore %54 %65 + %66 = OpLoad %uint %55 + %67 = OpAccessChain %_ptr_Uniform_uint %27 %uint_0_0 %66 + %68 = OpLoad %uint %67 + %69 = OpIEqual %bool %68 %uint_1 + %70 = OpIAdd %uint %66 %uint_1 + OpStore %55 %70 + OpLoopMerge %10 %11 None + OpBranchConditional %69 %12 %13 + %12 = OpLabel + %71 = OpLoad %uint %54 + %72 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %71 + OpStore %72 %uint_12 + %73 = OpIAdd %uint %71 %uint_1 + OpStore %54 %73 + OpReturn + %13 = OpLabel + %74 = OpLoad %uint %54 + %75 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %74 + OpStore %75 %uint_13 + %76 = OpIAdd %uint %74 %uint_1 + OpStore %54 %76 + %77 = OpLoad %uint %56 + %78 = OpAccessChain %_ptr_Uniform_uint %28 %uint_0_0 %77 + %79 = OpLoad %uint %78 + %80 = OpIEqual %bool %79 %uint_1 + %81 = OpIAdd %uint %77 %uint_1 + OpStore %56 %81 + OpBranchConditional %80 %11 %10 + %11 = OpLabel + %82 = OpLoad %uint %54 + %83 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %82 + OpStore %83 %uint_11 + %84 = OpIAdd %uint %82 %uint_1 + OpStore %54 %84 + OpBranch %14 + %14 = OpLabel + %85 = OpLoad %uint %54 + %86 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %85 + OpStore %86 %uint_14 + %87 = OpIAdd %uint %85 %uint_1 + OpStore %54 %87 + %88 = OpLoad %uint %57 + %89 = OpAccessChain %_ptr_Uniform_uint %29 %uint_0_0 %88 + %90 = OpLoad %uint %89 + %91 = OpIAdd %uint %88 %uint_1 + OpStore %57 %91 + OpSelectionMerge %15 None + OpSwitch %90 %16 + %16 = OpLabel + %92 = OpLoad %uint %54 + %93 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %92 + OpStore %93 %uint_16 + %94 = OpIAdd %uint %92 %uint_1 + OpStore %54 %94 + OpBranch %15 + %15 = OpLabel + %95 = OpLoad %uint %54 + %96 = OpAccessChain 
%_ptr_Uniform_uint %32 %uint_0_0 %95 + OpStore %96 %uint_15 + %97 = OpIAdd %uint %95 %uint_1 + OpStore %54 %97 + %98 = OpLoad %uint %58 + %99 = OpAccessChain %_ptr_Uniform_uint %30 %uint_0_0 %98 + %100 = OpLoad %uint %99 + %101 = OpIEqual %bool %100 %uint_1 + %102 = OpIAdd %uint %98 %uint_1 + OpStore %58 %102 + OpSelectionMerge %17 None + OpBranchConditional %101 %18 %19 + %18 = OpLabel + OpBranch %17 + %19 = OpLabel + %103 = OpLoad %uint %54 + %104 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %103 + OpStore %104 %uint_19 + %105 = OpIAdd %uint %103 %uint_1 + OpStore %54 %105 + OpBranch %17 + %17 = OpLabel + %106 = OpLoad %uint %54 + %107 = OpAccessChain %_ptr_Uniform_uint %32 %uint_0_0 %106 + OpStore %107 %uint_17 + %108 = OpIAdd %uint %106 %uint_1 + OpStore %54 %108 + %109 = OpLoad %uint %59 + %110 = OpAccessChain %_ptr_Uniform_uint %31 %uint_0_0 %109 + %111 = OpLoad %uint %110 + %112 = OpIEqual %bool %111 %uint_1 + %113 = OpIAdd %uint %109 %uint_1 + OpStore %59 %113 + OpBranchConditional %112 %9 %10 + %10 = OpLabel + OpBranch %20 + %20 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp b/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp new file mode 100644 index 00000000000..f40377b1181 --- /dev/null +++ b/shaders-no-opt/asm/comp/loop-variable-with-initializer.asm.comp @@ -0,0 +1,31 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos; 0 +; Bound: 62 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %i "i" + %uint = OpTypeInt 32 0 + %void = OpTypeVoid + %11 = OpTypeFunction %void + %uint_0 = OpConstant %uint 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %31 = OpConstantNull %uint + %main = OpFunction %void None %11 + %14 = OpLabel + %i = OpVariable %_ptr_Function_uint Function %31 + OpStore %i %uint_0 + OpBranch %32 + %32 = OpLabel + OpLoopMerge %33 
%34 None + OpBranch %33 + %34 = OpLabel + %57 = OpLoad %uint %i + OpBranch %32 + %33 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp b/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp new file mode 100644 index 00000000000..821370379e0 --- /dev/null +++ b/shaders-no-opt/asm/comp/multi-break-switch-out-of-loop.asm.comp @@ -0,0 +1,94 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 53 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %count "count" + OpName %i "i" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %_ "" + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 + %bool = OpTypeBool + %UBO = OpTypeStruct %int +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %_ = OpVariable %_ptr_Uniform_UBO Uniform +%_ptr_Uniform_int = OpTypePointer Uniform %int + %int_20 = OpConstant %int 20 + %int_1 = OpConstant %int 1 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %count = OpVariable %_ptr_Function_uint Function + %i = OpVariable %_ptr_Function_int Function + OpStore %count %uint_0 + OpStore %i %int_0 + OpBranch %14 + %14 = OpLabel + OpLoopMerge %16 %17 None + OpBranch %18 + %18 = OpLabel + %19 = OpLoad %int %i + %22 = 
OpSLessThan %bool %19 %int_4 + OpBranchConditional %22 %15 %16 + %15 = OpLabel + OpSelectionMerge %24 None + OpSwitch %int_0 %23 + %23 = OpLabel + OpSelectionMerge %26 None + OpSwitch %int_0 %25 + %25 = OpLabel + OpSelectionMerge %28 None + OpSwitch %int_0 %27 + %27 = OpLabel + %33 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + %34 = OpLoad %int %33 + %36 = OpIEqual %bool %34 %int_20 + OpSelectionMerge %38 None + OpBranchConditional %36 %37 %38 + %37 = OpLabel + OpBranch %16 + %38 = OpLabel + OpBranch %28 + %28 = OpLabel + OpBranch %26 + %26 = OpLabel + %42 = OpLoad %uint %count + %44 = OpIAdd %uint %42 %int_1 + OpStore %count %44 + OpBranch %24 + %24 = OpLabel + %46 = OpLoad %uint %count + %47 = OpIAdd %uint %46 %int_1 + OpStore %count %47 + OpBranch %17 + %17 = OpLabel + %48 = OpLoad %int %i + %49 = OpIAdd %int %48 %int_1 + OpStore %i %49 + OpBranch %14 + %16 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp new file mode 100644 index 00000000000..5d5fac622a9 --- /dev/null +++ b/shaders-no-opt/asm/comp/nonuniform-bracket-handling.vk.nocompat.asm.comp @@ -0,0 +1,299 @@ +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 233 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpCapability ImageQuery + OpCapability StorageImageWriteWithoutFormat + OpCapability GroupNonUniformBallot + OpCapability RuntimeDescriptorArray + OpCapability UniformTexelBufferArrayDynamicIndexing + OpCapability StorageTexelBufferArrayDynamicIndexing + OpCapability UniformTexelBufferArrayNonUniformIndexing + OpCapability StorageTexelBufferArrayNonUniformIndexing + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_descriptor_indexing" + OpExtension "SPV_KHR_physical_storage_buffer" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" 
%gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpName %main "main" + OpName %RootConstants "RootConstants" + OpName %registers "registers" + OpName %SSBO_Offsets "SSBO_Offsets" + OpDecorate %RootConstants Block + OpMemberDecorate %RootConstants 0 Offset 0 + OpMemberDecorate %RootConstants 1 Offset 4 + OpMemberDecorate %RootConstants 2 Offset 8 + OpMemberDecorate %RootConstants 3 Offset 12 + OpMemberDecorate %RootConstants 4 Offset 16 + OpMemberDecorate %RootConstants 5 Offset 20 + OpMemberDecorate %RootConstants 6 Offset 24 + OpMemberDecorate %RootConstants 7 Offset 28 + OpDecorate %_runtimearr_v2uint ArrayStride 8 + OpMemberDecorate %SSBO_Offsets 0 Offset 0 + OpDecorate %SSBO_Offsets Block + OpDecorate %13 DescriptorSet 0 + OpDecorate %13 Binding 0 + OpDecorate %13 NonWritable + OpDecorate %13 Restrict + OpDecorate %18 DescriptorSet 1 + OpDecorate %18 Binding 0 + OpDecorate %22 DescriptorSet 4 + OpDecorate %22 Binding 0 + OpDecorate %26 DescriptorSet 4 + OpDecorate %26 Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %148 NonUniform + OpDecorate %149 NonUniform + OpDecorate %172 NonUniform + OpDecorate %173 NonUniform + OpDecorate %196 NonUniform + OpDecorate %197 NonUniform + OpDecorate %205 NonUniform + OpDecorate %212 NonUniform + %void = OpTypeVoid + %2 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%RootConstants = OpTypeStruct %uint %uint %uint %uint %uint %uint %uint %uint +%_ptr_PushConstant_RootConstants = OpTypePointer PushConstant %RootConstants + %registers = OpVariable %_ptr_PushConstant_RootConstants PushConstant + %v2uint = OpTypeVector %uint 2 +%_runtimearr_v2uint = OpTypeRuntimeArray %v2uint +%SSBO_Offsets = OpTypeStruct %_runtimearr_v2uint +%_ptr_StorageBuffer_SSBO_Offsets = OpTypePointer StorageBuffer %SSBO_Offsets + %13 = OpVariable %_ptr_StorageBuffer_SSBO_Offsets StorageBuffer + %float = OpTypeFloat 32 + %15 = OpTypeImage %float Buffer 0 0 0 1 Unknown +%_runtimearr_15 = 
OpTypeRuntimeArray %15 +%_ptr_UniformConstant__runtimearr_15 = OpTypePointer UniformConstant %_runtimearr_15 + %18 = OpVariable %_ptr_UniformConstant__runtimearr_15 UniformConstant + %19 = OpTypeImage %float Buffer 0 0 0 2 R32f +%_runtimearr_19 = OpTypeRuntimeArray %19 +%_ptr_UniformConstant__runtimearr_19 = OpTypePointer UniformConstant %_runtimearr_19 + %22 = OpVariable %_ptr_UniformConstant__runtimearr_19 UniformConstant + %23 = OpTypeImage %uint Buffer 0 0 0 2 R32ui +%_runtimearr_23 = OpTypeRuntimeArray %23 +%_ptr_UniformConstant__runtimearr_23 = OpTypePointer UniformConstant %_runtimearr_23 + %26 = OpVariable %_ptr_UniformConstant__runtimearr_23 UniformConstant +%_ptr_UniformConstant_23 = OpTypePointer UniformConstant %23 +%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint + %uint_4 = OpConstant %uint 4 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint + %uint_0 = OpConstant %uint 0 +%_ptr_UniformConstant_19 = OpTypePointer UniformConstant %19 +%_ptr_UniformConstant_15 = OpTypePointer UniformConstant %15 + %uint_1 = OpConstant %uint 1 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %bool = OpTypeBool +%uint_4294967295 = OpConstant %uint 4294967295 + %v4float = OpTypeVector %float 4 + %uint_1024 = OpConstant %uint 1024 + %uint_2048 = OpConstant %uint 2048 +%_ptr_Image_uint = OpTypePointer Image %uint + %uint_40 = OpConstant %uint 40 + %uint_50 = OpConstant %uint 50 + %uint_70 = OpConstant %uint 70 + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %231 + %231 = OpLabel + %30 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_4 + %32 = OpLoad %uint %30 + %33 = OpIAdd %uint %32 %uint_2 + %28 = OpAccessChain %_ptr_UniformConstant_23 %26 %33 + %35 = OpLoad %23 %28 + %36 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %33 + 
%39 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %36 + %41 = OpLoad %v2uint %39 + %44 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_4 + %45 = OpLoad %uint %44 + %43 = OpAccessChain %_ptr_UniformConstant_19 %22 %45 + %46 = OpLoad %19 %43 + %47 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %45 + %48 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %47 + %49 = OpLoad %v2uint %48 + %52 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_1 + %54 = OpLoad %uint %52 + %55 = OpIAdd %uint %54 %uint_1 + %51 = OpAccessChain %_ptr_UniformConstant_15 %18 %55 + %56 = OpLoad %15 %51 + %57 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %55 + %58 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %57 + %59 = OpLoad %v2uint %58 + %64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %65 = OpLoad %uint %64 + %66 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1 + %67 = OpLoad %uint %66 + %68 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2 + %69 = OpLoad %uint %68 + %70 = OpIAdd %uint %65 %uint_4 + %71 = OpCompositeExtract %uint %49 0 + %72 = OpCompositeExtract %uint %49 1 + %73 = OpIAdd %uint %70 %71 + %75 = OpULessThan %bool %70 %72 + %76 = OpSelect %uint %75 %73 %uint_4294967295 + %79 = OpImageRead %v4float %46 %76 + %80 = OpCompositeExtract %float %79 0 + %81 = OpCompositeExtract %float %79 1 + %82 = OpCompositeExtract %float %79 2 + %83 = OpCompositeExtract %float %79 3 + %84 = OpIAdd %uint %65 %uint_1024 + %86 = OpCompositeExtract %uint %49 0 + %87 = OpCompositeExtract %uint %49 1 + %88 = OpIAdd %uint %84 %86 + %89 = OpULessThan %bool %84 %87 + %90 = OpSelect %uint %89 %88 %uint_4294967295 + %91 = OpCompositeConstruct %v4float %80 %81 %82 %83 + OpImageWrite %46 %90 %91 + %92 = OpIAdd %uint %65 %uint_2 + %93 = OpCompositeExtract %uint %59 0 + %94 = OpCompositeExtract %uint %59 1 + %95 = OpIAdd %uint %92 %93 + %96 = OpULessThan %bool %92 %94 + %97 = OpSelect %uint %96 %95 %uint_4294967295 + %98 = 
OpImageFetch %v4float %56 %97 + %99 = OpCompositeExtract %float %98 0 + %100 = OpCompositeExtract %float %98 1 + %101 = OpCompositeExtract %float %98 2 + %102 = OpCompositeExtract %float %98 3 + %103 = OpIAdd %uint %65 %uint_2048 + %105 = OpCompositeExtract %uint %49 0 + %106 = OpCompositeExtract %uint %49 1 + %107 = OpIAdd %uint %103 %105 + %108 = OpULessThan %bool %103 %106 + %109 = OpSelect %uint %108 %107 %uint_4294967295 + %110 = OpCompositeConstruct %v4float %99 %100 %101 %102 + OpImageWrite %46 %109 %110 + %111 = OpCompositeExtract %uint %41 0 + %112 = OpCompositeExtract %uint %41 1 + %113 = OpIAdd %uint %65 %111 + %114 = OpULessThan %bool %65 %112 + %115 = OpSelect %uint %114 %113 %uint_4294967295 + %117 = OpImageTexelPointer %_ptr_Image_uint %28 %115 %uint_0 + %118 = OpAtomicIAdd %uint %117 %uint_1 %uint_0 %uint_40 + %120 = OpCompositeExtract %uint %41 0 + %121 = OpCompositeExtract %uint %41 1 + %122 = OpIAdd %uint %67 %120 + %123 = OpULessThan %bool %67 %121 + %124 = OpSelect %uint %123 %122 %uint_4294967295 + %125 = OpImageTexelPointer %_ptr_Image_uint %28 %124 %uint_0 + %126 = OpAtomicCompareExchange %uint %125 %uint_1 %uint_0 %uint_0 %uint_50 %uint_40 + %128 = OpCompositeExtract %uint %49 1 + %129 = OpConvertUToF %float %128 + %130 = OpCompositeExtract %uint %49 0 + %131 = OpCompositeExtract %uint %49 1 + %132 = OpIAdd %uint %uint_0 %130 + %133 = OpULessThan %bool %uint_0 %131 + %134 = OpSelect %uint %133 %132 %uint_4294967295 + %135 = OpCompositeConstruct %v4float %129 %129 %129 %129 + OpImageWrite %46 %134 %135 + %136 = OpCompositeExtract %uint %59 1 + %137 = OpConvertUToF %float %136 + %138 = OpCompositeExtract %uint %49 0 + %139 = OpCompositeExtract %uint %49 1 + %140 = OpIAdd %uint %uint_1 %138 + %141 = OpULessThan %bool %uint_1 %139 + %142 = OpSelect %uint %141 %140 %uint_4294967295 + %143 = OpCompositeConstruct %v4float %137 %137 %137 %137 + OpImageWrite %46 %142 %143 + %144 = OpIAdd %uint %69 %uint_0 + %146 = OpAccessChain 
%_ptr_PushConstant_uint %registers %uint_4 + %147 = OpLoad %uint %146 + %148 = OpIAdd %uint %147 %144 + %145 = OpAccessChain %_ptr_UniformConstant_19 %22 %148 + %149 = OpLoad %19 %145 + %150 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %148 + %151 = OpLoad %v2uint %150 + %152 = OpCompositeExtract %uint %151 0 + %153 = OpCompositeExtract %uint %151 1 + %154 = OpIAdd %uint %70 %152 + %155 = OpULessThan %bool %70 %153 + %156 = OpSelect %uint %155 %154 %uint_4294967295 + %157 = OpImageRead %v4float %149 %156 + %158 = OpCompositeExtract %float %157 0 + %159 = OpCompositeExtract %float %157 1 + %160 = OpCompositeExtract %float %157 2 + %161 = OpCompositeExtract %float %157 3 + %162 = OpCompositeExtract %uint %151 0 + %163 = OpCompositeExtract %uint %151 1 + %164 = OpIAdd %uint %84 %162 + %165 = OpULessThan %bool %84 %163 + %166 = OpSelect %uint %165 %164 %uint_4294967295 + %167 = OpCompositeConstruct %v4float %158 %159 %160 %161 + OpImageWrite %149 %166 %167 + %168 = OpIAdd %uint %69 %uint_0 + %170 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_1 + %171 = OpLoad %uint %170 + %172 = OpIAdd %uint %171 %168 + %169 = OpAccessChain %_ptr_UniformConstant_15 %18 %172 + %173 = OpLoad %15 %169 + %174 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %172 + %175 = OpLoad %v2uint %174 + %176 = OpCompositeExtract %uint %175 0 + %177 = OpCompositeExtract %uint %175 1 + %178 = OpIAdd %uint %70 %176 + %179 = OpULessThan %bool %70 %177 + %180 = OpSelect %uint %179 %178 %uint_4294967295 + %181 = OpImageFetch %v4float %173 %180 + %182 = OpCompositeExtract %float %181 0 + %183 = OpCompositeExtract %float %181 1 + %184 = OpCompositeExtract %float %181 2 + %185 = OpCompositeExtract %float %181 3 + %186 = OpCompositeExtract %uint %151 0 + %187 = OpCompositeExtract %uint %151 1 + %188 = OpIAdd %uint %103 %186 + %189 = OpULessThan %bool %103 %187 + %190 = OpSelect %uint %189 %188 %uint_4294967295 + %191 = OpCompositeConstruct %v4float %182 %183 %184 %185 + OpImageWrite 
%149 %190 %191 + %192 = OpIAdd %uint %69 %uint_0 + %194 = OpAccessChain %_ptr_PushConstant_uint %registers %uint_4 + %195 = OpLoad %uint %194 + %196 = OpIAdd %uint %195 %192 + %193 = OpAccessChain %_ptr_UniformConstant_23 %26 %196 + %197 = OpLoad %23 %193 + %198 = OpAccessChain %_ptr_StorageBuffer_v2uint %13 %uint_0 %196 + %199 = OpLoad %v2uint %198 + %200 = OpCompositeExtract %uint %199 0 + %201 = OpCompositeExtract %uint %199 1 + %202 = OpIAdd %uint %67 %200 + %203 = OpULessThan %bool %67 %201 + %204 = OpSelect %uint %203 %202 %uint_4294967295 + %205 = OpImageTexelPointer %_ptr_Image_uint %193 %204 %uint_0 + %206 = OpAtomicIAdd %uint %205 %uint_1 %uint_0 %uint_40 + %207 = OpCompositeExtract %uint %199 0 + %208 = OpCompositeExtract %uint %199 1 + %209 = OpIAdd %uint %67 %207 + %210 = OpULessThan %bool %67 %208 + %211 = OpSelect %uint %210 %209 %uint_4294967295 + %212 = OpImageTexelPointer %_ptr_Image_uint %193 %211 %uint_0 + %213 = OpAtomicCompareExchange %uint %212 %uint_1 %uint_0 %uint_0 %uint_70 %uint_40 + %215 = OpCompositeExtract %uint %151 1 + %216 = OpConvertUToF %float %215 + %217 = OpCompositeExtract %uint %49 0 + %218 = OpCompositeExtract %uint %49 1 + %219 = OpIAdd %uint %uint_2 %217 + %220 = OpULessThan %bool %uint_2 %218 + %221 = OpSelect %uint %220 %219 %uint_4294967295 + %222 = OpCompositeConstruct %v4float %216 %216 %216 %216 + OpImageWrite %46 %221 %222 + %223 = OpCompositeExtract %uint %175 1 + %224 = OpConvertUToF %float %223 + %225 = OpCompositeExtract %uint %49 0 + %226 = OpCompositeExtract %uint %49 1 + %227 = OpIAdd %uint %uint_3 %225 + %228 = OpULessThan %bool %uint_3 %226 + %229 = OpSelect %uint %228 %227 %uint_4294967295 + %230 = OpCompositeConstruct %v4float %224 %224 %224 %224 + OpImageWrite %46 %229 %230 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp similarity index 100% rename from 
shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp rename to shaders-no-opt/asm/comp/phi-temporary-copy-loop-variable.asm.invalid.comp diff --git a/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp b/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp new file mode 100644 index 00000000000..d3b746a6ec9 --- /dev/null +++ b/shaders-no-opt/asm/comp/ray-query-force-temporary-rtas.spv14.asm.vk.nocompat.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpCapability RayQueryKHR + OpExtension "SPV_KHR_ray_query" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %_ %__0 %rq + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 460 + OpSourceExtension "GL_EXT_ray_query" + OpSourceExtension "GL_EXT_ray_tracing" + OpName %main "main" + OpName %va "va" + OpName %Buf "Buf" + OpMemberName %Buf 0 "vas" + OpName %_ "" + OpName %Registers "Registers" + OpMemberName %Registers 0 "index" + OpName %__0 "" + OpName %rq "rq" + OpDecorate %_arr_v2uint_uint_1024 ArrayStride 8 + OpMemberDecorate %Buf 0 NonWritable + OpMemberDecorate %Buf 0 Offset 0 + OpDecorate %Buf Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %Registers 0 Offset 0 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 +%_ptr_Function_v2uint = OpTypePointer Function %v2uint + %uint_1024 = OpConstant %uint 1024 +%_arr_v2uint_uint_1024 = OpTypeArray %v2uint %uint_1024 + %Buf = OpTypeStruct %_arr_v2uint_uint_1024 +%_ptr_StorageBuffer_Buf = OpTypePointer StorageBuffer %Buf + %_ = OpVariable %_ptr_StorageBuffer_Buf StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %Registers = OpTypeStruct %uint +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers 
+ %__0 = OpVariable %_ptr_PushConstant_Registers PushConstant +%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint +%_ptr_StorageBuffer_v2uint = OpTypePointer StorageBuffer %v2uint + %bool = OpTypeBool + %false = OpConstantFalse %bool + %32 = OpTypeRayQueryKHR +%_ptr_Private_32 = OpTypePointer Private %32 + %rq = OpVariable %_ptr_Private_32 Private + %36 = OpTypeAccelerationStructureKHR + %uint_0 = OpConstant %uint 0 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %float_0 = OpConstant %float 0 + %42 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %main = OpFunction %void None %3 + %5 = OpLabel + %va = OpVariable %_ptr_Function_v2uint Function + OpBranch %6 + %6 = OpLabel + OpLoopMerge %8 %9 None + OpBranch %7 + %7 = OpLabel + %25 = OpAccessChain %_ptr_PushConstant_uint %__0 %int_0 + %26 = OpLoad %uint %25 + %28 = OpAccessChain %_ptr_StorageBuffer_v2uint %_ %int_0 %26 + %29 = OpLoad %v2uint %28 + OpStore %va %29 + %37 = OpConvertUToAccelerationStructureKHR %36 %29 + OpBranch %9 + %9 = OpLabel + OpBranchConditional %false %6 %8 + %8 = OpLabel + OpRayQueryInitializeKHR %rq %37 %uint_0 %uint_0 %42 %float_0 %42 %float_0 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp b/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp new file mode 100644 index 00000000000..6f4538a9a33 --- /dev/null +++ b/shaders-no-opt/asm/comp/ray-query-function-object.spv14.asm.vk.nocompat.comp @@ -0,0 +1,39 @@ + OpCapability Shader + OpCapability RayTracingKHR + OpCapability RayQueryKHR + OpExtension "SPV_KHR_ray_tracing" + OpExtension "SPV_KHR_ray_query" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %RTAS %gl_LocalInvocationIndex + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 460 + OpName %accelerationStructureNV "accelerationStructureNV" + OpName %RTAS "RTAS" + OpName %main "main" + OpName %rayQueryKHR "rayQueryKHR" + OpDecorate 
%gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %RTAS DescriptorSet 0 + OpDecorate %RTAS Binding 0 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_255 = OpConstant %uint 255 +%accelerationStructureNV = OpTypeAccelerationStructureKHR +%_ptr_UniformConstant_accelerationStructureNV = OpTypePointer UniformConstant %accelerationStructureNV +%_ptr_Input_uint = OpTypePointer Input %uint + %void = OpTypeVoid + %12 = OpTypeFunction %void +%rayQueryKHR = OpTypeRayQueryKHR +%_ptr_Function_rayQueryKHR = OpTypePointer Function %rayQueryKHR + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %RTAS = OpVariable %_ptr_UniformConstant_accelerationStructureNV UniformConstant +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %16 = OpUndef %float + %17 = OpUndef %v3float + %main = OpFunction %void None %12 + %18 = OpLabel + %19 = OpVariable %_ptr_Function_rayQueryKHR Function + %20 = OpLoad %accelerationStructureNV %RTAS + OpRayQueryInitializeKHR %19 %20 %uint_2 %uint_255 %17 %16 %17 %16 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp b/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp new file mode 100644 index 00000000000..b4e622baced --- /dev/null +++ b/shaders-no-opt/asm/comp/spec-constant-name-aliasing.vk.asm.comp @@ -0,0 +1,78 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %A "A" + OpName %B "A" + OpName %C "A" + OpName %D "A" + OpName %E "A" + OpName %F "A" + OpName %G "A" + OpName %H "A" + OpName %I "A" + OpName %J "A" + 
OpName %K "A" + OpName %L "A" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %A SpecId 0 + OpDecorate %B SpecId 1 + OpDecorate %C SpecId 2 + OpDecorate %D SpecId 3 + OpDecorate %E SpecId 4 + OpDecorate %F SpecId 5 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %A = OpSpecConstant %int 0 + %B = OpSpecConstant %int 1 + %C = OpSpecConstant %int 2 + %D = OpSpecConstant %int 3 + %E = OpSpecConstant %int 4 + %F = OpSpecConstant %int 5 + %G = OpSpecConstantOp %int ISub %A %B + %H = OpSpecConstantOp %int ISub %G %C + %I = OpSpecConstantOp %int ISub %H %D + %J = OpSpecConstantOp %int ISub %I %E + %K = OpSpecConstantOp %int ISub %J %F + %L = OpSpecConstantOp %int IAdd %K %F +%_ptr_Uniform_int = OpTypePointer Uniform %int + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %32 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %32 %L + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp b/shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp similarity index 95% rename from 
shaders/asm/comp/storage-buffer-basic.invalid.asm.comp rename to shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp index edb1a05e549..db9a8490df6 100644 --- a/shaders/asm/comp/storage-buffer-basic.invalid.asm.comp +++ b/shaders-no-opt/asm/comp/storage-buffer-basic.asm.comp @@ -4,9 +4,9 @@ ; Bound: 31 ; Schema: 0 OpCapability Shader - OpCapability VariablePointers + ;OpCapability VariablePointers OpExtension "SPV_KHR_storage_buffer_storage_class" - OpExtension "SPV_KHR_variable_pointers" + ;OpExtension "SPV_KHR_variable_pointers" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %22 "main" %gl_WorkGroupID OpSource OpenCL_C 120 diff --git a/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp b/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp new file mode 100644 index 00000000000..deaae421fdd --- /dev/null +++ b/shaders-no-opt/asm/comp/temorary-access-terminator.vk.nocompat.asm.comp @@ -0,0 +1,86 @@ +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 55 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpCapability GroupNonUniform + OpCapability GroupNonUniformBallot + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 64 1 1 + OpName %main "main" + OpName %WaveMatch "WaveMatch" + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpDecorate %11 DescriptorSet 0 + OpDecorate %11 Binding 0 + OpDecorate %11 NonReadable + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + %void = OpTypeVoid + %2 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %6 = OpTypeImage %uint Buffer 0 0 0 1 Unknown +%_ptr_UniformConstant_6 = OpTypePointer UniformConstant %6 + %8 = OpVariable %_ptr_UniformConstant_6 UniformConstant + %9 = OpTypeImage %uint Buffer 0 0 0 2 R32ui +%_ptr_UniformConstant_9 = OpTypePointer UniformConstant %9 + %11 = OpVariable %_ptr_UniformConstant_9 UniformConstant + 
%v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_0 = OpConstant %uint 0 + %v4uint = OpTypeVector %uint 4 + %24 = OpTypeFunction %v4uint %uint + %uint_3 = OpConstant %uint 3 + %bool = OpTypeBool + %uint_4 = OpConstant %uint 4 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %53 + %53 = OpLabel + %12 = OpLoad %9 %11 + %13 = OpLoad %6 %8 + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %20 = OpLoad %uint %18 + %22 = OpImageFetch %v4uint %13 %20 + %23 = OpCompositeExtract %uint %22 0 + %37 = OpFunctionCall %v4uint %WaveMatch %23 + %38 = OpCompositeExtract %uint %37 0 + %39 = OpCompositeExtract %uint %37 1 + %40 = OpCompositeExtract %uint %37 2 + %41 = OpCompositeExtract %uint %37 3 + %42 = OpIMul %uint %20 %uint_4 + %44 = OpCompositeConstruct %v4uint %38 %38 %38 %38 + OpImageWrite %12 %42 %44 + %45 = OpCompositeConstruct %v4uint %39 %39 %39 %39 + %46 = OpIAdd %uint %42 %uint_1 + OpImageWrite %12 %46 %45 + %48 = OpCompositeConstruct %v4uint %40 %40 %40 %40 + %49 = OpIAdd %uint %42 %uint_2 + OpImageWrite %12 %49 %48 + %51 = OpCompositeConstruct %v4uint %41 %41 %41 %41 + %52 = OpIAdd %uint %42 %uint_3 + OpImageWrite %12 %52 %51 + OpReturn + OpFunctionEnd + %WaveMatch = OpFunction %v4uint None %24 + %25 = OpFunctionParameter %uint + %27 = OpLabel + OpBranch %28 + %28 = OpLabel + OpLoopMerge %30 %29 None + OpBranch %29 + %29 = OpLabel + %31 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %25 + %34 = OpIEqual %bool %25 %31 + %35 = OpGroupNonUniformBallot %v4uint %uint_3 %34 + OpBranchConditional %34 %30 %28 + %30 = OpLabel + OpReturnValue %35 + OpFunctionEnd diff --git a/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag b/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag new file mode 100644 index 
00000000000..eac8fadf74d --- /dev/null +++ b/shaders-no-opt/asm/degenerate-selection-constructs.asm.frag @@ -0,0 +1,310 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 816 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpName %main "main" + OpName %checkSwap_f1_f1_ "checkSwap(f1;f1;" + OpName %a "a" + OpName %b "b" + OpName %gl_FragCoord "gl_FragCoord" + OpName %buf1 "buf1" + OpMemberName %buf1 0 "resolution" + OpName %_ "" + OpName %i "i" + OpName %data "data" + OpName %buf0 "buf0" + OpMemberName %buf0 0 "injectionSwitch" + OpName %__0 "" + OpName %i_0 "i" + OpName %j "j" + OpName %doSwap "doSwap" + OpName %param "param" + OpName %param_0 "param" + OpName %temp "temp" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpMemberDecorate %buf1 0 Offset 0 + OpDecorate %buf1 Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpMemberDecorate %buf0 0 Offset 0 + OpDecorate %buf0 Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %bool = OpTypeBool + %9 = OpTypeFunction %bool %_ptr_Function_float %_ptr_Function_float + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_ptr_Input_float = OpTypePointer Input %float + %v2float = OpTypeVector %float 2 + %buf1 = OpTypeStruct %v2float +%_ptr_Uniform_buf1 = OpTypePointer Uniform %buf1 + %_ = OpVariable %_ptr_Uniform_buf1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %float_2 
= OpConstant %float 2 +%_ptr_Function_bool = OpTypePointer Function %bool +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %uint_10 = OpConstant %uint 10 +%_arr_float_uint_10 = OpTypeArray %float %uint_10 +%_ptr_Function__arr_float_uint_10 = OpTypePointer Function %_arr_float_uint_10 + %buf0 = OpTypeStruct %v2float +%_ptr_Uniform_buf0 = OpTypePointer Uniform %buf0 + %__0 = OpVariable %_ptr_Uniform_buf0 Uniform + %int_1 = OpConstant %int 1 + %int_9 = OpConstant %int 9 + %uint_0 = OpConstant %uint 0 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_10 = OpConstant %float 10 + %int_5 = OpConstant %int 5 + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %false = OpConstantFalse %bool + %true = OpConstantTrue %bool + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %data = OpVariable %_ptr_Function__arr_float_uint_10 Function + %i_0 = OpVariable %_ptr_Function_int Function + %j = OpVariable %_ptr_Function_int Function + %doSwap = OpVariable %_ptr_Function_bool Function + %param = OpVariable %_ptr_Function_float Function + %param_0 = OpVariable %_ptr_Function_float Function + %temp = OpVariable %_ptr_Function_float Function + OpStore %i %int_0 + OpBranch %50 + %50 = OpLabel + OpLoopMerge %52 %53 None + OpBranch %54 + %54 = OpLabel + %55 = OpLoad %int %i + %57 = OpSLessThan %bool %55 %int_10 + OpBranchConditional %57 %51 %52 + %51 = OpLabel + %62 = OpLoad %int %i + %63 = OpLoad %int %i + %64 = OpISub %int %int_10 %63 + %65 = OpConvertSToF %float %64 + %69 = OpAccessChain %_ptr_Uniform_float %__0 %int_0 %uint_1 + %70 = OpLoad %float %69 + %71 = OpFMul %float %65 %70 + %72 = OpAccessChain %_ptr_Function_float %data %62 + OpStore %72 %71 + OpBranch %53 + %53 = OpLabel + %73 = OpLoad %int %i + %75 = OpIAdd %int %73 %int_1 + OpStore %i %75 + OpBranch %50 + %52 = OpLabel + OpStore %i_0 %int_0 + OpBranch %77 + %77 
= OpLabel + OpLoopMerge %79 %80 None + OpBranch %81 + %81 = OpLabel + %82 = OpLoad %int %i_0 + %84 = OpSLessThan %bool %82 %int_9 + OpBranchConditional %84 %78 %79 + %78 = OpLabel + OpStore %j %int_0 + OpBranch %86 + %86 = OpLabel + OpLoopMerge %88 %89 None + OpBranch %90 + %90 = OpLabel + %91 = OpLoad %int %j + %92 = OpSLessThan %bool %91 %int_10 + OpBranchConditional %92 %87 %88 + %87 = OpLabel + %93 = OpLoad %int %j + %94 = OpLoad %int %i_0 + %95 = OpIAdd %int %94 %int_1 + %96 = OpSLessThan %bool %93 %95 + OpSelectionMerge %98 None + OpBranchConditional %96 %97 %98 + %97 = OpLabel + OpBranch %89 + %98 = OpLabel + %101 = OpLoad %int %i_0 + %102 = OpLoad %int %j + %104 = OpAccessChain %_ptr_Function_float %data %101 + %105 = OpLoad %float %104 + OpStore %param %105 + %107 = OpAccessChain %_ptr_Function_float %data %102 + %108 = OpLoad %float %107 + OpStore %param_0 %108 + %109 = OpFunctionCall %bool %checkSwap_f1_f1_ %param %param_0 + OpStore %doSwap %109 + %110 = OpLoad %bool %doSwap + OpSelectionMerge %112 None + OpBranchConditional %110 %111 %112 + %111 = OpLabel + %114 = OpLoad %int %i_0 + %115 = OpAccessChain %_ptr_Function_float %data %114 + %116 = OpLoad %float %115 + OpStore %temp %116 + %117 = OpLoad %int %i_0 + %118 = OpLoad %int %j + %119 = OpAccessChain %_ptr_Function_float %data %118 + %120 = OpLoad %float %119 + %121 = OpAccessChain %_ptr_Function_float %data %117 + OpStore %121 %120 + %122 = OpLoad %int %j + %123 = OpLoad %float %temp + %124 = OpAccessChain %_ptr_Function_float %data %122 + OpStore %124 %123 + OpBranch %112 + %112 = OpLabel + OpBranch %89 + %89 = OpLabel + %125 = OpLoad %int %j + %126 = OpIAdd %int %125 %int_1 + OpStore %j %126 + OpBranch %86 + %88 = OpLabel + OpBranch %80 + %80 = OpLabel + %127 = OpLoad %int %i_0 + %128 = OpIAdd %int %127 %int_1 + OpStore %i_0 %128 + OpBranch %77 + %79 = OpLabel + %130 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %131 = OpLoad %float %130 + %132 = OpAccessChain %_ptr_Uniform_float %_ 
%int_0 %uint_0 + %133 = OpLoad %float %132 + %134 = OpFDiv %float %133 %float_2 + %135 = OpFOrdLessThan %bool %131 %134 + OpSelectionMerge %137 None + OpBranchConditional %135 %136 %153 + %136 = OpLabel + %140 = OpAccessChain %_ptr_Function_float %data %int_0 + %141 = OpLoad %float %140 + %143 = OpFDiv %float %141 %float_10 + %145 = OpAccessChain %_ptr_Function_float %data %int_5 + %146 = OpLoad %float %145 + %147 = OpFDiv %float %146 %float_10 + %148 = OpAccessChain %_ptr_Function_float %data %int_9 + %149 = OpLoad %float %148 + %150 = OpFDiv %float %149 %float_10 + %152 = OpCompositeConstruct %v4float %143 %147 %150 %float_1 + OpStore %_GLF_color %152 + OpBranch %137 + %153 = OpLabel + %154 = OpAccessChain %_ptr_Function_float %data %int_5 + %155 = OpLoad %float %154 + %156 = OpFDiv %float %155 %float_10 + %157 = OpAccessChain %_ptr_Function_float %data %int_9 + %158 = OpLoad %float %157 + %159 = OpFDiv %float %158 %float_10 + %160 = OpAccessChain %_ptr_Function_float %data %int_0 + %161 = OpLoad %float %160 + %162 = OpFDiv %float %161 %float_10 + %163 = OpCompositeConstruct %v4float %156 %159 %162 %float_1 + OpStore %_GLF_color %163 + OpBranch %137 + %137 = OpLabel + OpReturn + OpFunctionEnd +%checkSwap_f1_f1_ = OpFunction %bool None %9 + %a = OpFunctionParameter %_ptr_Function_float + %b = OpFunctionParameter %_ptr_Function_float + %13 = OpLabel + %35 = OpVariable %_ptr_Function_bool Function + %20 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_1 + %21 = OpLoad %float %20 + %29 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %uint_1 + %30 = OpLoad %float %29 + %32 = OpFDiv %float %30 %float_2 + %33 = OpFOrdLessThan %bool %21 %32 + OpBranch %36 + %36 = OpLabel + OpSelectionMerge %351 None + OpBranchConditional %33 %352 %354 + %352 = OpLabel + %353 = OpLoad %float %a + OpBranch %351 + %354 = OpLabel + %355 = OpCopyObject %float %float_0 + OpBranch %351 + %351 = OpLabel + %38 = OpPhi %float %353 %352 %355 %354 + OpSelectionMerge %386 None + 
OpBranchConditional %false %385 %385 + %385 = OpLabel + OpSelectionMerge %356 None + OpBranchConditional %33 %357 %359 + %357 = OpLabel + %358 = OpLoad %float %b + OpBranch %356 + %359 = OpLabel + %360 = OpCopyObject %float %float_0 + OpBranch %356 + %356 = OpLabel + %39 = OpPhi %float %358 %357 %360 %359 + %40 = OpFOrdGreaterThan %bool %38 %39 + OpBranch %362 + %362 = OpLabel + OpSelectionMerge %479 None + OpBranchConditional %33 %480 %479 + %480 = OpLabel + OpStore %35 %40 + OpBranch %479 + %479 = OpLabel + OpBranchConditional %true %361 %386 + %361 = OpLabel + OpBranch %386 + %386 = OpLabel + OpBranch %41 + %41 = OpLabel + OpSelectionMerge %363 None + OpBranchConditional %33 %366 %364 + %364 = OpLabel + %365 = OpLoad %float %a + OpBranch %363 + %366 = OpLabel + %367 = OpCopyObject %float %float_0 + OpBranch %363 + %363 = OpLabel + %42 = OpPhi %float %365 %364 %367 %366 + OpSelectionMerge %368 None + OpBranchConditional %33 %371 %369 + %369 = OpLabel + %370 = OpLoad %float %b + OpBranch %368 + %371 = OpLabel + %372 = OpCopyObject %float %float_0 + OpBranch %368 + %368 = OpLabel + %43 = OpPhi %float %370 %369 %372 %371 + %44 = OpFOrdLessThan %bool %42 %43 + OpSelectionMerge %373 None + OpBranchConditional %33 %373 %374 + %374 = OpLabel + OpStore %35 %44 + OpBranch %373 + %373 = OpLabel + OpBranch %37 + %37 = OpLabel + %45 = OpLoad %bool %35 + OpReturnValue %45 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag b/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag new file mode 100644 index 00000000000..6782b124730 --- /dev/null +++ b/shaders-no-opt/asm/frag/anonymous-inner-struct-names.asm.frag @@ -0,0 +1,83 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_ + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName 
%main "main" + OpMemberName %AA 0 "foo" + OpMemberName %AB 0 "foo" + OpMemberName %A 0 "_aa" + OpMemberName %A 1 "ab" + OpMemberName %BA 0 "foo" + OpMemberName %BB 0 "foo" + OpMemberName %B 0 "_ba" + OpMemberName %B 1 "bb" + OpName %VertexData "VertexData" + OpMemberName %VertexData 0 "_a" + OpMemberName %VertexData 1 "b" + OpName %_ "" + OpMemberName %CA 0 "foo" + OpMemberName %C 0 "_ca" + OpMemberName %DA 0 "foo" + OpMemberName %D 0 "da" + OpName %UBO "UBO" + OpMemberName %UBO 0 "_c" + OpMemberName %UBO 1 "d" + OpName %__0 "" + OpMemberName %E 0 "a" + OpName %SSBO "SSBO" + ;OpMemberName %SSBO 0 "e" Test that we don't try to assign bogus aliases. + OpMemberName %SSBO 1 "_e" + OpMemberName %SSBO 2 "f" + OpName %__1 "" + OpDecorate %VertexData Block + OpDecorate %_ Location 0 + OpMemberDecorate %CA 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %DA 0 Offset 0 + OpMemberDecorate %D 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + OpMemberDecorate %E 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpDecorate %SSBO BufferBlock + OpDecorate %__1 DescriptorSet 0 + OpDecorate %__1 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %AA = OpTypeStruct %int + %AB = OpTypeStruct %int + %A = OpTypeStruct %AA %AB + %BA = OpTypeStruct %int + %BB = OpTypeStruct %int + %B = OpTypeStruct %BA %BB + %VertexData = OpTypeStruct %A %B +%_ptr_Input_VertexData = OpTypePointer Input %VertexData + %_ = OpVariable %_ptr_Input_VertexData Input + %CA = OpTypeStruct %int + %C = OpTypeStruct %CA + %DA = OpTypeStruct %int + %D = OpTypeStruct %DA + %UBO = OpTypeStruct %C %D +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %__0 = OpVariable %_ptr_Uniform_UBO Uniform + %E = OpTypeStruct %int + %SSBO = OpTypeStruct %E %E %E +%_ptr_Uniform_SSBO = OpTypePointer 
Uniform %SSBO + %__1 = OpVariable %_ptr_Uniform_SSBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag b/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag new file mode 100644 index 00000000000..387764c92c8 --- /dev/null +++ b/shaders-no-opt/asm/frag/array-builtin-bitcast-load-store.asm.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google Tint Compiler; 0 +; Bound: 29 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %fragColor %gl_SampleMask + OpExecutionMode %main OriginUpperLeft + OpName %fragColor "fragColor" + OpName %uBuffer "uBuffer" + OpMemberName %uBuffer 0 "color" + OpName %x_12 "x_12" + OpName %gl_SampleMask "gl_SampleMask" + OpName %main "main" + OpDecorate %fragColor Location 0 + OpDecorate %uBuffer Block + OpMemberDecorate %uBuffer 0 Offset 0 + OpDecorate %x_12 DescriptorSet 0 + OpDecorate %x_12 Binding 0 + OpDecorate %gl_SampleMask BuiltIn SampleMask + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %_ptr_Output_v4float = OpTypePointer Output %v4float + %5 = OpConstantNull %v4float + %fragColor = OpVariable %_ptr_Output_v4float Output %5 + %uBuffer = OpTypeStruct %v4float + %_ptr_Uniform_uBuffer = OpTypePointer Uniform %uBuffer + %x_12 = OpVariable %_ptr_Uniform_uBuffer Uniform + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %_arr_uint_uint_1 = OpTypeArray %uint %uint_1 +%_ptr_Output__arr_uint_uint_1 = OpTypePointer Output %_arr_uint_uint_1 + %14 = OpConstantNull %_arr_uint_uint_1 + %gl_SampleMask = OpVariable %_ptr_Output__arr_uint_uint_1 Output %14 + %void = OpTypeVoid + %15 = OpTypeFunction %void + %uint_0 = OpConstant %uint 0 + %_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %_ptr_Output_uint = OpTypePointer Output %uint + %int_6 = OpConstant %int 6 + %main = 
OpFunction %void None %15 + %18 = OpLabel + %21 = OpAccessChain %_ptr_Uniform_v4float %x_12 %uint_0 + %22 = OpLoad %v4float %21 + OpStore %fragColor %22 + %26 = OpAccessChain %_ptr_Output_uint %gl_SampleMask %int_0 + %27 = OpBitcast %uint %int_6 + OpStore %26 %27 + %loaded_scalar = OpLoad %uint %26 + OpStore %26 %loaded_scalar + %loaded = OpLoad %_arr_uint_uint_1 %gl_SampleMask + OpStore %gl_SampleMask %loaded + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag b/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag new file mode 100644 index 00000000000..d5a07b5497e --- /dev/null +++ b/shaders-no-opt/asm/frag/collapsed-switch-phi-flush.asm.frag @@ -0,0 +1,38 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vIndex + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %vIndex "vIndex" + OpDecorate %FragColor Location 0 + OpDecorate %vIndex Flat + OpDecorate %vIndex Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %15 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Input_int = OpTypePointer Input %int + %vIndex = OpVariable %_ptr_Input_int Input + %main = OpFunction %void None %3 + %5 = OpLabel + OpSelectionMerge %9 None + OpSwitch %int_0 %9 + %9 = OpLabel + %tmp = OpPhi %v4float %15 %5 + OpStore %FragColor %tmp + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag 
b/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag new file mode 100644 index 00000000000..dda2f0279ca --- /dev/null +++ b/shaders-no-opt/asm/frag/combined-image-sampler-dxc-min16float.asm.invalid.frag @@ -0,0 +1,95 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 48 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %PSMain "main" %in_var_COLOR %in_var_TEXCOORD0 %out_var_SV_TARGET + OpExecutionMode %PSMain OriginUpperLeft + ; Not actually ESSL, but makes testing easier. + OpSource ESSL 310 + OpName %type_2d_image "type.2d.image" + OpName %tex "tex" + OpName %type_sampler "type.sampler" + OpName %Samp "Samp" + OpName %in_var_COLOR "in.var.COLOR" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_TARGET "out.var.SV_TARGET" + OpName %PSMain "PSMain" + OpName %PSInput "PSInput" + OpMemberName %PSInput 0 "color" + OpMemberName %PSInput 1 "uv" + OpName %param_var_input "param.var.input" + OpName %src_PSMain "src.PSMain" + OpName %input "input" + OpName %bb_entry "bb.entry" + OpName %a "a" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %in_var_COLOR Location 0 + OpDecorate %in_var_TEXCOORD0 Location 1 + OpDecorate %out_var_SV_TARGET Location 0 + OpDecorate %tex DescriptorSet 0 + OpDecorate %tex Binding 0 + OpDecorate %Samp DescriptorSet 0 + OpDecorate %Samp Binding 1 + OpDecorate %tex RelaxedPrecision + OpDecorate %a RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %47 RelaxedPrecision + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float = OpTypeFloat 32 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = 
OpTypePointer Input %v4float + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %21 = OpTypeFunction %void + %PSInput = OpTypeStruct %v4float %v2float +%_ptr_Function_PSInput = OpTypePointer Function %PSInput + %31 = OpTypeFunction %v4float %_ptr_Function_PSInput +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Function_v2float = OpTypePointer Function %v2float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %tex = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant + %Samp = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%in_var_COLOR = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%out_var_SV_TARGET = OpVariable %_ptr_Output_v4float Output + %PSMain = OpFunction %void None %21 + %22 = OpLabel +%param_var_input = OpVariable %_ptr_Function_PSInput Function + %26 = OpLoad %v4float %in_var_COLOR + %27 = OpLoad %v2float %in_var_TEXCOORD0 + %28 = OpCompositeConstruct %PSInput %26 %27 + OpStore %param_var_input %28 + %29 = OpFunctionCall %v4float %src_PSMain %param_var_input + OpStore %out_var_SV_TARGET %29 + OpReturn + OpFunctionEnd + %src_PSMain = OpFunction %v4float None %31 + %input = OpFunctionParameter %_ptr_Function_PSInput + %bb_entry = OpLabel + %a = OpVariable %_ptr_Function_v4float Function + %36 = OpAccessChain %_ptr_Function_v4float %input %int_0 + %37 = OpLoad %v4float %36 + %38 = OpLoad %type_2d_image %tex + %39 = OpLoad %type_sampler %Samp + %41 = OpAccessChain %_ptr_Function_v2float %input %int_1 + %42 = OpLoad %v2float %41 + %44 = OpSampledImage %type_sampled_image %38 %39 + %45 = OpImageSampleImplicitLod %v4float %44 %42 None + %46 = OpFMul %v4float %37 %45 + OpStore %a %46 + %47 = OpLoad %v4float %a + OpReturnValue %47 + OpFunctionEnd + diff --git a/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag 
b/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag new file mode 100644 index 00000000000..ccb7a60fe71 --- /dev/null +++ b/shaders-no-opt/asm/frag/complex-opaque-handle-reuse-in-loop.asm.frag @@ -0,0 +1,120 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 71 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %ps_main "main" %out_var_SV_TARGET1 + OpExecutionMode %ps_main OriginUpperLeft + OpSource HLSL 600 + OpName %type_scene "type.scene" + OpMemberName %type_scene 0 "myConsts" + OpName %MyConsts "MyConsts" + OpMemberName %MyConsts 0 "opt" + OpName %scene "scene" + OpName %type_sampler "type.sampler" + OpName %mySampler "mySampler" + OpName %type_2d_image "type.2d.image" + OpName %texTable "texTable" + OpName %out_var_SV_TARGET1 "out.var.SV_TARGET1" + OpName %ps_main "ps_main" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %out_var_SV_TARGET1 Location 1 + OpDecorate %scene DescriptorSet 0 + OpDecorate %scene Binding 3 + OpDecorate %mySampler DescriptorSet 0 + OpDecorate %mySampler Binding 2 + OpDecorate %texTable DescriptorSet 0 + OpDecorate %texTable Binding 0 + OpMemberDecorate %MyConsts 0 Offset 0 + OpMemberDecorate %type_scene 0 Offset 0 + OpDecorate %type_scene Block + %float = OpTypeFloat 32 + %float_1 = OpConstant %float 1 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 +%uint_16777215 = OpConstant %uint 16777215 + %uint_0 = OpConstant %uint 0 + %float_0 = OpConstant %float 0 + %21 = OpConstantComposite %v2float %float_0 %float_0 + %MyConsts = OpTypeStruct %uint + %type_scene = OpTypeStruct %MyConsts +%_ptr_Uniform_type_scene = OpTypePointer Uniform %type_scene +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %uint_1 = OpConstant %uint 1 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown 
+%_arr_type_2d_image_uint_1 = OpTypeArray %type_2d_image %uint_1 +%_ptr_UniformConstant__arr_type_2d_image_uint_1 = OpTypePointer UniformConstant %_arr_type_2d_image_uint_1 +%_ptr_Output_uint = OpTypePointer Output %uint + %void = OpTypeVoid + %29 = OpTypeFunction %void + %v4uint = OpTypeVector %uint 4 + %v3float = OpTypeVector %float 3 +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool +%type_sampled_image = OpTypeSampledImage %type_2d_image + %v4float = OpTypeVector %float 4 + %scene = OpVariable %_ptr_Uniform_type_scene Uniform + %mySampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %texTable = OpVariable %_ptr_UniformConstant__arr_type_2d_image_uint_1 UniformConstant +%out_var_SV_TARGET1 = OpVariable %_ptr_Output_uint Output + %float_n1 = OpConstant %float -1 + %37 = OpUndef %v4uint + %ps_main = OpFunction %void None %29 + %38 = OpLabel + OpSelectionMerge %39 None + OpSwitch %uint_0 %40 + %40 = OpLabel + %41 = OpCompositeExtract %uint %37 1 + %42 = OpBitwiseAnd %uint %41 %uint_16777215 + %43 = OpAccessChain %_ptr_UniformConstant_type_2d_image %texTable %42 + %44 = OpLoad %type_2d_image %43 + %45 = OpAccessChain %_ptr_Uniform_uint %scene %int_0 %int_0 + %46 = OpLoad %uint %45 + %47 = OpINotEqual %bool %46 %uint_0 + OpSelectionMerge %48 DontFlatten + OpBranchConditional %47 %49 %50 + %50 = OpLabel + %51 = OpLoad %type_sampler %mySampler + %52 = OpSampledImage %type_sampled_image %44 %51 + %53 = OpImageSampleExplicitLod %v4float %52 %21 Lod %float_0 + %54 = OpCompositeExtract %float %53 0 + OpBranch %39 + %49 = OpLabel + OpBranch %39 + %48 = OpLabel + OpUnreachable + %39 = OpLabel + %55 = OpPhi %float %54 %50 %float_1 %49 + %56 = OpCompositeConstruct %v3float %float_n1 %float_n1 %55 + OpSelectionMerge %57 None + OpSwitch %uint_0 %58 + %58 = OpLabel + OpSelectionMerge %59 DontFlatten + OpBranchConditional %47 %60 %61 + %61 = OpLabel + %62 = 
OpLoad %type_sampler %mySampler + %63 = OpSampledImage %type_sampled_image %44 %62 + %64 = OpImageSampleExplicitLod %v4float %63 %21 Lod %float_0 + %65 = OpCompositeExtract %float %64 0 + OpBranch %57 + %60 = OpLabel + OpBranch %57 + %59 = OpLabel + OpUnreachable + %57 = OpLabel + %66 = OpPhi %float %65 %61 %float_1 %60 + %67 = OpCompositeConstruct %v3float %float_1 %float_1 %66 + %68 = OpExtInst %v3float %1 Cross %56 %67 + %69 = OpCompositeExtract %float %68 0 + %70 = OpConvertFToU %uint %69 + OpStore %out_var_SV_TARGET1 %70 + OpReturn + OpFunctionEnd + diff --git a/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag b/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag new file mode 100644 index 00000000000..050a3385307 --- /dev/null +++ b/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-1.asm.frag @@ -0,0 +1,80 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 65 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values0" + OpName %_ "" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %__0 "" + OpName %FragColor "FragColor" + OpDecorate %_runtimearr_float ArrayStride 4 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_runtimearr_float_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 NonWritable + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %float_0 = OpConstant %float 0 + %11 
= OpConstantComposite %v2float %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_16 = OpConstant %int 16 + %bool = OpTypeBool +%_runtimearr_float = OpTypeRuntimeArray %float + %SSBO = OpTypeStruct %_runtimearr_float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_runtimearr_float_0 = OpTypeRuntimeArray %float + %SSBO1 = OpTypeStruct %_runtimearr_float_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %__0 = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int_1 = OpConstant %int 1 +%_ptr_Output_v2float = OpTypePointer Output %v2float + %FragColor = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %17 + %17 = OpLabel + %61 = OpPhi %v2float %11 %5 %d %18 + %60 = OpPhi %int %int_0 %5 %49 %18 + %25 = OpSLessThan %bool %60 %int_16 + OpLoopMerge %19 %18 None + OpBranchConditional %25 %pre18 %19 + %pre18 = OpLabel + OpBranch %18 + %18 = OpLabel + %32 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %60 + %43 = OpAccessChain %_ptr_Uniform_float %__0 %int_0 %60 + %33 = OpLoad %float %32 + %44 = OpLoad %float %43 + %a = OpFMul %v2float %61 %61 + %b = OpCompositeInsert %v2float %33 %a 0 + %c = OpCompositeInsert %v2float %44 %b 1 + %d = OpFAdd %v2float %61 %c + %49 = OpIAdd %int %60 %int_1 + OpBranch %17 + %19 = OpLabel + OpStore %FragColor %61 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag b/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag new file mode 100644 index 00000000000..14f1f6efd9e --- /dev/null +++ b/shaders-no-opt/asm/frag/composite-insert-hoisted-temporaries-2.asm.frag @@ -0,0 +1,82 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 65 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment 
%main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values0" + OpName %_ "" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %__0 "" + OpName %FragColor "FragColor" + OpDecorate %_runtimearr_float ArrayStride 4 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_runtimearr_float_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 NonWritable + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v2float %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_16 = OpConstant %int 16 + %bool = OpTypeBool +%_runtimearr_float = OpTypeRuntimeArray %float + %SSBO = OpTypeStruct %_runtimearr_float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_runtimearr_float_0 = OpTypeRuntimeArray %float + %SSBO1 = OpTypeStruct %_runtimearr_float_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %__0 = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int_1 = OpConstant %int 1 +%_ptr_Output_v2float = OpTypePointer Output %v2float + %FragColor = OpVariable %_ptr_Output_v2float Output + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %17 + %17 = OpLabel + %61 = OpPhi %v2float %11 %5 %d %cont + %60 = OpPhi %int %int_0 %5 %49 %cont + %25 = OpSLessThan %bool %60 %int_16 + OpLoopMerge %19 %cont None + OpBranchConditional %25 %pre18 %19 + %pre18 = OpLabel + OpBranch %18 + %18 = OpLabel + %32 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %60 + %43 = 
OpAccessChain %_ptr_Uniform_float %__0 %int_0 %60 + %33 = OpLoad %float %32 + %44 = OpLoad %float %43 + %a = OpFMul %v2float %61 %61 + %b = OpCompositeInsert %v2float %33 %a 0 + %c = OpCompositeInsert %v2float %44 %b 1 + OpBranch %cont + %cont = OpLabel + %d = OpFAdd %v2float %61 %c + %49 = OpIAdd %int %60 %int_1 + OpBranch %17 + %19 = OpLabel + OpStore %FragColor %61 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag b/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag new file mode 100644 index 00000000000..9408e69ac09 --- /dev/null +++ b/shaders-no-opt/asm/frag/composite-insert-inheritance.asm.frag @@ -0,0 +1,127 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vInput %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %vInput "vInput" + OpName %FragColor "FragColor" + OpName %phi "PHI" + OpDecorate %vInput RelaxedPrecision + OpDecorate %vInput Location 0 + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %b0 RelaxedPrecision + OpDecorate %b1 RelaxedPrecision + OpDecorate %b2 RelaxedPrecision + OpDecorate %b3 RelaxedPrecision + OpDecorate %c1 RelaxedPrecision + OpDecorate %c3 RelaxedPrecision + OpDecorate %d4_mp RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vInput = OpVariable %_ptr_Input_v4float Input + %float_1 = OpConstant %float 1 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_2 = OpConstant %float 2 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 
+ %uint_2 = OpConstant %uint 2 + %float_4 = OpConstant %float 4 + %uint_3 = OpConstant %uint 3 + %v4float_arr2 = OpTypeArray %v4float %uint_2 + %v44float = OpTypeMatrix %v4float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %v4undef = OpUndef %v4float + %v4const = OpConstantNull %v4float + %v4arrconst = OpConstantNull %v4float_arr2 + %v44const = OpConstantNull %v44float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + + %loaded0 = OpLoad %v4float %vInput + + ; Basic case (highp). + %a0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %a1 = OpCompositeInsert %v4float %float_2 %a0 1 + %a2 = OpCompositeInsert %v4float %float_3 %a1 2 + %a3 = OpCompositeInsert %v4float %float_4 %a2 3 + OpStore %FragColor %a3 + + ; Basic case (mediump). + %b0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %b1 = OpCompositeInsert %v4float %float_2 %b0 1 + %b2 = OpCompositeInsert %v4float %float_3 %b1 2 + %b3 = OpCompositeInsert %v4float %float_4 %b2 3 + OpStore %FragColor %b3 + + ; Mix relaxed precision. + %c0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %c1 = OpCompositeInsert %v4float %float_2 %c0 1 + %c2 = OpCompositeInsert %v4float %float_3 %c1 2 + %c3 = OpCompositeInsert %v4float %float_4 %c2 3 + OpStore %FragColor %c3 + + ; SSA use after insert + %d0 = OpCompositeInsert %v4float %float_1 %loaded0 0 + %d1 = OpCompositeInsert %v4float %float_2 %d0 1 + %d2 = OpCompositeInsert %v4float %float_3 %d1 2 + %d3 = OpCompositeInsert %v4float %float_4 %d2 3 + %d4 = OpFAdd %v4float %d3 %d0 + OpStore %FragColor %d4 + %d4_mp = OpFAdd %v4float %d3 %d1 + OpStore %FragColor %d4_mp + + ; Verify Insert behavior on Undef. + %e0 = OpCompositeInsert %v4float %float_1 %v4undef 0 + %e1 = OpCompositeInsert %v4float %float_2 %e0 1 + %e2 = OpCompositeInsert %v4float %float_3 %e1 2 + %e3 = OpCompositeInsert %v4float %float_4 %e2 3 + OpStore %FragColor %e3 + + ; Verify Insert behavior on Constant. 
+ %f0 = OpCompositeInsert %v4float %float_1 %v4const 0 + OpStore %FragColor %f0 + + ; Verify Insert behavior on Array. + %g0 = OpCompositeInsert %v4float_arr2 %float_1 %v4arrconst 1 2 + %g1 = OpCompositeInsert %v4float_arr2 %float_2 %g0 0 3 + %g2 = OpCompositeExtract %v4float %g1 0 + OpStore %FragColor %g2 + %g3 = OpCompositeExtract %v4float %g1 1 + OpStore %FragColor %g3 + + ; Verify Insert behavior on Matrix. + %h0 = OpCompositeInsert %v44float %float_1 %v44const 1 2 + %h1 = OpCompositeInsert %v44float %float_2 %h0 2 3 + %h2 = OpCompositeExtract %v4float %h1 0 + OpStore %FragColor %h2 + %h3 = OpCompositeExtract %v4float %h1 1 + OpStore %FragColor %h3 + %h4 = OpCompositeExtract %v4float %h1 2 + OpStore %FragColor %h4 + %h5 = OpCompositeExtract %v4float %h1 3 + OpStore %FragColor %h5 + + ; Verify that we cannot RMW PHI variables. + OpBranch %next + %next = OpLabel + %phi = OpPhi %v4float %d2 %5 + %i0 = OpCompositeInsert %v4float %float_4 %phi 3 + OpStore %FragColor %i0 + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag b/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag new file mode 100644 index 00000000000..9f1a4573ddc --- /dev/null +++ b/shaders-no-opt/asm/frag/demote-impure-function-call.vk.nocompat.asm.frag @@ -0,0 +1,63 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 33 +; Schema: 0 + OpCapability Shader + OpCapability DemoteToHelperInvocationEXT + OpExtension "SPV_EXT_demote_to_helper_invocation" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vA %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_demote_to_helper_invocation" + OpName %main "main" + OpName %foobar_i1_ "foobar(i1;" + OpName %a "a" + OpName %a_0 "a" + OpName %vA "vA" + OpName %param "param" + OpName %FragColor "FragColor" + OpDecorate %vA Flat + OpDecorate %vA 
Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %10 = OpTypeFunction %v4float %_ptr_Function_int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %float_10 = OpConstant %float 10 + %21 = OpConstantComposite %v4float %float_10 %float_10 %float_10 %float_10 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %a_0 = OpVariable %_ptr_Function_v4float Function + %param = OpVariable %_ptr_Function_int Function + %29 = OpLoad %int %vA + OpStore %param %29 + %30 = OpFunctionCall %v4float %foobar_i1_ %param + OpStore %FragColor %21 + OpReturn + OpFunctionEnd + %foobar_i1_ = OpFunction %v4float None %10 + %a = OpFunctionParameter %_ptr_Function_int + %13 = OpLabel + %14 = OpLoad %int %a + %17 = OpSLessThan %bool %14 %int_0 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %19 + %18 = OpLabel + OpDemoteToHelperInvocationEXT + OpBranch %19 + %19 = OpLabel + OpReturnValue %21 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag b/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag new file mode 100644 index 00000000000..0f039166b07 --- /dev/null +++ b/shaders-no-opt/asm/frag/discard-impure-function-call.asm.frag @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 34 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vA %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %foobar_i1_ "foobar(i1;" + 
OpName %a "a" + OpName %a_0 "a" + OpName %vA "vA" + OpName %param "param" + OpName %FragColor "FragColor" + OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %10 = OpTypeFunction %v4float %_ptr_Function_int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %float_10 = OpConstant %float 10 + %22 = OpConstantComposite %v4float %float_10 %float_10 %float_10 %float_10 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %a_0 = OpVariable %_ptr_Function_v4float Function + %param = OpVariable %_ptr_Function_int Function + %30 = OpLoad %int %vA + OpStore %param %30 + %31 = OpFunctionCall %v4float %foobar_i1_ %param + OpStore %FragColor %22 + OpReturn + OpFunctionEnd + %foobar_i1_ = OpFunction %v4float None %10 + %a = OpFunctionParameter %_ptr_Function_int + %13 = OpLabel + %14 = OpLoad %int %a + %17 = OpSLessThan %bool %14 %int_0 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %19 + %18 = OpLabel + OpKill + %19 = OpLabel + OpReturnValue %22 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag b/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag new file mode 100644 index 00000000000..97400dfb16e --- /dev/null +++ b/shaders-no-opt/asm/frag/do-while-continue-phi.asm.invalid.frag @@ -0,0 +1,64 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 42 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + 
OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %gl_FragCoord "gl_FragCoord" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %bool = OpTypeBool + %false = OpConstantFalse %bool +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %31 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %true = OpConstantTrue %bool + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %33 + %33 = OpLabel + OpLoopMerge %32 %35 None + OpBranch %6 + %6 = OpLabel + OpLoopMerge %8 %24 None + OpBranch %7 + %7 = OpLabel + %17 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %18 = OpLoad %float %17 + %22 = OpFOrdNotEqual %bool %18 %18 + OpSelectionMerge %24 None + OpBranchConditional %22 %23 %24 + %23 = OpLabel + OpBranch %8 + %24 = OpLabel + OpBranchConditional %false %6 %8 + %8 = OpLabel + %41 = OpPhi %bool %true %23 %false %24 + OpSelectionMerge %39 None + OpBranchConditional %41 %32 %39 + %39 = OpLabel + OpStore %_GLF_color %31 + OpBranch %32 + %35 = OpLabel + OpBranch %33 + %32 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag b/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag new file mode 100644 index 00000000000..d789ce36b0d --- /dev/null +++ b/shaders-no-opt/asm/frag/early-conditional-return-switch.asm.frag @@ -0,0 +1,133 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 81 +; Schema: 0 + OpCapability 
Shader + OpCapability Sampled1D + OpCapability Image1D + OpCapability SampledBuffer + OpCapability ImageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %PsTextureLoadArray "main" %gl_FragCoord %out_var_SV_TARGET + OpExecutionMode %PsTextureLoadArray OriginUpperLeft + OpSource HLSL 500 + OpName %type_2d_image "type.2d.image" + OpName %type_gCBuffarrayIndex "type.gCBuffarrayIndex" + OpMemberName %type_gCBuffarrayIndex 0 "gArrayIndex" + OpName %gCBuffarrayIndex "gCBuffarrayIndex" + OpName %g_textureArray0 "g_textureArray0" + OpName %g_textureArray1 "g_textureArray1" + OpName %g_textureArray2 "g_textureArray2" + OpName %g_textureArray3 "g_textureArray3" + OpName %out_var_SV_TARGET "out.var.SV_TARGET" + OpName %PsTextureLoadArray "PsTextureLoadArray" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %out_var_SV_TARGET Location 0 + OpDecorate %gCBuffarrayIndex DescriptorSet 0 + OpDecorate %gCBuffarrayIndex Binding 0 + OpDecorate %g_textureArray0 DescriptorSet 0 + OpDecorate %g_textureArray0 Binding 0 + OpDecorate %g_textureArray1 DescriptorSet 0 + OpDecorate %g_textureArray1 Binding 1 + OpDecorate %g_textureArray2 DescriptorSet 0 + OpDecorate %g_textureArray2 Binding 2 + OpDecorate %g_textureArray3 DescriptorSet 0 + OpDecorate %g_textureArray3 Binding 3 + OpMemberDecorate %type_gCBuffarrayIndex 0 Offset 0 + OpDecorate %type_gCBuffarrayIndex Block + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %v4float = OpTypeVector %float 4 + %18 = OpConstantComposite %v4float %float_0 %float_1 %float_0 %float_1 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_gCBuffarrayIndex = OpTypeStruct %uint +%_ptr_Uniform_type_gCBuffarrayIndex = OpTypePointer Uniform %type_gCBuffarrayIndex +%_ptr_Input_v4float = OpTypePointer Input %v4float 
+%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %24 = OpTypeFunction %void +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %v3int = OpTypeVector %int 3 + %v2int = OpTypeVector %int 2 +%gCBuffarrayIndex = OpVariable %_ptr_Uniform_type_gCBuffarrayIndex Uniform +%g_textureArray0 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%g_textureArray1 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%g_textureArray2 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%g_textureArray3 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_TARGET = OpVariable %_ptr_Output_v4float Output + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool + %false = OpConstantFalse %bool + %true = OpConstantTrue %bool + %32 = OpUndef %v4float +%PsTextureLoadArray = OpFunction %void None %24 + %33 = OpLabel + %34 = OpLoad %v4float %gl_FragCoord + OpSelectionMerge %35 None + OpSwitch %uint_0 %36 + %36 = OpLabel + %37 = OpAccessChain %_ptr_Uniform_uint %gCBuffarrayIndex %int_0 + %38 = OpLoad %uint %37 + OpSelectionMerge %39 None + OpSwitch %38 %40 0 %41 1 %42 2 %43 3 %44 + %41 = OpLabel + %45 = OpCompositeExtract %float %34 0 + %46 = OpCompositeExtract %float %34 1 + %47 = OpConvertFToS %int %45 + %48 = OpConvertFToS %int %46 + %49 = OpCompositeConstruct %v3int %47 %48 %int_0 + %50 = OpVectorShuffle %v2int %49 %49 0 1 + %51 = OpLoad %type_2d_image %g_textureArray0 + %52 = OpImageFetch %v4float %51 %50 Lod %int_0 + OpBranch %39 + %42 = OpLabel + %53 = OpCompositeExtract %float %34 0 + %54 = OpCompositeExtract %float %34 1 + %55 = OpConvertFToS %int %53 + %56 = OpConvertFToS %int %54 + %57 = OpCompositeConstruct %v3int %55 %56 %int_0 + %58 = OpVectorShuffle %v2int %57 %57 0 1 + %59 = OpLoad %type_2d_image %g_textureArray1 + %60 = OpImageFetch %v4float %59 %58 Lod %int_0 + OpBranch %39 + %43 = OpLabel + %61 = OpCompositeExtract %float %34 0 + %62 
= OpCompositeExtract %float %34 1 + %63 = OpConvertFToS %int %61 + %64 = OpConvertFToS %int %62 + %65 = OpCompositeConstruct %v3int %63 %64 %int_0 + %66 = OpVectorShuffle %v2int %65 %65 0 1 + %67 = OpLoad %type_2d_image %g_textureArray2 + %68 = OpImageFetch %v4float %67 %66 Lod %int_0 + OpBranch %39 + %44 = OpLabel + %69 = OpCompositeExtract %float %34 0 + %70 = OpCompositeExtract %float %34 1 + %71 = OpConvertFToS %int %69 + %72 = OpConvertFToS %int %70 + %73 = OpCompositeConstruct %v3int %71 %72 %int_0 + %74 = OpVectorShuffle %v2int %73 %73 0 1 + %75 = OpLoad %type_2d_image %g_textureArray3 + %76 = OpImageFetch %v4float %75 %74 Lod %int_0 + OpBranch %39 + %40 = OpLabel + OpBranch %39 + %39 = OpLabel + %77 = OpPhi %v4float %52 %41 %60 %42 %68 %43 %76 %44 %32 %40 + %78 = OpPhi %bool %true %41 %true %42 %true %43 %true %44 %false %40 + OpSelectionMerge %79 None + OpBranchConditional %78 %35 %79 + %79 = OpLabel + OpBranch %35 + %35 = OpLabel + %80 = OpPhi %v4float %77 %39 %18 %79 + OpStore %out_var_SV_TARGET %80 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag b/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag new file mode 100644 index 00000000000..a9650ddbb6b --- /dev/null +++ b/shaders-no-opt/asm/frag/empty-struct-in-struct.asm.frag @@ -0,0 +1,61 @@ +; SPIR-V +; Version: 1.2 +; Generator: Khronos; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %EntryPoint_Main "main" + OpExecutionMode %EntryPoint_Main OriginUpperLeft + OpSource Unknown 100 + OpName %EmptyStructTest "EmptyStructTest" + OpName %EmptyStruct2Test "EmptyStruct2Test" + OpName %GetValue "GetValue" + OpName %GetValue2 "GetValue" + OpName %self "self" + OpName %self2 "self" + OpName %emptyStruct "emptyStruct" + OpName %value "value" + OpName %EntryPoint_Main "EntryPoint_Main" + +%EmptyStructTest = OpTypeStruct +%EmptyStruct2Test = OpTypeStruct %EmptyStructTest +%_ptr_Function_EmptyStruct2Test = 
OpTypePointer Function %EmptyStruct2Test + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %5 = OpTypeFunction %float %_ptr_Function_EmptyStruct2Test + %6 = OpTypeFunction %float %EmptyStruct2Test + %void = OpTypeVoid +%_ptr_Function_void = OpTypePointer Function %void + %8 = OpTypeFunction %void %_ptr_Function_EmptyStruct2Test + %9 = OpTypeFunction %void + %float_0 = OpConstant %float 0 + %value4 = OpConstantNull %EmptyStruct2Test + + %GetValue = OpFunction %float None %5 + %self = OpFunctionParameter %_ptr_Function_EmptyStruct2Test + %13 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + + %GetValue2 = OpFunction %float None %6 + %self2 = OpFunctionParameter %EmptyStruct2Test + %14 = OpLabel + OpReturnValue %float_0 + OpFunctionEnd + +%EntryPoint_Main = OpFunction %void None %9 + %37 = OpLabel + %emptyStruct = OpVariable %_ptr_Function_EmptyStruct2Test Function + %18 = OpVariable %_ptr_Function_EmptyStruct2Test Function + %value = OpVariable %_ptr_Function_float Function + %value2 = OpCompositeConstruct %EmptyStructTest + %value3 = OpCompositeConstruct %EmptyStruct2Test %value2 + %22 = OpFunctionCall %float %GetValue %emptyStruct + %23 = OpFunctionCall %float %GetValue2 %value3 + %24 = OpFunctionCall %float %GetValue2 %value4 + OpStore %value %22 + OpStore %value %23 + OpStore %value %24 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.frag b/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.invalid.frag similarity index 100% rename from shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.frag rename to shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-inverted.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.frag b/shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.invalid.frag similarity index 100% rename from 
shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.frag rename to shaders-no-opt/asm/frag/for-loop-dedicated-merge-block-non-inverted.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag b/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag new file mode 100644 index 00000000000..a10970e9637 --- /dev/null +++ b/shaders-no-opt/asm/frag/late-expression-invalidation-2.asm.frag @@ -0,0 +1,625 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 761 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpName %main "main" + OpName %pos "pos" + OpName %gl_FragCoord "gl_FragCoord" + OpName %buf0 "buf0" + OpMemberName %buf0 0 "resolution" + OpName %_ "" + OpName %ipos "ipos" + OpName %i "i" + OpName %map "map" + OpName %p "p" + OpName %canwalk "canwalk" + OpName %v "v" + OpName %directions "directions" + OpName %j "j" + OpName %d "d" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpMemberDecorate %buf0 0 Offset 0 + OpDecorate %buf0 Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %buf0 = OpTypeStruct %v2float +%_ptr_Uniform_buf0 = OpTypePointer Uniform %buf0 + %_ = OpVariable %_ptr_Uniform_buf0 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %v2int = OpTypeVector %int 2 +%_ptr_Function_v2int = OpTypePointer Function %v2int + %uint = 
OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_16 = OpConstant %float 16 + %uint_1 = OpConstant %uint 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_256 = OpConstant %int 256 + %bool = OpTypeBool + %uint_256 = OpConstant %uint 256 +%_arr_int_uint_256 = OpTypeArray %int %uint_256 +%_ptr_Private__arr_int_uint_256 = OpTypePointer Private %_arr_int_uint_256 + %map = OpVariable %_ptr_Private__arr_int_uint_256 Private +%_ptr_Private_int = OpTypePointer Private %int + %int_1 = OpConstant %int 1 + %63 = OpConstantComposite %v2int %int_0 %int_0 +%_ptr_Function_bool = OpTypePointer Function %bool + %true = OpConstantTrue %bool + %int_2 = OpConstant %int 2 + %int_16 = OpConstant %int 16 + %int_14 = OpConstant %int 14 + %false = OpConstantFalse %bool + %int_8 = OpConstant %int 8 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %437 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_0 = OpConstant %float 0 + %441 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 +%mat2v4float = OpTypeMatrix %v4float 2 +%_ptr_Private_mat2v4float = OpTypePointer Private %mat2v4float + %556 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %557 = OpConstantComposite %mat2v4float %556 %556 + %558 = OpVariable %_ptr_Private_mat2v4float Private %557 + %760 = OpConstantNull %bool + %main = OpFunction %void None %3 + %5 = OpLabel + %pos = OpVariable %_ptr_Function_v2float Function + %ipos = OpVariable %_ptr_Function_v2int Function + %i = OpVariable %_ptr_Function_int Function + %p = OpVariable %_ptr_Function_v2int Function + %canwalk = OpVariable %_ptr_Function_bool Function + %v = OpVariable %_ptr_Function_int Function + %directions = OpVariable %_ptr_Function_int Function + %j = OpVariable %_ptr_Function_int Function + %d = OpVariable %_ptr_Function_int Function + %13 = 
OpLoad %v4float %gl_FragCoord + %14 = OpVectorShuffle %v2float %13 %13 0 1 + %564 = OpISub %int %int_256 %int_14 + %21 = OpAccessChain %_ptr_Uniform_v2float %_ %int_0 + %22 = OpLoad %v2float %21 + %566 = OpSNegate %int %564 + %23 = OpFDiv %v2float %14 %22 + OpStore %pos %23 + %30 = OpAccessChain %_ptr_Function_float %pos %uint_0 + %31 = OpLoad %float %30 + %33 = OpFMul %float %31 %float_16 + %34 = OpConvertFToS %int %33 + %36 = OpAccessChain %_ptr_Function_float %pos %uint_1 + %37 = OpLoad %float %36 + %38 = OpFMul %float %37 %float_16 + %39 = OpConvertFToS %int %38 + %40 = OpCompositeConstruct %v2int %34 %39 + OpStore %ipos %40 + OpStore %i %int_0 + OpBranch %43 + %43 = OpLabel + OpLoopMerge %45 %46 None + OpBranch %47 + %47 = OpLabel + %48 = OpLoad %int %i + %51 = OpSLessThan %bool %48 %int_256 + OpBranchConditional %51 %44 %45 + %44 = OpLabel + %56 = OpLoad %int %i + %58 = OpAccessChain %_ptr_Private_int %map %56 + OpStore %58 %int_0 + OpBranch %46 + %46 = OpLabel + %59 = OpLoad %int %i + %61 = OpIAdd %int %59 %int_1 + OpStore %i %61 + OpBranch %43 + %45 = OpLabel + OpStore %p %63 + OpStore %canwalk %true + OpStore %v %int_0 + OpBranch %68 + %68 = OpLabel + OpLoopMerge %70 %71 None + OpBranch %69 + %69 = OpLabel + %72 = OpLoad %int %v + %73 = OpIAdd %int %72 %int_1 + OpStore %v %73 + OpStore %directions %int_0 + %75 = OpAccessChain %_ptr_Function_int %p %uint_0 + %76 = OpLoad %int %75 + %77 = OpSGreaterThan %bool %76 %int_0 + OpSelectionMerge %79 None + OpBranchConditional %77 %78 %79 + %78 = OpLabel + %80 = OpAccessChain %_ptr_Function_int %p %uint_0 + %81 = OpLoad %int %80 + %83 = OpISub %int %81 %int_2 + %84 = OpAccessChain %_ptr_Function_int %p %uint_1 + %85 = OpLoad %int %84 + %87 = OpIMul %int %85 %int_16 + %88 = OpIAdd %int %83 %87 + %89 = OpAccessChain %_ptr_Private_int %map %88 + %90 = OpLoad %int %89 + %91 = OpIEqual %bool %90 %int_0 + OpBranch %79 + %79 = OpLabel + %92 = OpPhi %bool %77 %69 %91 %78 + OpSelectionMerge %94 None + OpBranchConditional %92 
%93 %94 + %93 = OpLabel + %95 = OpLoad %int %directions + %96 = OpIAdd %int %95 %int_1 + OpStore %directions %96 + OpBranch %94 + %94 = OpLabel + %97 = OpAccessChain %_ptr_Function_int %p %uint_1 + %98 = OpLoad %int %97 + %99 = OpSGreaterThan %bool %98 %int_0 + OpSelectionMerge %101 None + OpBranchConditional %99 %100 %101 + %100 = OpLabel + %102 = OpAccessChain %_ptr_Function_int %p %uint_0 + %103 = OpLoad %int %102 + %104 = OpAccessChain %_ptr_Function_int %p %uint_1 + %105 = OpLoad %int %104 + %106 = OpISub %int %105 %int_2 + %107 = OpIMul %int %106 %int_16 + %108 = OpIAdd %int %103 %107 + %109 = OpAccessChain %_ptr_Private_int %map %108 + %110 = OpLoad %int %109 + %111 = OpIEqual %bool %110 %int_0 + OpBranch %101 + %101 = OpLabel + %112 = OpPhi %bool %99 %94 %111 %100 + OpSelectionMerge %114 None + OpBranchConditional %112 %113 %114 + %113 = OpLabel + %115 = OpLoad %int %directions + %116 = OpIAdd %int %115 %int_1 + OpStore %directions %116 + OpBranch %114 + %114 = OpLabel + %117 = OpAccessChain %_ptr_Function_int %p %uint_0 + %118 = OpLoad %int %117 + %120 = OpSLessThan %bool %118 %int_14 + OpSelectionMerge %122 None + OpBranchConditional %120 %121 %122 + %121 = OpLabel + %123 = OpAccessChain %_ptr_Function_int %p %uint_0 + %124 = OpLoad %int %123 + %125 = OpIAdd %int %124 %int_2 + %126 = OpAccessChain %_ptr_Function_int %p %uint_1 + %127 = OpLoad %int %126 + %128 = OpIMul %int %127 %int_16 + %129 = OpIAdd %int %125 %128 + %130 = OpAccessChain %_ptr_Private_int %map %129 + %131 = OpLoad %int %130 + %132 = OpIEqual %bool %131 %int_0 + OpBranch %122 + %122 = OpLabel + %133 = OpPhi %bool %120 %114 %132 %121 + OpSelectionMerge %135 None + OpBranchConditional %133 %134 %135 + %134 = OpLabel + %136 = OpLoad %int %directions + %137 = OpIAdd %int %136 %int_1 + OpStore %directions %137 + OpBranch %135 + %135 = OpLabel + %594 = OpISub %int %int_256 %566 + %138 = OpAccessChain %_ptr_Function_int %p %uint_1 + %139 = OpLoad %int %138 + %140 = OpSLessThan %bool %139 %int_14 
+ OpSelectionMerge %142 None + OpBranchConditional %140 %141 %142 + %141 = OpLabel + %143 = OpAccessChain %_ptr_Function_int %p %uint_0 + %144 = OpLoad %int %143 + %145 = OpAccessChain %_ptr_Function_int %p %uint_1 + %146 = OpLoad %int %145 + %147 = OpIAdd %int %146 %int_2 + %148 = OpIMul %int %147 %int_16 + %149 = OpIAdd %int %144 %148 + %150 = OpAccessChain %_ptr_Private_int %map %149 + %151 = OpLoad %int %150 + %152 = OpIEqual %bool %151 %int_0 + OpBranch %142 + %142 = OpLabel + %153 = OpPhi %bool %140 %135 %152 %141 + OpSelectionMerge %155 None + OpBranchConditional %153 %154 %155 + %154 = OpLabel + %156 = OpLoad %int %directions + %157 = OpIAdd %int %156 %int_1 + OpStore %directions %157 + OpBranch %155 + %155 = OpLabel + %158 = OpLoad %int %directions + %159 = OpIEqual %bool %158 %int_0 + OpSelectionMerge %161 None + OpBranchConditional %159 %160 %207 + %160 = OpLabel + OpStore %canwalk %false + OpStore %i %int_0 + OpBranch %163 + %163 = OpLabel + OpLoopMerge %165 %166 None + OpBranch %167 + %167 = OpLabel + %168 = OpLoad %int %i + %170 = OpSLessThan %bool %168 %int_8 + OpBranchConditional %170 %164 %165 + %164 = OpLabel + OpStore %j %int_0 + %609 = OpISub %int %594 %168 + OpStore %558 %557 + OpBranchConditional %760 %166 %172 + %172 = OpLabel + OpLoopMerge %174 %175 Unroll + OpBranch %176 + %176 = OpLabel + %177 = OpLoad %int %j + %178 = OpSLessThan %bool %177 %int_8 + OpBranchConditional %178 %173 %174 + %173 = OpLabel + %179 = OpLoad %int %j + %180 = OpIMul %int %179 %int_2 + %181 = OpLoad %int %i + %182 = OpIMul %int %181 %int_2 + %183 = OpIMul %int %182 %int_16 + %184 = OpIAdd %int %180 %183 + %185 = OpAccessChain %_ptr_Private_int %map %184 + %186 = OpLoad %int %185 + %187 = OpIEqual %bool %186 %int_0 + OpSelectionMerge %189 None + OpBranchConditional %187 %188 %189 + %188 = OpLabel + %190 = OpLoad %int %j + %191 = OpIMul %int %190 %int_2 + %192 = OpAccessChain %_ptr_Function_int %p %uint_0 + OpStore %192 %191 + %193 = OpLoad %int %i + %194 = OpIMul 
%int %193 %int_2 + %195 = OpAccessChain %_ptr_Function_int %p %uint_1 + OpStore %195 %194 + OpStore %canwalk %true + OpBranch %189 + %189 = OpLabel + OpBranch %175 + %175 = OpLabel + %196 = OpLoad %int %j + %197 = OpIAdd %int %196 %int_1 + OpStore %j %197 + OpBranch %172 + %174 = OpLabel + OpBranch %166 + %166 = OpLabel + %198 = OpLoad %int %i + %199 = OpIAdd %int %198 %int_1 + OpStore %i %199 + OpBranch %163 + %165 = OpLabel + %200 = OpAccessChain %_ptr_Function_int %p %uint_0 + %201 = OpLoad %int %200 + %202 = OpAccessChain %_ptr_Function_int %p %uint_1 + %203 = OpLoad %int %202 + %204 = OpIMul %int %203 %int_16 + %205 = OpIAdd %int %201 %204 + %206 = OpAccessChain %_ptr_Private_int %map %205 + OpStore %206 %int_1 + OpBranch %161 + %207 = OpLabel + %209 = OpLoad %int %v + %210 = OpLoad %int %directions + %211 = OpSMod %int %209 %210 + OpStore %d %211 + %212 = OpLoad %int %directions + %213 = OpLoad %int %v + %214 = OpIAdd %int %213 %212 + OpStore %v %214 + %215 = OpLoad %int %d + %216 = OpSGreaterThanEqual %bool %215 %int_0 + OpSelectionMerge %218 None + OpBranchConditional %216 %217 %218 + %217 = OpLabel + %219 = OpAccessChain %_ptr_Function_int %p %uint_0 + %220 = OpLoad %int %219 + %221 = OpSGreaterThan %bool %220 %int_0 + OpBranch %218 + %218 = OpLabel + %222 = OpPhi %bool %216 %207 %221 %217 + OpSelectionMerge %224 None + OpBranchConditional %222 %223 %224 + %223 = OpLabel + %225 = OpAccessChain %_ptr_Function_int %p %uint_0 + %226 = OpLoad %int %225 + %227 = OpISub %int %226 %int_2 + %228 = OpAccessChain %_ptr_Function_int %p %uint_1 + %229 = OpLoad %int %228 + %230 = OpIMul %int %229 %int_16 + %231 = OpIAdd %int %227 %230 + %232 = OpAccessChain %_ptr_Private_int %map %231 + %233 = OpLoad %int %232 + %234 = OpIEqual %bool %233 %int_0 + OpBranch %224 + %224 = OpLabel + %235 = OpPhi %bool %222 %218 %234 %223 + OpSelectionMerge %237 None + OpBranchConditional %235 %236 %237 + %236 = OpLabel + %238 = OpLoad %int %d + %239 = OpISub %int %238 %int_1 + OpStore %d 
%239 + %240 = OpAccessChain %_ptr_Function_int %p %uint_0 + %241 = OpLoad %int %240 + %242 = OpAccessChain %_ptr_Function_int %p %uint_1 + %243 = OpLoad %int %242 + %244 = OpIMul %int %243 %int_16 + %245 = OpIAdd %int %241 %244 + %246 = OpAccessChain %_ptr_Private_int %map %245 + OpStore %246 %int_1 + %247 = OpAccessChain %_ptr_Function_int %p %uint_0 + %248 = OpLoad %int %247 + %249 = OpISub %int %248 %int_1 + %250 = OpAccessChain %_ptr_Function_int %p %uint_1 + %251 = OpLoad %int %250 + %252 = OpIMul %int %251 %int_16 + %253 = OpIAdd %int %249 %252 + %254 = OpAccessChain %_ptr_Private_int %map %253 + OpStore %254 %int_1 + %255 = OpAccessChain %_ptr_Function_int %p %uint_0 + %256 = OpLoad %int %255 + %257 = OpISub %int %256 %int_2 + %258 = OpAccessChain %_ptr_Function_int %p %uint_1 + %259 = OpLoad %int %258 + %260 = OpIMul %int %259 %int_16 + %261 = OpIAdd %int %257 %260 + %262 = OpAccessChain %_ptr_Private_int %map %261 + OpStore %262 %int_1 + %263 = OpAccessChain %_ptr_Function_int %p %uint_0 + %264 = OpLoad %int %263 + %265 = OpISub %int %264 %int_2 + %266 = OpAccessChain %_ptr_Function_int %p %uint_0 + OpStore %266 %265 + OpBranch %237 + %237 = OpLabel + %267 = OpLoad %int %d + %268 = OpSGreaterThanEqual %bool %267 %int_0 + OpSelectionMerge %270 None + OpBranchConditional %268 %269 %270 + %269 = OpLabel + %271 = OpAccessChain %_ptr_Function_int %p %uint_1 + %272 = OpLoad %int %271 + %273 = OpSGreaterThan %bool %272 %int_0 + OpBranch %270 + %270 = OpLabel + %274 = OpPhi %bool %268 %237 %273 %269 + OpSelectionMerge %276 None + OpBranchConditional %274 %275 %276 + %275 = OpLabel + %277 = OpAccessChain %_ptr_Function_int %p %uint_0 + %278 = OpLoad %int %277 + %279 = OpAccessChain %_ptr_Function_int %p %uint_1 + %280 = OpLoad %int %279 + %281 = OpISub %int %280 %int_2 + %282 = OpIMul %int %281 %int_16 + %283 = OpIAdd %int %278 %282 + %284 = OpAccessChain %_ptr_Private_int %map %283 + %285 = OpLoad %int %284 + %286 = OpIEqual %bool %285 %int_0 + OpBranch %276 + 
%276 = OpLabel + %287 = OpPhi %bool %274 %270 %286 %275 + OpSelectionMerge %289 None + OpBranchConditional %287 %288 %289 + %288 = OpLabel + %290 = OpLoad %int %d + %291 = OpISub %int %290 %int_1 + OpStore %d %291 + %292 = OpAccessChain %_ptr_Function_int %p %uint_0 + %293 = OpLoad %int %292 + %294 = OpAccessChain %_ptr_Function_int %p %uint_1 + %295 = OpLoad %int %294 + %296 = OpIMul %int %295 %int_16 + %297 = OpIAdd %int %293 %296 + %298 = OpAccessChain %_ptr_Private_int %map %297 + OpStore %298 %int_1 + %299 = OpAccessChain %_ptr_Function_int %p %uint_0 + %300 = OpLoad %int %299 + %301 = OpAccessChain %_ptr_Function_int %p %uint_1 + %302 = OpLoad %int %301 + %303 = OpISub %int %302 %int_1 + %304 = OpIMul %int %303 %int_16 + %305 = OpIAdd %int %300 %304 + %306 = OpAccessChain %_ptr_Private_int %map %305 + OpStore %306 %int_1 + %307 = OpAccessChain %_ptr_Function_int %p %uint_0 + %308 = OpLoad %int %307 + %309 = OpAccessChain %_ptr_Function_int %p %uint_1 + %310 = OpLoad %int %309 + %311 = OpISub %int %310 %int_2 + %312 = OpIMul %int %311 %int_16 + %313 = OpIAdd %int %308 %312 + %314 = OpAccessChain %_ptr_Private_int %map %313 + OpStore %314 %int_1 + %315 = OpAccessChain %_ptr_Function_int %p %uint_1 + %316 = OpLoad %int %315 + %317 = OpISub %int %316 %int_2 + %318 = OpAccessChain %_ptr_Function_int %p %uint_1 + OpStore %318 %317 + OpBranch %289 + %289 = OpLabel + %319 = OpLoad %int %d + %320 = OpSGreaterThanEqual %bool %319 %int_0 + OpSelectionMerge %322 None + OpBranchConditional %320 %321 %322 + %321 = OpLabel + %323 = OpAccessChain %_ptr_Function_int %p %uint_0 + %324 = OpLoad %int %323 + %325 = OpSLessThan %bool %324 %int_14 + OpBranch %322 + %322 = OpLabel + %326 = OpPhi %bool %320 %289 %325 %321 + OpSelectionMerge %328 None + OpBranchConditional %326 %327 %328 + %327 = OpLabel + %329 = OpAccessChain %_ptr_Function_int %p %uint_0 + %330 = OpLoad %int %329 + %331 = OpIAdd %int %330 %int_2 + %332 = OpAccessChain %_ptr_Function_int %p %uint_1 + %333 = OpLoad 
%int %332 + %334 = OpIMul %int %333 %int_16 + %335 = OpIAdd %int %331 %334 + %336 = OpAccessChain %_ptr_Private_int %map %335 + %337 = OpLoad %int %336 + %338 = OpIEqual %bool %337 %int_0 + OpBranch %328 + %328 = OpLabel + %339 = OpPhi %bool %326 %322 %338 %327 + OpSelectionMerge %341 None + OpBranchConditional %339 %340 %341 + %340 = OpLabel + %342 = OpLoad %int %d + %343 = OpISub %int %342 %int_1 + OpStore %d %343 + %344 = OpAccessChain %_ptr_Function_int %p %uint_0 + %345 = OpLoad %int %344 + %346 = OpAccessChain %_ptr_Function_int %p %uint_1 + %347 = OpLoad %int %346 + %348 = OpIMul %int %347 %int_16 + %349 = OpIAdd %int %345 %348 + %350 = OpAccessChain %_ptr_Private_int %map %349 + OpStore %350 %int_1 + %351 = OpAccessChain %_ptr_Function_int %p %uint_0 + %352 = OpLoad %int %351 + %353 = OpIAdd %int %352 %int_1 + %354 = OpAccessChain %_ptr_Function_int %p %uint_1 + %355 = OpLoad %int %354 + %356 = OpIMul %int %355 %int_16 + %357 = OpIAdd %int %353 %356 + %358 = OpAccessChain %_ptr_Private_int %map %357 + OpStore %358 %int_1 + %359 = OpAccessChain %_ptr_Function_int %p %uint_0 + %360 = OpLoad %int %359 + %361 = OpIAdd %int %360 %int_2 + %362 = OpAccessChain %_ptr_Function_int %p %uint_1 + %363 = OpLoad %int %362 + %364 = OpIMul %int %363 %int_16 + %365 = OpIAdd %int %361 %364 + %366 = OpAccessChain %_ptr_Private_int %map %365 + OpStore %366 %int_1 + %367 = OpAccessChain %_ptr_Function_int %p %uint_0 + %368 = OpLoad %int %367 + %369 = OpIAdd %int %368 %int_2 + %370 = OpAccessChain %_ptr_Function_int %p %uint_0 + OpStore %370 %369 + OpBranch %341 + %341 = OpLabel + %371 = OpLoad %int %d + %372 = OpSGreaterThanEqual %bool %371 %int_0 + OpSelectionMerge %374 None + OpBranchConditional %372 %373 %374 + %373 = OpLabel + %375 = OpAccessChain %_ptr_Function_int %p %uint_1 + %376 = OpLoad %int %375 + %377 = OpSLessThan %bool %376 %int_14 + OpBranch %374 + %374 = OpLabel + %378 = OpPhi %bool %372 %341 %377 %373 + OpSelectionMerge %380 None + OpBranchConditional %378 %379 
%380 + %379 = OpLabel + %381 = OpAccessChain %_ptr_Function_int %p %uint_0 + %382 = OpLoad %int %381 + %383 = OpAccessChain %_ptr_Function_int %p %uint_1 + %384 = OpLoad %int %383 + %385 = OpIAdd %int %384 %int_2 + %386 = OpIMul %int %385 %int_16 + %387 = OpIAdd %int %382 %386 + %388 = OpAccessChain %_ptr_Private_int %map %387 + %389 = OpLoad %int %388 + %390 = OpIEqual %bool %389 %int_0 + OpBranch %380 + %380 = OpLabel + %391 = OpPhi %bool %378 %374 %390 %379 + OpSelectionMerge %393 None + OpBranchConditional %391 %392 %393 + %392 = OpLabel + %394 = OpLoad %int %d + %395 = OpISub %int %394 %int_1 + OpStore %d %395 + %396 = OpAccessChain %_ptr_Function_int %p %uint_0 + %397 = OpLoad %int %396 + %398 = OpAccessChain %_ptr_Function_int %p %uint_1 + %399 = OpLoad %int %398 + %400 = OpIMul %int %399 %int_16 + %401 = OpIAdd %int %397 %400 + %402 = OpAccessChain %_ptr_Private_int %map %401 + OpStore %402 %int_1 + %403 = OpAccessChain %_ptr_Function_int %p %uint_0 + %404 = OpLoad %int %403 + %405 = OpAccessChain %_ptr_Function_int %p %uint_1 + %406 = OpLoad %int %405 + %407 = OpIAdd %int %406 %int_1 + %408 = OpIMul %int %407 %int_16 + %409 = OpIAdd %int %404 %408 + %410 = OpAccessChain %_ptr_Private_int %map %409 + OpStore %410 %int_1 + %411 = OpAccessChain %_ptr_Function_int %p %uint_0 + %412 = OpLoad %int %411 + %413 = OpAccessChain %_ptr_Function_int %p %uint_1 + %414 = OpLoad %int %413 + %415 = OpIAdd %int %414 %int_2 + %416 = OpIMul %int %415 %int_16 + %417 = OpIAdd %int %412 %416 + %418 = OpAccessChain %_ptr_Private_int %map %417 + OpStore %418 %int_1 + %419 = OpAccessChain %_ptr_Function_int %p %uint_1 + %420 = OpLoad %int %419 + %421 = OpIAdd %int %420 %int_2 + %422 = OpAccessChain %_ptr_Function_int %p %uint_1 + OpStore %422 %421 + OpBranch %393 + %393 = OpLabel + OpBranch %161 + %161 = OpLabel + %423 = OpAccessChain %_ptr_Function_int %ipos %uint_1 + %424 = OpLoad %int %423 + %425 = OpIMul %int %424 %int_16 + %426 = OpAccessChain %_ptr_Function_int %ipos %uint_0 
+ %427 = OpLoad %int %426 + %428 = OpIAdd %int %425 %427 + %429 = OpAccessChain %_ptr_Private_int %map %428 + %430 = OpLoad %int %429 + %431 = OpIEqual %bool %430 %int_1 + OpSelectionMerge %433 None + OpBranchConditional %431 %432 %433 + %432 = OpLabel + OpStore %_GLF_color %437 + OpReturn + %433 = OpLabel + OpBranch %71 + %71 = OpLabel + %439 = OpLoad %bool %canwalk + OpBranchConditional %439 %68 %70 + %70 = OpLabel + OpStore %_GLF_color %441 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag b/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag new file mode 100644 index 00000000000..6f9192cd200 --- /dev/null +++ b/shaders-no-opt/asm/frag/late-expression-invalidation.asm.frag @@ -0,0 +1,109 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 68 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %gl_FragCoord "gl_FragCoord" + OpName %_GLF_color "_GLF_color" + OpName %m "m" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_10 = OpConstant %float 10 + %bool = OpTypeBool +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %19 = OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 +%mat4v4float = OpTypeMatrix %v4float 4 + %21 = OpConstantComposite %v4float %float_1 %float_1 %float_1 
%float_1 + %22 = OpConstantComposite %mat4v4float %21 %21 %21 %21 + %uint_4 = OpConstant %uint 4 +%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4 +%_ptr_Function__arr_mat4v4float_uint_4 = OpTypePointer Function %_arr_mat4v4float_uint_4 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 + %v2float = OpTypeVector %float 2 + %30 = OpConstantComposite %v2float %float_1 %float_1 + %int_1 = OpConstant %int 1 + %uint_3 = OpConstant %uint 3 +%_ptr_Function_float = OpTypePointer Function %float + %34 = OpConstantComposite %_arr_mat4v4float_uint_4 %22 %22 %22 %22 + %main = OpFunction %void None %7 + %35 = OpLabel + %m = OpVariable %_ptr_Function__arr_mat4v4float_uint_4 Function + OpBranch %36 + %36 = OpLabel + OpLoopMerge %37 %38 None + OpBranch %39 + %39 = OpLabel + %40 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %41 = OpLoad %float %40 + %42 = OpFOrdLessThan %bool %41 %float_10 + OpSelectionMerge %43 None + OpBranchConditional %42 %44 %43 + %44 = OpLabel + OpStore %_GLF_color %19 + OpBranch %37 + %43 = OpLabel + OpStore %m %34 + OpBranch %45 + %45 = OpLabel + %46 = OpPhi %int %int_0 %43 %47 %48 + %49 = OpSLessThan %bool %46 %int_4 + OpLoopMerge %50 %48 None + OpBranchConditional %49 %51 %50 + %51 = OpLabel + OpBranch %52 + %52 = OpLabel + %53 = OpPhi %int %int_0 %51 %54 %55 + %56 = OpSLessThan %bool %53 %int_4 + OpLoopMerge %57 %55 None + OpBranchConditional %56 %58 %57 + %58 = OpLabel + %59 = OpSelect %int %56 %int_1 %int_0 + %60 = OpAccessChain %_ptr_Function_float %m %59 %46 %uint_3 + %61 = OpLoad %float %60 + %62 = OpCompositeConstruct %v2float %61 %61 + %63 = OpFDiv %v2float %30 %62 + %64 = OpExtInst %float %1 Distance %30 %63 + %65 = OpFOrdLessThan %bool %64 %float_1 + OpSelectionMerge %66 None + OpBranchConditional %65 %67 %55 + %67 = OpLabel + OpStore %_GLF_color %21 + OpBranch %55 + %66 = OpLabel + OpBranch %55 + %55 = OpLabel + %54 = OpIAdd %int %53 %int_1 + OpBranch %52 + %57 = OpLabel + OpBranch %48 + %48 
= OpLabel + %47 = OpIAdd %int %46 %int_1 + OpBranch %45 + %50 = OpLabel + OpBranch %37 + %38 = OpLabel + OpBranch %36 + %37 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag b/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag new file mode 100644 index 00000000000..9baebc20f40 --- /dev/null +++ b/shaders-no-opt/asm/frag/ldexp-uint-exponent.asm.frag @@ -0,0 +1,36 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %_GLF_color "_GLF_color" + OpDecorate %_GLF_color Location 0 + OpDecorate %18 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %11 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + %uint_1 = OpConstant %uint 1 + %15 = OpConstantComposite %v4uint %uint_1 %uint_1 %uint_1 %uint_1 + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpBitCount %v4uint %15 + %19 = OpExtInst %v4float %1 Ldexp %11 %18 + OpStore %_GLF_color %19 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/loop-merge-to-continue.asm.frag b/shaders-no-opt/asm/frag/loop-merge-to-continue.asm.invalid.frag similarity index 100% rename from shaders/asm/frag/loop-merge-to-continue.asm.frag rename to shaders-no-opt/asm/frag/loop-merge-to-continue.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag 
b/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag new file mode 100644 index 00000000000..ea85ed0b964 --- /dev/null +++ b/shaders-no-opt/asm/frag/nonuniform-bracket-handling-2.vk.nocompat.asm.frag @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 53 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability SampledImageArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamplers "uSamplers" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "indices" + OpName %_ "" + OpName %vUV "vUV" + OpName %uSampler "uSampler" + OpName %gl_FragCoord "gl_FragCoord" + OpDecorate %FragColor Location 0 + OpDecorate %uSamplers DescriptorSet 0 + OpDecorate %uSamplers Binding 0 + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %26 NonUniform + OpDecorate %28 NonUniform + OpDecorate %29 NonUniform + OpDecorate %vUV Location 0 + OpDecorate %uSampler DescriptorSet 1 + OpDecorate %uSampler Binding 0 + OpDecorate %38 NonUniform + OpDecorate %gl_FragCoord BuiltIn FragCoord + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_runtimearr_11 = OpTypeRuntimeArray %11 +%_ptr_UniformConstant__runtimearr_11 = OpTypePointer 
UniformConstant %_runtimearr_11 + %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_11 UniformConstant + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %float_0 = OpConstant %float 0 + %uSampler = OpVariable %_ptr_UniformConstant_11 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_1 = OpConstant %uint 1 +%_ptr_Input_float = OpTypePointer Input %float + %main = OpFunction %void None %3 + %5 = OpLabel + %24 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %int_10 + %26 = OpLoad %uint %24 + %28 = OpAccessChain %_ptr_UniformConstant_11 %uSamplers %26 + %29 = OpLoad %11 %28 + %33 = OpLoad %v2float %vUV + %35 = OpImageSampleExplicitLod %v4float %29 %33 Lod %float_0 + OpStore %FragColor %35 + %37 = OpLoad %11 %uSampler + %38 = OpCopyObject %11 %37 + %39 = OpLoad %v2float %vUV + %44 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_1 + %45 = OpLoad %float %44 + %46 = OpConvertFToS %int %45 + %47 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %46 + %48 = OpLoad %uint %47 + %49 = OpConvertUToF %float %48 + %50 = OpImageSampleExplicitLod %v4float %38 %39 Lod %49 + %51 = OpLoad %v4float %FragColor + %52 = OpFAdd %v4float %51 %50 + OpStore %FragColor %52 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag b/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag new file mode 100644 index 00000000000..c5428a8bb9b --- /dev/null +++ 
b/shaders-no-opt/asm/frag/nonuniform-ssbo.nocompat.vk.asm.frag @@ -0,0 +1,99 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpCapability StorageBufferArrayNonUniformIndexing + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vIndex %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpSourceExtension "GL_EXT_samplerless_texture_functions" + OpName %main "main" + OpName %i "i" + OpName %vIndex "vIndex" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "counter" + OpMemberName %SSBO 1 "v" + OpName %ssbos "ssbos" + OpName %FragColor "FragColor" + OpDecorate %vIndex Flat + OpDecorate %vIndex Location 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 0 + OpDecorate %ssbos Binding 3 + OpDecorate %32 NonUniform + OpDecorate %39 NonUniform + OpDecorate %49 NonUniform + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Input_int = OpTypePointer Input %int + %vIndex = OpVariable %_ptr_Input_int Input + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %uint %_runtimearr_v4float +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %int_60 = OpConstant %int 60 + %int_1 = OpConstant %int 1 + %int_70 = OpConstant %int 70 + %float_20 = OpConstant %float 20 + %30 = 
OpConstantComposite %v4float %float_20 %float_20 %float_20 %float_20 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_100 = OpConstant %int 100 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_100 = OpConstant %uint 100 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %uint_2 = OpConstant %uint 2 +%_ptr_Output_float = OpTypePointer Output %float + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %11 = OpLoad %int %vIndex + OpStore %i %11 + %20 = OpLoad %int %i + %22 = OpIAdd %int %20 %int_60 + %23 = OpCopyObject %int %22 + %25 = OpLoad %int %i + %27 = OpIAdd %int %25 %int_70 + %28 = OpCopyObject %int %27 + %32 = OpAccessChain %_ptr_Uniform_v4float %ssbos %23 %int_1 %28 + OpStore %32 %30 + %33 = OpLoad %int %i + %35 = OpIAdd %int %33 %int_100 + %36 = OpCopyObject %int %35 + %39 = OpAccessChain %_ptr_Uniform_uint %ssbos %36 %int_0 + %43 = OpAtomicIAdd %uint %39 %uint_1 %uint_0 %uint_100 + %46 = OpLoad %int %i + %47 = OpCopyObject %int %46 + %49 = OpAccessChain %_ptr_Uniform_SSBO %ssbos %47 + %50 = OpArrayLength %uint %49 1 + %51 = OpBitcast %int %50 + %52 = OpConvertSToF %float %51 + %55 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + %56 = OpLoad %float %55 + %57 = OpFAdd %float %56 %52 + %58 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %58 %57 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag b/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag new file mode 100644 index 00000000000..17aab1d8f77 --- /dev/null +++ b/shaders-no-opt/asm/frag/only-initializer-frag-depth.asm.frag @@ -0,0 +1,25 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 10 +; Schema: 0 + OpCapability 
Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragDepth + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main DepthReplacing + OpSource GLSL 450 + OpName %main "main" + OpName %gl_FragDepth "gl_FragDepth" + OpDecorate %gl_FragDepth BuiltIn FragDepth + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Output_float = OpTypePointer Output %float + %float_0_5 = OpConstant %float 0.5 +%gl_FragDepth = OpVariable %_ptr_Output_float Output %float_0_5 + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag b/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag new file mode 100644 index 00000000000..3696660d36d --- /dev/null +++ b/shaders-no-opt/asm/frag/phi.zero-initialize.asm.frag @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 40 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %vColor "vColor" + OpName %uninit_function_int "uninit_function_int" + OpName %FragColor "FragColor" + OpName %uninit_int "uninit_int" + OpName %uninit_vector "uninit_vector" + OpName %uninit_matrix "uninit_matrix" + OpName %Foo "Foo" + OpMemberName %Foo 0 "a" + OpName %uninit_foo "uninit_foo" + OpDecorate %vColor Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %float_10 = OpConstant %float 10 + %bool = OpTypeBool + %int = OpTypeInt 32 1 
+%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_20 = OpConstant %int 20 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Private_int = OpTypePointer Private %int + %uninit_int = OpUndef %int + %v4int = OpTypeVector %int 4 +%_ptr_Private_v4int = OpTypePointer Private %v4int +%uninit_vector = OpUndef %v4int +%mat4v4float = OpTypeMatrix %v4float 4 +%_ptr_Private_mat4v4float = OpTypePointer Private %mat4v4float +%uninit_matrix = OpUndef %mat4v4float + %Foo = OpTypeStruct %int +%_ptr_Private_Foo = OpTypePointer Private %Foo + %uninit_foo = OpUndef %Foo + %main = OpFunction %void None %3 + %5 = OpLabel +%uninit_function_int = OpVariable %_ptr_Function_int Function + %13 = OpAccessChain %_ptr_Input_float %vColor %uint_0 + %14 = OpLoad %float %13 + %17 = OpFOrdGreaterThan %bool %14 %float_10 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %24 + %18 = OpLabel + OpBranch %19 + %24 = OpLabel + OpBranch %19 + %19 = OpLabel + %27 = OpPhi %int %int_10 %18 %int_20 %24 + %28 = OpLoad %v4float %vColor + OpStore %FragColor %28 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag new file mode 100644 index 00000000000..ebd8d6bab75 --- /dev/null +++ b/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ 
"callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = 
OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag new file mode 100644 index 00000000000..69b8f911204 --- /dev/null +++ b/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 
DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + 
OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag new file mode 100644 index 00000000000..7c0fe9a2b24 --- /dev/null +++ b/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord 
+ OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = 
OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag new file mode 100644 index 00000000000..bccea17b392 --- /dev/null +++ b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporaries.asm.frag @@ -0,0 +1,97 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 52 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %a "a" + OpName %vColor "vColor" + OpName %b "b" + OpName %i "i" + OpName %FragColor "FragColor" + OpDecorate %a RelaxedPrecision + OpDecorate %vColor RelaxedPrecision + OpDecorate %vColor Location 0 + OpDecorate %16 RelaxedPrecision + OpDecorate %20 RelaxedPrecision + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %37 RelaxedPrecision + OpDecorate %38 RelaxedPrecision + OpDecorate %39 RelaxedPrecision + OpDecorate %43 RelaxedPrecision + OpDecorate %44 RelaxedPrecision + OpDecorate %45 RelaxedPrecision + OpDecorate %49 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 + %int = OpTypeInt 32 
1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 + %bool = OpTypeBool +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function_float Function + %b = OpVariable %_ptr_Function_float Function + %i = OpVariable %_ptr_Function_int Function + %15 = OpAccessChain %_ptr_Input_float %vColor %uint_0 + %16 = OpLoad %float %15 + OpStore %a %16 + %19 = OpAccessChain %_ptr_Input_float %vColor %uint_1 + %20 = OpLoad %float %19 + OpStore %b %20 + OpStore %i %int_0 + OpBranch %25 + %25 = OpLabel + OpLoopMerge %27 %28 None + OpBranch %29 + %29 = OpLabel + %30 = OpLoad %int %i + %33 = OpSLessThan %bool %30 %int_4 + OpBranchConditional %33 %26 %27 + %26 = OpLabel + %37 = OpLoad %v4float %FragColor + %38 = OpCompositeConstruct %v4float %float_1 %float_1 %float_1 %float_1 + %39 = OpFAdd %v4float %37 %38 + OpStore %FragColor %39 + OpBranch %28 + %28 = OpLabel + %40 = OpLoad %int %i + %42 = OpIAdd %int %40 %int_1 + OpStore %i %42 + %43 = OpLoad %float %a + %44 = OpLoad %float %a + %45 = OpFMul %float %43 %44 + %force_tmp = OpFMul %float %45 %44 + %46 = OpLoad %float %b + %47 = OpFAdd %float %46 %force_tmp + OpStore %b %47 + OpBranch %25 + %27 = OpLabel + %48 = OpLoad %float %b + %49 = OpLoad %v4float %FragColor + %50 = OpCompositeConstruct %v4float %48 %48 %48 %48 + %51 = OpFAdd %v4float %49 %50 + OpStore %FragColor %51 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag new file mode 100644 index 00000000000..42d72dc410c --- /dev/null +++ b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules-hoisted-temporary.asm.frag @@ -0,0 +1,47 @@ +; SPIR-V +; Version: 1.0 +; 
Generator: Khronos Glslang Reference Front End; 10 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vColor %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %b "b" + OpName %vColor "vColor" + OpName %FragColor "FragColor" + OpDecorate %b RelaxedPrecision + OpDecorate %vColor RelaxedPrecision + OpDecorate %vColor Location 0 + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Input_float = OpTypePointer Input %float + %vColor = OpVariable %_ptr_Input_float Input + %bool = OpTypeBool + %false = OpConstantFalse %bool +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %main = OpFunction %void None %3 + %5 = OpLabel + OpBranch %6 + %6 = OpLabel + OpLoopMerge %8 %9 None + OpBranch %7 + %7 = OpLabel + %15 = OpLoad %float %vColor + %b = OpFMul %float %15 %15 + OpBranch %9 + %9 = OpLabel + OpBranchConditional %false %6 %8 + %8 = OpLabel + %bb = OpFMul %float %b %b + OpStore %FragColor %bb + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag new file mode 100644 index 00000000000..ad526e5560f --- /dev/null +++ b/shaders-no-opt/asm/frag/relaxed-precision-inheritance-rules.asm.frag @@ -0,0 +1,146 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 15 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor0 %FragColor1 %FragColor2 %FragColor3 %V4 + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpName %main "main" + OpName %FragColor0 
"FragColor0" + OpName %FragColor1 "FragColor1" + OpName %FragColor2 "FragColor2" + OpName %FragColor3 "FragColor3" + OpName %V4 "V4" + OpName %V4_value0 "V4_value0" + OpName %V1_value0 "V1_value0" + OpName %V1_value1 "V1_value1" + OpName %V1_value2 "V1_value2" + OpName %float_0_weird "float_0_weird" + OpName %ubo "ubo" + OpName %ubo_mp0 "ubo_mp0" + OpName %ubo_hp0 "ubo_hp0" + OpName %block "UBO" + OpName %phi_mp "phi_mp" + OpName %phi_hp "phi_hp" + OpMemberName %block 0 "mediump_float" + OpMemberName %block 1 "highp_float" + OpDecorate %FragColor0 RelaxedPrecision + OpDecorate %FragColor0 Location 0 + OpDecorate %FragColor1 RelaxedPrecision + OpDecorate %FragColor1 Location 1 + OpDecorate %FragColor2 RelaxedPrecision + OpDecorate %FragColor2 Location 2 + OpDecorate %FragColor3 RelaxedPrecision + OpDecorate %FragColor3 Location 3 + OpDecorate %V4 RelaxedPrecision + OpDecorate %V4 Location 0 + OpDecorate %V4_add RelaxedPrecision + OpDecorate %V4_mul RelaxedPrecision + OpDecorate %V1_add RelaxedPrecision + OpDecorate %V1_mul RelaxedPrecision + OpDecorate %phi_mp RelaxedPrecision + OpDecorate %mp_to_mp RelaxedPrecision + OpDecorate %hp_to_mp RelaxedPrecision + OpDecorate %V1_add_composite RelaxedPrecision + OpDecorate %V1_mul_composite RelaxedPrecision + OpDecorate %V4_sin1 RelaxedPrecision + OpDecorate %float_0_weird RelaxedPrecision + OpDecorate %ubo Binding 0 + OpDecorate %ubo DescriptorSet 0 + OpDecorate %block Block + OpMemberDecorate %block 0 Offset 0 + OpMemberDecorate %block 1 Offset 4 + OpMemberDecorate %block 0 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %block = OpTypeStruct %float %float + %block_ptr = OpTypePointer Uniform %block + %ubo_float_ptr = OpTypePointer Uniform %float + %ubo = OpVariable %block_ptr Uniform + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %float_3 = OpConstant %float 3.0 + %v4float 
= OpTypeVector %float 4 + %float_3_splat = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor0 = OpVariable %_ptr_Output_v4float Output + %FragColor1 = OpVariable %_ptr_Output_v4float Output + %FragColor2 = OpVariable %_ptr_Output_v4float Output + %FragColor3 = OpVariable %_ptr_Output_v4float Output +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_float = OpTypePointer Input %float + %V4 = OpVariable %_ptr_Input_v4float Input + %main = OpFunction %void None %3 + %5 = OpLabel + + ; Inherits precision in GLSL + %V4_value0 = OpLoad %v4float %V4 + + ; Inherits precision in GLSL + %ptr_V4x = OpAccessChain %_ptr_Input_float %V4 %uint_0 + + ; Inherits precision in GLSL + %V1_value0 = OpLoad %float %ptr_V4x + %V1_value1 = OpCompositeExtract %float %V4_value0 2 + %V1_value2 = OpCopyObject %float %V1_value1 + + %mp_ptr = OpAccessChain %ubo_float_ptr %ubo %uint_0 + %hp_ptr = OpAccessChain %ubo_float_ptr %ubo %uint_1 + %ubo_mp0 = OpLoad %float %mp_ptr + %ubo_hp0 = OpLoad %float %hp_ptr + + ; Stays mediump + %V4_add = OpFAdd %v4float %V4_value0 %float_3_splat + ; Must promote to highp + %V4_sub = OpFSub %v4float %V4_value0 %float_3_splat + ; Relaxed, truncate inputs. + %V4_mul = OpFMul %v4float %V4_sub %float_3_splat + OpStore %FragColor0 %V4_add + OpStore %FragColor1 %V4_sub + OpStore %FragColor2 %V4_mul + + ; Same as V4 tests. + %V1_add = OpFAdd %float %V1_value0 %float_3 + %float_0_weird = OpFSub %float %float_3 %ubo_hp0 + %V1_sub = OpFSub %float %V1_value0 %float_0_weird + %V1_mul = OpFMul %float %V1_sub %ubo_hp0 + %V1_result = OpCompositeConstruct %v4float %V1_add %V1_sub %V1_mul %float_3 + OpStore %FragColor3 %V1_result + + ; Same as V4 tests, but composite forwarding. 
+ %V1_add_composite = OpFAdd %float %V1_value1 %ubo_mp0 + %V1_sub_composite = OpFSub %float %V1_value2 %ubo_mp0 + %V1_mul_composite = OpFMul %float %V1_sub_composite %ubo_hp0 + %V1_result_composite = OpCompositeConstruct %v4float %V1_add_composite %V1_sub_composite %V1_mul_composite %float_3 + OpStore %FragColor3 %V1_result_composite + + ; Must promote input to highp. + %V4_sin0 = OpExtInst %v4float %1 Sin %V4_value0 + OpStore %FragColor0 %V4_sin0 + ; Can keep mediump input. + %V4_sin1 = OpExtInst %v4float %1 Sin %V4_value0 + OpStore %FragColor1 %V4_sin1 + + OpBranch %next + %next = OpLabel + %phi_mp = OpPhi %float %V1_add %5 + %phi_hp = OpPhi %float %V1_sub %5 + + ; Consume PHIs in different precision contexts + %mp_to_mp = OpFAdd %float %phi_mp %phi_mp + %mp_to_hp = OpFAdd %float %phi_mp %phi_mp + %hp_to_mp = OpFAdd %float %phi_hp %phi_hp + %hp_to_hp = OpFAdd %float %phi_hp %phi_hp + %complete = OpCompositeConstruct %v4float %mp_to_mp %mp_to_hp %hp_to_mp %hp_to_hp + OpStore %FragColor2 %complete + + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag b/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag new file mode 100644 index 00000000000..a5a16f2873b --- /dev/null +++ b/shaders-no-opt/asm/frag/reserved-function-identifier.asm.frag @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %ACOS_f1_ "mat3" + OpName %a "a" + OpName %ACOS_i1_ "gl_Foo" + OpName %a_0 "a" + OpName %FragColor "FragColor" + OpName %param "param" + OpName %param_0 "param" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float + %8 
= OpTypeFunction %float %_ptr_Function_float + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %14 = OpTypeFunction %float %_ptr_Function_int + %float_1 = OpConstant %float 1 +%_ptr_Output_float = OpTypePointer Output %float + %FragColor = OpVariable %_ptr_Output_float Output + %float_2 = OpConstant %float 2 + %int_4 = OpConstant %int 4 + %main = OpFunction %void None %3 + %5 = OpLabel + %param = OpVariable %_ptr_Function_float Function + %param_0 = OpVariable %_ptr_Function_int Function + OpStore %param %float_2 + %32 = OpFunctionCall %float %ACOS_f1_ %param + OpStore %param_0 %int_4 + %35 = OpFunctionCall %float %ACOS_i1_ %param_0 + %36 = OpFAdd %float %32 %35 + OpStore %FragColor %36 + OpReturn + OpFunctionEnd + %ACOS_f1_ = OpFunction %float None %8 + %a = OpFunctionParameter %_ptr_Function_float + %11 = OpLabel + %18 = OpLoad %float %a + %20 = OpFAdd %float %18 %float_1 + OpReturnValue %20 + OpFunctionEnd + %ACOS_i1_ = OpFunction %float None %14 + %a_0 = OpFunctionParameter %_ptr_Function_int + %17 = OpLabel + %23 = OpLoad %int %a_0 + %24 = OpConvertSToF %float %23 + %25 = OpFAdd %float %24 %float_1 + OpReturnValue %25 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag b/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag new file mode 100644 index 00000000000..5015cef82af --- /dev/null +++ b/shaders-no-opt/asm/frag/reserved-identifiers.asm.frag @@ -0,0 +1,51 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 24 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %spvFoo %SPIRV_Cross_blah %_40 %_m40 %_underscore_foo_bar_meep + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %spvFoo "spvFoo" + OpName %SPIRV_Cross_blah "SPIRV_Cross_blah" + OpName %_40 "_40Bar" + OpName %_m40 "_m40" + OpName %_underscore_foo_bar_meep 
"__underscore_foo__bar_meep__" + OpDecorate %spvFoo Location 0 + OpDecorate %SPIRV_Cross_blah Location 1 + OpDecorate %_40 Location 2 + OpDecorate %_m40 Location 3 + OpDecorate %_underscore_foo_bar_meep Location 4 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %spvFoo = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%SPIRV_Cross_blah = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %14 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %_40 = OpVariable %_ptr_Output_v4float Output + %float_2 = OpConstant %float 2 + %17 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %_m40 = OpVariable %_ptr_Output_v4float Output + %float_3 = OpConstant %float 3 + %20 = OpConstantComposite %v4float %float_3 %float_3 %float_3 %float_3 +%_underscore_foo_bar_meep = OpVariable %_ptr_Output_v4float Output + %float_4 = OpConstant %float 4 + %23 = OpConstantComposite %v4float %float_4 %float_4 %float_4 %float_4 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %spvFoo %11 + OpStore %SPIRV_Cross_blah %14 + OpStore %_40 %17 + OpStore %_m40 %20 + OpStore %_underscore_foo_bar_meep %23 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag b/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag new file mode 100644 index 00000000000..07450ee80b6 --- /dev/null +++ b/shaders-no-opt/asm/frag/scalar-select.spv14.asm.frag @@ -0,0 +1,62 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + 
%bool = OpTypeBool + %false = OpConstantFalse %bool + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %true = OpConstantTrue %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %s = OpTypeStruct %float + %arr = OpTypeArray %float %uint_2 +%_ptr_Function_s = OpTypePointer Function %s +%_ptr_Function_arr = OpTypePointer Function %arr + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_0 %float_1 + %18 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %19 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %20 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %s0 = OpConstantComposite %s %float_0 + %s1 = OpConstantComposite %s %float_1 + %v4bool = OpTypeVector %bool 4 + %b4 = OpConstantComposite %v4bool %false %true %false %true + %arr1 = OpConstantComposite %arr %float_0 %float_1 + %arr2 = OpConstantComposite %arr %float_1 %float_0 + %main = OpFunction %void None %3 + %5 = OpLabel + %ss = OpVariable %_ptr_Function_s Function + %arrvar = OpVariable %_ptr_Function_arr Function + ; Not trivial + %21 = OpSelect %v4float %false %17 %18 + OpStore %FragColor %21 + ; Trivial + %22 = OpSelect %v4float %false %19 %20 + OpStore %FragColor %22 + ; Vector not trivial + %23 = OpSelect %v4float %b4 %17 %18 + OpStore %FragColor %23 + ; Vector trivial + %24 = OpSelect %v4float %b4 %19 %20 + OpStore %FragColor %24 + ; Struct selection + %sout = OpSelect %s %false %s0 %s1 + OpStore %ss %sout + ; Array selection + %arrout = OpSelect %arr %true %arr1 %arr2 + OpStore %arrvar %arrout + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/selection-merge-to-continue.asm.frag b/shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag similarity index 100% rename from 
shaders/asm/frag/selection-merge-to-continue.asm.frag rename to shaders-no-opt/asm/frag/selection-merge-to-continue.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag b/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag new file mode 100644 index 00000000000..9b2eb72899e --- /dev/null +++ b/shaders-no-opt/asm/frag/sparse-texture-feedback-uint-code.asm.desktop.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpCapability SparseResidency + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vUV %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_ARB_sparse_texture2" + OpSourceExtension "GL_ARB_sparse_texture_clamp" + OpName %main "main" + OpName %ret "ret" + OpName %uSamp "uSamp" + OpName %vUV "vUV" + OpName %texel "texel" + OpName %ResType "ResType" + OpName %FragColor "FragColor" + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp Binding 0 + OpDecorate %vUV Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %uint = OpTypeInt 32 0 + %ResType = OpTypeStruct %uint %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + 
%ret = OpVariable %_ptr_Function_bool Function + %texel = OpVariable %_ptr_Function_v4float Function + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v2float %vUV + %24 = OpImageSparseSampleImplicitLod %ResType %14 %18 + %25 = OpCompositeExtract %v4float %24 1 + OpStore %texel %25 + %26 = OpCompositeExtract %uint %24 0 + %27 = OpImageSparseTexelsResident %bool %26 + OpStore %ret %27 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag b/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag new file mode 100644 index 00000000000..a47c6b785f4 --- /dev/null +++ b/shaders-no-opt/asm/frag/subgroup-arithmetic-cast.nocompat.vk.asm.frag @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 78 +; Schema: 0 + OpCapability Shader + OpCapability GroupNonUniform + OpCapability GroupNonUniformArithmetic + OpCapability GroupNonUniformClustered + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %index %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_KHR_shader_subgroup_arithmetic" + OpSourceExtension "GL_KHR_shader_subgroup_basic" + OpSourceExtension "GL_KHR_shader_subgroup_clustered" + OpName %main "main" + OpName %index "index" + OpName %FragColor "FragColor" + OpDecorate %index Flat + OpDecorate %index Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int + %index = OpVariable %_ptr_Input_int Input + %uint_3 = OpConstant %uint 3 + %uint_4 = OpConstant %uint 4 +%_ptr_Output_uint = OpTypePointer Output %uint + %FragColor = OpVariable %_ptr_Output_uint Output + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpLoad %int %index + %u = OpBitcast 
%uint %i + %res0 = OpGroupNonUniformSMin %uint %uint_3 Reduce %i + %res1 = OpGroupNonUniformSMax %uint %uint_3 Reduce %u + %res2 = OpGroupNonUniformUMin %uint %uint_3 Reduce %i + %res3 = OpGroupNonUniformUMax %uint %uint_3 Reduce %u + %res4 = OpGroupNonUniformSMax %uint %uint_3 InclusiveScan %i + %res5 = OpGroupNonUniformSMin %uint %uint_3 InclusiveScan %u + %res6 = OpGroupNonUniformUMax %uint %uint_3 ExclusiveScan %i + %res7 = OpGroupNonUniformUMin %uint %uint_3 ExclusiveScan %u + %res8 = OpGroupNonUniformSMin %uint %uint_3 ClusteredReduce %i %uint_4 + %res9 = OpGroupNonUniformSMax %uint %uint_3 ClusteredReduce %u %uint_4 + %res10 = OpGroupNonUniformUMin %uint %uint_3 ClusteredReduce %i %uint_4 + %res11 = OpGroupNonUniformUMax %uint %uint_3 ClusteredReduce %u %uint_4 + OpStore %FragColor %res0 + OpStore %FragColor %res1 + OpStore %FragColor %res2 + OpStore %FragColor %res3 + OpStore %FragColor %res4 + OpStore %FragColor %res5 + OpStore %FragColor %res6 + OpStore %FragColor %res7 + OpStore %FragColor %res8 + OpStore %FragColor %res9 + OpStore %FragColor %res10 + OpStore %FragColor %res11 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag b/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag new file mode 100644 index 00000000000..39f4d066db8 --- /dev/null +++ b/shaders-no-opt/asm/frag/subgroup-ballot-only.vk.asm.frag @@ -0,0 +1,52 @@ +; SPIR-V +; Version: 1.3 +; Generator: Unknown(30017); 21022 +; Bound: 31 +; Schema: 0 + OpCapability Shader + OpCapability GroupNonUniformBallot + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %INDEX %SV_Target + OpExecutionMode %main OriginUpperLeft + OpName %main "main" + OpName %INDEX "INDEX" + OpName %SV_Target "SV_Target" + OpDecorate %INDEX Flat + OpDecorate %INDEX Location 0 + OpDecorate %SV_Target Location 0 + %void = OpTypeVoid + %2 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %INDEX = OpVariable 
%_ptr_Input_uint Input + %v4uint = OpTypeVector %uint 4 +%_ptr_Output_v4uint = OpTypePointer Output %v4uint + %SV_Target = OpVariable %_ptr_Output_v4uint Output + %bool = OpTypeBool + %uint_100 = OpConstant %uint 100 + %uint_3 = OpConstant %uint 3 +%_ptr_Output_uint = OpTypePointer Output %uint + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %29 + %29 = OpLabel + %11 = OpLoad %uint %INDEX + %13 = OpULessThan %bool %11 %uint_100 + %15 = OpGroupNonUniformBallot %v4uint %uint_3 %13 + %17 = OpCompositeExtract %uint %15 0 + %18 = OpCompositeExtract %uint %15 1 + %19 = OpCompositeExtract %uint %15 2 + %20 = OpCompositeExtract %uint %15 3 + %22 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_0 + OpStore %22 %17 + %24 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_1 + OpStore %24 %18 + %26 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_2 + OpStore %26 %19 + %28 = OpAccessChain %_ptr_Output_uint %SV_Target %uint_3 + OpStore %28 %20 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag similarity index 100% rename from shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag rename to shaders-no-opt/asm/frag/switch-block-case-fallthrough.asm.invalid.frag diff --git a/shaders/asm/frag/switch-merge-to-continue.asm.frag b/shaders-no-opt/asm/frag/switch-merge-to-continue.asm.invalid.frag similarity index 100% rename from shaders/asm/frag/switch-merge-to-continue.asm.frag rename to shaders-no-opt/asm/frag/switch-merge-to-continue.asm.invalid.frag diff --git a/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag b/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag new file mode 100644 index 00000000000..dd9a5a97b0f --- /dev/null +++ 
b/shaders-no-opt/asm/frag/switch-non-default-fallthrough-no-phi.asm.frag @@ -0,0 +1,105 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google rspirv; 0 +; Bound: 80 +; Schema: 0 + OpCapability Shader + OpCapability VulkanMemoryModel + OpExtension "SPV_KHR_vulkan_memory_model" + OpMemoryModel Logical Vulkan + OpEntryPoint Fragment %1 "main" %2 %3 + OpExecutionMode %1 OriginUpperLeft + OpMemberDecorate %_struct_14 0 Offset 0 + OpMemberDecorate %_struct_14 1 Offset 4 + OpMemberDecorate %_struct_15 0 Offset 0 + OpMemberDecorate %_struct_15 1 Offset 4 + OpDecorate %2 Location 0 + OpDecorate %3 Location 0 + OpDecorate %2 Flat + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %bool = OpTypeBool +%_ptr_Input_int = OpTypePointer Input %int +%_ptr_Output_int = OpTypePointer Output %int +%_ptr_Function_int = OpTypePointer Function %int + %void = OpTypeVoid + %_struct_14 = OpTypeStruct %uint %int + %_struct_15 = OpTypeStruct %int %int +%_ptr_Function__struct_15 = OpTypePointer Function %_struct_15 + %24 = OpTypeFunction %void + %2 = OpVariable %_ptr_Input_int Input + %3 = OpVariable %_ptr_Output_int Output + %uint_1 = OpConstant %uint 1 + %26 = OpUndef %_struct_14 + %uint_0 = OpConstant %uint 0 + %int_0 = OpConstant %int 0 + %int_10 = OpConstant %int 10 + %true = OpConstantTrue %bool + %31 = OpUndef %int + %false = OpConstantFalse %bool +%_ptr_Function_bool = OpTypePointer Function %bool + %1 = OpFunction %void None %24 + %32 = OpLabel + %76 = OpVariable %_ptr_Function_bool Function %false + %33 = OpVariable %_ptr_Function__struct_15 Function + %34 = OpVariable %_ptr_Function_int Function + %35 = OpVariable %_ptr_Function_int Function + OpSelectionMerge %72 None + OpSwitch %uint_0 %73 + %73 = OpLabel + %36 = OpLoad %int %2 + %37 = OpAccessChain %_ptr_Function_int %33 %uint_0 + OpStore %37 %int_0 + %38 = OpAccessChain %_ptr_Function_int %33 %uint_1 + OpStore %38 %int_10 + OpBranch %40 + %40 = OpLabel + %41 = OpPhi %_struct_14 %26 %73 %42 %43 + %44 = OpPhi %int %int_0 %73 %45 %43 + 
OpLoopMerge %48 %43 None + OpBranch %49 + %49 = OpLabel + %52 = OpLoad %int %37 + %53 = OpLoad %int %38 + %54 = OpSLessThan %bool %52 %53 + OpSelectionMerge %55 None + OpBranchConditional %54 %56 %57 + %57 = OpLabel + %65 = OpCompositeInsert %_struct_14 %uint_0 %41 0 + OpBranch %55 + %56 = OpLabel + %59 = OpLoad %int %37 + %60 = OpBitcast %int %uint_1 + %61 = OpIAdd %int %59 %60 + OpCopyMemory %34 %37 + %63 = OpLoad %int %34 + OpStore %35 %61 + OpCopyMemory %37 %35 + %64 = OpCompositeConstruct %_struct_14 %uint_1 %63 + OpBranch %55 + %55 = OpLabel + %42 = OpPhi %_struct_14 %64 %56 %65 %57 + %66 = OpCompositeExtract %uint %42 0 + %67 = OpBitcast %int %66 + OpSelectionMerge %71 None + OpSwitch %67 %69 0 %70 1 %71 + %71 = OpLabel + %45 = OpIAdd %int %44 %36 + OpBranch %43 + %70 = OpLabel + OpStore %3 %44 + OpStore %76 %true + OpBranch %48 + %69 = OpLabel + OpBranch %48 + %43 = OpLabel + OpBranch %40 + %48 = OpLabel + %79 = OpPhi %bool %false %69 %true %70 + OpSelectionMerge %77 None + OpBranchConditional %79 %72 %77 + %77 = OpLabel + OpBranch %72 + %72 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag b/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag new file mode 100644 index 00000000000..d2bd15a9785 --- /dev/null +++ b/shaders-no-opt/asm/frag/switch-single-case-multiple-exit-cfg.asm.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 54 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %gl_FragCoord "gl_FragCoord" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %int_0 = 
OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %bool = OpTypeBool + %v2float = OpTypeVector %float 2 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %52 = OpUndef %v2float + %main = OpFunction %void None %3 + %5 = OpLabel + OpSelectionMerge %9 None + OpSwitch %int_0 %8 + %8 = OpLabel + %17 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %18 = OpLoad %float %17 + %22 = OpFOrdNotEqual %bool %18 %18 + OpSelectionMerge %24 None + OpBranchConditional %22 %23 %24 + %23 = OpLabel + OpBranch %9 + %24 = OpLabel + %33 = OpCompositeExtract %float %52 1 + %51 = OpCompositeInsert %v2float %33 %52 1 + OpBranch %9 + %9 = OpLabel + %53 = OpPhi %v2float %52 %23 %51 %24 + %42 = OpCompositeExtract %float %53 0 + %43 = OpCompositeExtract %float %53 1 + %48 = OpCompositeConstruct %v4float %42 %43 %float_1 %float_1 + OpStore %_GLF_color %48 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag b/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag new file mode 100644 index 00000000000..d596e4efe72 --- /dev/null +++ b/shaders-no-opt/asm/frag/terminate-impure-function-call.spv16.asm.frag @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 34 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vA %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %foobar_i1_ "foobar(i1;" + OpName %a "a" + OpName %a_0 "a" + OpName %vA "vA" + OpName %param "param" + OpName %FragColor "FragColor" + 
OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %10 = OpTypeFunction %v4float %_ptr_Function_int + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %float_10 = OpConstant %float 10 + %22 = OpConstantComposite %v4float %float_10 %float_10 %float_10 %float_10 +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %a_0 = OpVariable %_ptr_Function_v4float Function + %param = OpVariable %_ptr_Function_int Function + %30 = OpLoad %int %vA + OpStore %param %30 + %31 = OpFunctionCall %v4float %foobar_i1_ %param + OpStore %FragColor %22 + OpReturn + OpFunctionEnd + %foobar_i1_ = OpFunction %v4float None %10 + %a = OpFunctionParameter %_ptr_Function_int + %13 = OpLabel + %14 = OpLoad %int %a + %17 = OpSLessThan %bool %14 %int_0 + OpSelectionMerge %19 None + OpBranchConditional %17 %18 %19 + %18 = OpLabel + OpTerminateInvocation + %19 = OpLabel + OpReturnValue %22 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag b/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag new file mode 100644 index 00000000000..6f198079ff7 --- /dev/null +++ b/shaders-no-opt/asm/frag/texel-fetch-ms-uint-sample.asm.frag @@ -0,0 +1,68 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 61 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor 
"FragColor" + OpName %uSamp "uSamp" + OpName %gl_FragCoord "gl_FragCoord" + OpDecorate %FragColor Location 0 + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp Binding 0 + OpDecorate %gl_FragCoord BuiltIn FragCoord + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 1 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %v2float = OpTypeVector %float 2 + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 + %uint_0 = OpConstant %uint 0 +%_ptr_Output_float = OpTypePointer Output %float + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v4float %gl_FragCoord + %19 = OpVectorShuffle %v2float %18 %18 0 1 + %22 = OpConvertFToS %v2int %19 + %24 = OpImage %10 %14 + %25 = OpImageFetch %v4float %24 %22 Sample %uint_0 + %28 = OpCompositeExtract %float %25 0 + %30 = OpAccessChain %_ptr_Output_float %FragColor %uint_0 + OpStore %30 %28 + %36 = OpImage %10 %14 + %37 = OpImageFetch %v4float %36 %22 Sample %uint_1 + %38 = OpCompositeExtract %float %37 0 + %40 = OpAccessChain %_ptr_Output_float %FragColor %uint_1 + OpStore %40 %38 + %46 = OpImage %10 %14 + %47 = OpImageFetch %v4float %46 %22 Sample %uint_2 + %48 = OpCompositeExtract %float %47 0 + %50 = OpAccessChain %_ptr_Output_float %FragColor %uint_2 + OpStore %50 %48 + %56 = OpImage %10 %14 + %57 = OpImageFetch %v4float %56 %22 Sample %uint_3 + %58 = OpCompositeExtract %float %57 0 + %60 = OpAccessChain %_ptr_Output_float %FragColor %uint_3 + OpStore 
%60 %58 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/unordered-compare.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.asm.frag new file mode 100644 index 00000000000..15286e0897e --- /dev/null +++ b/shaders-no-opt/asm/frag/unordered-compare.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + 
%main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %ordered = OpFOrdNotEqual %v4bool %41 %42 + OpStore %neq %ordered + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd %v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + 
OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool 
%geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag new file mode 100644 index 00000000000..15286e0897e --- /dev/null +++ b/shaders-no-opt/asm/frag/unordered-compare.relax-nan.asm.frag @@ -0,0 +1,179 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 132 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %A %B %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %test_vector_ "test_vector(" + OpName %test_scalar_ "test_scalar(" + OpName %le "le" + OpName %A "A" + OpName %B "B" + OpName %leq "leq" + OpName %ge "ge" + OpName %geq "geq" + OpName %eq "eq" + OpName %neq "neq" + OpName %le_0 "le" + OpName %leq_0 "leq" + OpName %ge_0 "ge" + OpName %geq_0 "geq" + OpName %eq_0 "eq" + OpName %neq_0 "neq" + OpName %FragColor "FragColor" + OpDecorate %A Location 0 + OpDecorate %B Location 1 + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %8 = OpTypeFunction %v4float + %11 = OpTypeFunction %float + %bool = OpTypeBool + %v4bool = OpTypeVector %bool 4 +%_ptr_Function_v4bool = OpTypePointer Function %v4bool +%_ptr_Input_v4float = OpTypePointer Input %v4float + %A = OpVariable %_ptr_Input_v4float Input + %B = OpVariable %_ptr_Input_v4float Input + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %47 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + 
%48 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Function_bool = OpTypePointer Function %bool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %128 = OpFunctionCall %v4float %test_vector_ + %129 = OpFunctionCall %float %test_scalar_ + %130 = OpCompositeConstruct %v4float %129 %129 %129 %129 + %131 = OpFAdd %v4float %128 %130 + OpStore %FragColor %131 + OpReturn + OpFunctionEnd +%test_vector_ = OpFunction %v4float None %8 + %10 = OpLabel + %le = OpVariable %_ptr_Function_v4bool Function + %leq = OpVariable %_ptr_Function_v4bool Function + %ge = OpVariable %_ptr_Function_v4bool Function + %geq = OpVariable %_ptr_Function_v4bool Function + %eq = OpVariable %_ptr_Function_v4bool Function + %neq = OpVariable %_ptr_Function_v4bool Function + %20 = OpLoad %v4float %A + %22 = OpLoad %v4float %B + %23 = OpFUnordLessThan %v4bool %20 %22 + OpStore %le %23 + %25 = OpLoad %v4float %A + %26 = OpLoad %v4float %B + %27 = OpFUnordLessThanEqual %v4bool %25 %26 + OpStore %leq %27 + %29 = OpLoad %v4float %A + %30 = OpLoad %v4float %B + %31 = OpFUnordGreaterThan %v4bool %29 %30 + OpStore %ge %31 + %33 = OpLoad %v4float %A + %34 = OpLoad %v4float %B + %35 = OpFUnordGreaterThanEqual %v4bool %33 %34 + OpStore %geq %35 + %37 = OpLoad %v4float %A + %38 = OpLoad %v4float %B + %39 = OpFUnordEqual %v4bool %37 %38 + OpStore %eq %39 + %41 = OpLoad %v4float %A + %42 = OpLoad %v4float %B + %43 = OpFUnordNotEqual %v4bool %41 %42 + OpStore %neq %43 + %ordered = OpFOrdNotEqual %v4bool %41 %42 + OpStore %neq %ordered + %44 = OpLoad %v4bool %le + %49 = OpSelect %v4float %44 %48 %47 + %50 = OpLoad %v4bool %leq + %51 = OpSelect %v4float %50 %48 %47 + %52 = OpFAdd %v4float %49 %51 + %53 = OpLoad %v4bool %ge + %54 = OpSelect %v4float %53 %48 %47 + %55 = OpFAdd 
%v4float %52 %54 + %56 = OpLoad %v4bool %geq + %57 = OpSelect %v4float %56 %48 %47 + %58 = OpFAdd %v4float %55 %57 + %59 = OpLoad %v4bool %eq + %60 = OpSelect %v4float %59 %48 %47 + %61 = OpFAdd %v4float %58 %60 + %62 = OpLoad %v4bool %neq + %63 = OpSelect %v4float %62 %48 %47 + %64 = OpFAdd %v4float %61 %63 + OpReturnValue %64 + OpFunctionEnd +%test_scalar_ = OpFunction %float None %11 + %13 = OpLabel + %le_0 = OpVariable %_ptr_Function_bool Function + %leq_0 = OpVariable %_ptr_Function_bool Function + %ge_0 = OpVariable %_ptr_Function_bool Function + %geq_0 = OpVariable %_ptr_Function_bool Function + %eq_0 = OpVariable %_ptr_Function_bool Function + %neq_0 = OpVariable %_ptr_Function_bool Function + %72 = OpAccessChain %_ptr_Input_float %A %uint_0 + %73 = OpLoad %float %72 + %74 = OpAccessChain %_ptr_Input_float %B %uint_0 + %75 = OpLoad %float %74 + %76 = OpFUnordLessThan %bool %73 %75 + OpStore %le_0 %76 + %78 = OpAccessChain %_ptr_Input_float %A %uint_0 + %79 = OpLoad %float %78 + %80 = OpAccessChain %_ptr_Input_float %B %uint_0 + %81 = OpLoad %float %80 + %82 = OpFUnordLessThanEqual %bool %79 %81 + OpStore %leq_0 %82 + %84 = OpAccessChain %_ptr_Input_float %A %uint_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Input_float %B %uint_0 + %87 = OpLoad %float %86 + %88 = OpFUnordGreaterThan %bool %85 %87 + OpStore %ge_0 %88 + %90 = OpAccessChain %_ptr_Input_float %A %uint_0 + %91 = OpLoad %float %90 + %92 = OpAccessChain %_ptr_Input_float %B %uint_0 + %93 = OpLoad %float %92 + %94 = OpFUnordGreaterThanEqual %bool %91 %93 + OpStore %geq_0 %94 + %96 = OpAccessChain %_ptr_Input_float %A %uint_0 + %97 = OpLoad %float %96 + %98 = OpAccessChain %_ptr_Input_float %B %uint_0 + %99 = OpLoad %float %98 + %100 = OpFUnordEqual %bool %97 %99 + OpStore %eq_0 %100 + %102 = OpAccessChain %_ptr_Input_float %A %uint_0 + %103 = OpLoad %float %102 + %104 = OpAccessChain %_ptr_Input_float %B %uint_0 + %105 = OpLoad %float %104 + %106 = OpFUnordNotEqual %bool %103 %105 + 
OpStore %neq_0 %106 + %107 = OpLoad %bool %le_0 + %108 = OpSelect %float %107 %float_1 %float_0 + %109 = OpLoad %bool %leq_0 + %110 = OpSelect %float %109 %float_1 %float_0 + %111 = OpFAdd %float %108 %110 + %112 = OpLoad %bool %ge_0 + %113 = OpSelect %float %112 %float_1 %float_0 + %114 = OpFAdd %float %111 %113 + %115 = OpLoad %bool %geq_0 + %116 = OpSelect %float %115 %float_1 %float_0 + %117 = OpFAdd %float %114 %116 + %118 = OpLoad %bool %eq_0 + %119 = OpSelect %float %118 %float_1 %float_0 + %120 = OpFAdd %float %117 %119 + %121 = OpLoad %bool %neq_0 + %122 = OpSelect %float %121 %float_1 %float_0 + %123 = OpFAdd %float %120 %122 + OpReturnValue %123 + OpFunctionEnd diff --git a/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag b/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag new file mode 100644 index 00000000000..dda5fc40340 --- /dev/null +++ b/shaders-no-opt/asm/frag/vector-extract-dynamic-spec-constant.asm.frag @@ -0,0 +1,49 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 27 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %vColor "vColor" + OpName %omap_r "omap_r" + OpName %omap_g "omap_g" + OpName %omap_b "omap_b" + OpName %omap_a "omap_a" + OpDecorate %FragColor Location 0 + OpDecorate %vColor Location 0 + OpDecorate %omap_r SpecId 0 + OpDecorate %omap_g SpecId 1 + OpDecorate %omap_b SpecId 2 + OpDecorate %omap_a SpecId 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output +%_ptr_Input_v4float = OpTypePointer Input %v4float + %vColor = OpVariable %_ptr_Input_v4float Input + 
%int = OpTypeInt 32 1 + %omap_r = OpSpecConstant %int 0 +%_ptr_Input_float = OpTypePointer Input %float + %omap_g = OpSpecConstant %int 1 + %omap_b = OpSpecConstant %int 2 + %omap_a = OpSpecConstant %int 3 + %main = OpFunction %void None %3 + %5 = OpLabel + %loaded = OpLoad %v4float %vColor + %r = OpVectorExtractDynamic %float %loaded %omap_r + %g = OpVectorExtractDynamic %float %loaded %omap_g + %b = OpVectorExtractDynamic %float %loaded %omap_b + %a = OpVectorExtractDynamic %float %loaded %omap_a + %rgba = OpCompositeConstruct %v4float %r %g %b %a + OpStore %FragColor %rgba + OpReturn + OpFunctionEnd diff --git a/shaders/asm/geom/store-uint-layer.invalid.asm.geom b/shaders-no-opt/asm/geom/store-uint-layer.invalid.asm.geom similarity index 100% rename from shaders/asm/geom/store-uint-layer.invalid.asm.geom rename to shaders-no-opt/asm/geom/store-uint-layer.invalid.asm.geom diff --git a/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp b/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp new file mode 100644 index 00000000000..a38b111235a --- /dev/null +++ b/shaders-no-opt/asm/loop-header-self-continue-break.asm.comp @@ -0,0 +1,109 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Clspv; 0 +; Bound: 83 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_KHR_storage_buffer_storage_class" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %19 "main" %gl_GlobalInvocationID + OpSource OpenCL_C 120 + OpDecorate %_runtimearr_float ArrayStride 4 + OpMemberDecorate %_struct_3 0 Offset 0 + OpDecorate %_struct_3 Block + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %15 DescriptorSet 0 + OpDecorate %15 Binding 0 + OpDecorate %16 DescriptorSet 0 + OpDecorate %16 Binding 1 + OpDecorate %10 SpecId 0 + OpDecorate %11 SpecId 1 + OpDecorate %12 SpecId 2 + %float = OpTypeFloat 32 +%_runtimearr_float = OpTypeRuntimeArray %float + %_struct_3 = OpTypeStruct %_runtimearr_float 
+%_ptr_StorageBuffer__struct_3 = OpTypePointer StorageBuffer %_struct_3 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%_ptr_Private_v3uint = OpTypePointer Private %v3uint + %10 = OpSpecConstant %uint 1 + %11 = OpSpecConstant %uint 1 + %12 = OpSpecConstant %uint 1 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %10 %11 %12 + %void = OpTypeVoid + %18 = OpTypeFunction %void +%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_35 = OpConstant %uint 35 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 + %bool = OpTypeBool + %uint_34 = OpConstant %uint 34 + %uint_5 = OpConstant %uint 5 +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %14 = OpVariable %_ptr_Private_v3uint Private %gl_WorkGroupSize + %15 = OpVariable %_ptr_StorageBuffer__struct_3 StorageBuffer + %16 = OpVariable %_ptr_StorageBuffer__struct_3 StorageBuffer + %19 = OpFunction %void None %18 + %20 = OpLabel + %23 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %uint_0 + %25 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %26 = OpLoad %uint %25 + %27 = OpLoad %float %23 + %29 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %uint_35 + OpBranch %31 + %31 = OpLabel + %32 = OpPhi %float %27 %20 %67 %58 + %33 = OpPhi %uint %uint_0 %20 %63 %58 + %34 = OpLoad %float %29 + OpLoopMerge %69 %58 None + OpBranch %37 + %37 = OpLabel + %38 = OpPhi %float %46 %37 %32 %31 + %39 = OpPhi %float %38 %37 %34 %31 + %40 = OpPhi %uint %44 %37 %uint_0 %31 + %41 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %40 + %42 = OpFAdd %float %39 %38 + %44 = OpIAdd %uint %40 %uint_1 + %45 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %44 + %46 = OpLoad %float %45 + %47 = OpFAdd %float %42 %46 + %49 = OpFDiv %float %47 %float_3 + OpStore %41 %49 + %52 = OpULessThan %bool %40 %uint_34 + %53 = OpLogicalNot %bool 
%52 + OpLoopMerge %56 %37 None + OpBranchConditional %53 %56 %37 + %56 = OpLabel + OpBranch %58 + %58 = OpLabel + %59 = OpLoad %float %29 + %60 = OpFAdd %float %38 %59 + %61 = OpFAdd %float %32 %60 + %62 = OpFDiv %float %61 %float_3 + OpStore %29 %62 + %63 = OpIAdd %uint %33 %uint_1 + %65 = OpULessThan %bool %33 %uint_5 + %66 = OpLogicalNot %bool %65 + %67 = OpLoad %float %23 + OpBranchConditional %66 %69 %31 + %69 = OpLabel + %70 = OpPhi %float %75 %69 %67 %58 + %71 = OpPhi %uint %76 %69 %uint_1 %58 + %72 = OpAccessChain %_ptr_StorageBuffer_float %15 %uint_0 %71 + %73 = OpLoad %float %72 + %74 = OpFOrdLessThan %bool %70 %73 + %75 = OpSelect %float %74 %73 %70 + %76 = OpIAdd %uint %71 %uint_1 + %77 = OpULessThan %bool %71 %uint_35 + %78 = OpLogicalNot %bool %77 + OpLoopMerge %81 %69 None + OpBranchConditional %78 %81 %69 + %81 = OpLabel + %82 = OpAccessChain %_ptr_StorageBuffer_float %16 %uint_0 %26 + OpStore %82 %75 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh b/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh new file mode 100644 index 00000000000..7b38001d8d4 --- /dev/null +++ b/shaders-no-opt/asm/mesh/mesh-shader-plain-builtin-outputs.spv14.asm.vk.nocompat.mesh @@ -0,0 +1,150 @@ +; SPIR-V +; Version: 1.4 +; Generator: Unknown(30017); 21022 +; Bound: 89 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpCapability ShaderViewportIndexLayerEXT + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + OpExtension "SPV_EXT_shader_viewport_index_layer" + OpMemoryModel Logical GLSL450 + OpEntryPoint MeshEXT %main "main" %SV_Position %B %SV_CullPrimitive %SV_RenderTargetArrayIndex %SV_PrimitiveID %C %indices %32 %gl_LocalInvocationIndex %38 + OpExecutionMode %main OutputVertices 24 + OpExecutionMode %main OutputPrimitivesNV 8 + OpExecutionMode %main OutputTrianglesNV + OpExecutionMode %main LocalSize 2 3 4 + OpName %main "main" + 
OpName %SV_Position "SV_Position" + OpName %B "B" + OpName %SV_CullPrimitive "SV_CullPrimitive" + OpName %SV_RenderTargetArrayIndex "SV_RenderTargetArrayIndex" + OpName %SV_PrimitiveID "SV_PrimitiveID" + OpName %C "C" + OpName %indices "indices" + OpName %_ "" + OpDecorate %SV_Position BuiltIn Position + OpDecorate %B Location 1 + OpDecorate %SV_CullPrimitive BuiltIn CullPrimitiveEXT + OpDecorate %SV_CullPrimitive PerPrimitiveNV + OpDecorate %SV_RenderTargetArrayIndex BuiltIn Layer + OpDecorate %SV_RenderTargetArrayIndex PerPrimitiveNV + OpDecorate %SV_PrimitiveID BuiltIn PrimitiveId + OpDecorate %SV_PrimitiveID PerPrimitiveNV + OpDecorate %C Location 3 + OpDecorate %C PerPrimitiveNV + OpDecorate %indices BuiltIn PrimitiveTriangleIndicesEXT + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + %void = OpTypeVoid + %2 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_v4float_uint_24 = OpTypeArray %v4float %uint_24 +%_ptr_Output__arr_v4float_uint_24 = OpTypePointer Output %_arr_v4float_uint_24 +%SV_Position = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %B = OpVariable %_ptr_Output__arr_v4float_uint_24 Output + %bool = OpTypeBool + %uint_8 = OpConstant %uint 8 +%_arr_bool_uint_8 = OpTypeArray %bool %uint_8 +%_ptr_Output__arr_bool_uint_8 = OpTypePointer Output %_arr_bool_uint_8 +%SV_CullPrimitive = OpVariable %_ptr_Output__arr_bool_uint_8 Output +%_arr_uint_uint_8 = OpTypeArray %uint %uint_8 +%_ptr_Output__arr_uint_uint_8 = OpTypePointer Output %_arr_uint_uint_8 +%SV_RenderTargetArrayIndex = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%SV_PrimitiveID = OpVariable %_ptr_Output__arr_uint_uint_8 Output +%_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8 +%_ptr_Output__arr_v4float_uint_8 = OpTypePointer Output %_arr_v4float_uint_8 + %C = OpVariable %_ptr_Output__arr_v4float_uint_8 Output + %v3uint = OpTypeVector %uint 3 +%_arr_v3uint_uint_8 = 
OpTypeArray %v3uint %uint_8 +%_ptr_Output__arr_v3uint_uint_8 = OpTypePointer Output %_arr_v3uint_uint_8 + %indices = OpVariable %_ptr_Output__arr_v3uint_uint_8 Output + %uint_64 = OpConstant %uint 64 +%_arr_float_uint_64 = OpTypeArray %float %uint_64 +%_ptr_Workgroup__arr_float_uint_64 = OpTypePointer Workgroup %_arr_float_uint_64 + %32 = OpVariable %_ptr_Workgroup__arr_float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %_ = OpTypeStruct %float +%_ptr_TaskPayloadWorkgroupEXT__ = OpTypePointer TaskPayloadWorkgroupEXT %_ + %38 = OpVariable %_ptr_TaskPayloadWorkgroupEXT__ TaskPayloadWorkgroupEXT +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 +%_ptr_Output_float = OpTypePointer Output %float + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float +%_ptr_Output_v3uint = OpTypePointer Output %v3uint +%_ptr_Output_bool = OpTypePointer Output %bool +%_ptr_Output_uint = OpTypePointer Output %uint + %main = OpFunction %void None %2 + %4 = OpLabel + OpBranch %85 + %85 = OpLabel + %35 = OpLoad %uint %gl_LocalInvocationIndex + %39 = OpConvertUToF %float %35 + %41 = OpAccessChain %_ptr_Workgroup_float %32 %35 + OpStore %41 %39 + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpSetMeshOutputsEXT %uint_24 %uint_8 + %44 = OpLoad %float %41 + %46 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_0 + OpStore %46 %44 + %48 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_1 + OpStore %48 %44 + %50 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_2 + OpStore %50 %44 + %51 = OpAccessChain %_ptr_Output_float %SV_Position %35 %uint_3 + OpStore %51 %44 + %53 = OpBitwiseXor %uint %35 %uint_1 + %54 = OpAccessChain %_ptr_Workgroup_float %32 %53 + %55 = OpLoad %float %54 + %57 = 
OpInBoundsAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %38 %uint_0 + %58 = OpLoad %float %57 + %59 = OpFAdd %float %58 %55 + %60 = OpAccessChain %_ptr_Output_float %B %35 %uint_0 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Output_float %B %35 %uint_1 + OpStore %61 %59 + %62 = OpAccessChain %_ptr_Output_float %B %35 %uint_2 + OpStore %62 %59 + %63 = OpAccessChain %_ptr_Output_float %B %35 %uint_3 + OpStore %63 %59 + %64 = OpULessThan %bool %35 %uint_8 + OpSelectionMerge %87 None + OpBranchConditional %64 %86 %87 + %86 = OpLabel + %65 = OpIMul %uint %35 %uint_3 + %66 = OpIAdd %uint %65 %uint_1 + %67 = OpIAdd %uint %65 %uint_2 + %68 = OpCompositeConstruct %v3uint %65 %66 %67 + %70 = OpAccessChain %_ptr_Output_v3uint %indices %35 + OpStore %70 %68 + %71 = OpBitwiseAnd %uint %35 %uint_1 + %72 = OpINotEqual %bool %71 %uint_0 + %74 = OpAccessChain %_ptr_Output_bool %SV_CullPrimitive %35 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Output_uint %SV_PrimitiveID %35 + OpStore %76 %35 + %77 = OpAccessChain %_ptr_Output_uint %SV_RenderTargetArrayIndex %35 + OpStore %77 %35 + %78 = OpBitwiseXor %uint %35 %uint_2 + %79 = OpAccessChain %_ptr_Workgroup_float %32 %78 + %80 = OpLoad %float %79 + %81 = OpAccessChain %_ptr_Output_float %C %35 %uint_0 + OpStore %81 %80 + %82 = OpAccessChain %_ptr_Output_float %C %35 %uint_1 + OpStore %82 %80 + %83 = OpAccessChain %_ptr_Output_float %C %35 %uint_2 + OpStore %83 %80 + %84 = OpAccessChain %_ptr_Output_float %C %35 %uint_3 + OpStore %84 %80 + OpBranch %87 + %87 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen b/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen new file mode 100644 index 00000000000..2178c8af7ac --- /dev/null +++ b/shaders-no-opt/asm/rgen/acceleration-nonuniform.spv14.vk.nocompat.asm.rgen @@ -0,0 +1,112 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 73 +; Schema: 0 + 
OpCapability RayTracingKHR + OpCapability ShaderNonUniform + OpCapability RuntimeDescriptorArray + OpExtension "SPV_EXT_descriptor_indexing" + OpExtension "SPV_KHR_ray_tracing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint RayGenerationNV %main "main" %gl_LaunchIDEXT %gl_LaunchSizeEXT %as %payload %image + OpSource GLSL 460 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpSourceExtension "GL_EXT_ray_tracing" + OpName %main "main" + OpName %col "col" + OpName %origin "origin" + OpName %gl_LaunchIDEXT "gl_LaunchIDEXT" + OpName %gl_LaunchSizeEXT "gl_LaunchSizeEXT" + OpName %direction "direction" + OpName %as "as" + OpName %payload "payload" + OpName %image "image" + OpDecorate %gl_LaunchIDEXT BuiltIn LaunchIdNV + OpDecorate %gl_LaunchSizeEXT BuiltIn LaunchSizeNV + OpDecorate %as DescriptorSet 0 + OpDecorate %as Binding 1 + OpDecorate %51 NonUniform + OpDecorate %payload Location 0 + OpDecorate %image DescriptorSet 0 + OpDecorate %image Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %12 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %v3float = OpTypeVector %float 3 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_LaunchIDEXT = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LaunchSizeEXT = OpVariable %_ptr_Input_v3uint Input + %uint_1 = OpConstant %uint 1 + %float_n1 = OpConstant %float -1 + %41 = OpConstantComposite %v3float %float_0 %float_0 %float_n1 + %42 = OpTypeAccelerationStructureKHR +%_runtimearr_42 = OpTypeRuntimeArray %42 +%_ptr_UniformConstant__runtimearr_42 = OpTypePointer UniformConstant %_runtimearr_42 + 
%as = OpVariable %_ptr_UniformConstant__runtimearr_42 UniformConstant +%_ptr_UniformConstant_42 = OpTypePointer UniformConstant %42 + %uint_255 = OpConstant %uint 255 + %float_1000 = OpConstant %float 1000 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_RayPayloadNV_float = OpTypePointer RayPayloadNV %float + %payload = OpVariable %_ptr_RayPayloadNV_float RayPayloadNV +%_ptr_Function_float = OpTypePointer Function %float + %63 = OpTypeImage %float 2D 0 0 0 2 Rgba8 +%_ptr_UniformConstant_63 = OpTypePointer UniformConstant %63 + %image = OpVariable %_ptr_UniformConstant_63 UniformConstant + %v2uint = OpTypeVector %uint 2 + %v2int = OpTypeVector %int 2 + %main = OpFunction %void None %3 + %5 = OpLabel + %col = OpVariable %_ptr_Function_v4float Function + %origin = OpVariable %_ptr_Function_v3float Function + %direction = OpVariable %_ptr_Function_v3float Function + OpStore %col %12 + %22 = OpAccessChain %_ptr_Input_uint %gl_LaunchIDEXT %uint_0 + %23 = OpLoad %uint %22 + %24 = OpConvertUToF %float %23 + %26 = OpAccessChain %_ptr_Input_uint %gl_LaunchSizeEXT %uint_0 + %27 = OpLoad %uint %26 + %28 = OpConvertUToF %float %27 + %29 = OpFDiv %float %24 %28 + %31 = OpAccessChain %_ptr_Input_uint %gl_LaunchIDEXT %uint_1 + %32 = OpLoad %uint %31 + %33 = OpConvertUToF %float %32 + %34 = OpAccessChain %_ptr_Input_uint %gl_LaunchSizeEXT %uint_1 + %35 = OpLoad %uint %34 + %36 = OpConvertUToF %float %35 + %37 = OpFDiv %float %33 %36 + %38 = OpCompositeConstruct %v3float %29 %37 %float_1 + OpStore %origin %38 + OpStore %direction %41 + %46 = OpAccessChain %_ptr_Input_uint %gl_LaunchIDEXT %uint_0 + %47 = OpLoad %uint %46 + %48 = OpCopyObject %uint %47 + %50 = OpAccessChain %_ptr_UniformConstant_42 %as %48 + %51 = OpLoad %42 %50 + %53 = OpLoad %v3float %origin + %54 = OpLoad %v3float %direction + OpTraceRayKHR %51 %uint_0 %uint_255 %uint_0 %uint_1 %uint_0 %53 %float_0 %54 %float_1000 %payload + %60 = OpLoad %float %payload + %62 = OpAccessChain %_ptr_Function_float %col 
%uint_1 + OpStore %62 %60 + %66 = OpLoad %63 %image + %68 = OpLoad %v3uint %gl_LaunchIDEXT + %69 = OpVectorShuffle %v2uint %68 %68 0 1 + %71 = OpBitcast %v2int %69 + %72 = OpLoad %v4float %col + OpImageWrite %66 %71 %72 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task b/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task new file mode 100644 index 00000000000..cbef97ed1fb --- /dev/null +++ b/shaders-no-opt/asm/task/task-shader.vk.nocompat.spv14.asm.task @@ -0,0 +1,132 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 93 +; Schema: 0 + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TaskEXT %main "main" %vs %gl_LocalInvocationIndex %p + OpExecutionMode %main LocalSize 4 3 2 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_mesh_shader" + OpName %main "main" + OpName %vs "vs" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %Payload "Payload" + OpMemberName %Payload 0 "v" + OpName %p "p" + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_float_uint_24 = OpTypeArray %float %uint_24 +%_ptr_Workgroup__arr_float_uint_24 = OpTypePointer Workgroup %_arr_float_uint_24 + %vs = OpVariable %_ptr_Workgroup__arr_float_uint_24 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %float_10 = OpConstant %float 10 +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 + %uint_12 = OpConstant %uint 12 + %bool = OpTypeBool + %uint_6 = OpConstant %uint 6 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float 
%uint_3 + %Payload = OpTypeStruct %_arr_float_uint_3 +%_ptr_TaskPayloadWorkgroupEXT_Payload = OpTypePointer TaskPayloadWorkgroupEXT %Payload + %p = OpVariable %_ptr_TaskPayloadWorkgroupEXT_Payload TaskPayloadWorkgroupEXT + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float + %int_4 = OpConstant %int 4 + %int_6 = OpConstant %int 6 + %int_8 = OpConstant %int 8 + %v3uint = OpTypeVector %uint 3 + %uint_4 = OpConstant %uint 4 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_3 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %uint %gl_LocalInvocationIndex + %17 = OpAccessChain %_ptr_Workgroup_float %vs %14 + OpStore %17 %float_10 + OpControlBarrier %uint_2 %uint_2 %uint_264 + %20 = OpLoad %uint %gl_LocalInvocationIndex + %23 = OpULessThan %bool %20 %uint_12 + OpSelectionMerge %25 None + OpBranchConditional %23 %24 %25 + %24 = OpLabel + %26 = OpLoad %uint %gl_LocalInvocationIndex + %27 = OpLoad %uint %gl_LocalInvocationIndex + %28 = OpIAdd %uint %27 %uint_12 + %29 = OpAccessChain %_ptr_Workgroup_float %vs %28 + %30 = OpLoad %float %29 + %31 = OpAccessChain %_ptr_Workgroup_float %vs %26 + %32 = OpLoad %float %31 + %33 = OpFAdd %float %32 %30 + %34 = OpAccessChain %_ptr_Workgroup_float %vs %26 + OpStore %34 %33 + OpBranch %25 + %25 = OpLabel + OpControlBarrier %uint_2 %uint_2 %uint_264 + %35 = OpLoad %uint %gl_LocalInvocationIndex + %37 = OpULessThan %bool %35 %uint_6 + OpSelectionMerge %39 None + OpBranchConditional %37 %38 %39 + %38 = OpLabel + %40 = OpLoad %uint %gl_LocalInvocationIndex + %41 = OpLoad %uint %gl_LocalInvocationIndex + %42 = OpIAdd %uint %41 %uint_6 + %43 = OpAccessChain %_ptr_Workgroup_float %vs %42 + %44 = OpLoad %float %43 + %45 = OpAccessChain %_ptr_Workgroup_float %vs %40 + %46 = OpLoad %float %45 + %47 = OpFAdd %float %46 %44 + %48 = OpAccessChain %_ptr_Workgroup_float %vs %40 + OpStore %48 %47 + OpBranch %39 + %39 = OpLabel + 
OpControlBarrier %uint_2 %uint_2 %uint_264 + %49 = OpLoad %uint %gl_LocalInvocationIndex + %51 = OpULessThan %bool %49 %uint_3 + OpSelectionMerge %53 None + OpBranchConditional %51 %52 %53 + %52 = OpLabel + %54 = OpLoad %uint %gl_LocalInvocationIndex + %55 = OpLoad %uint %gl_LocalInvocationIndex + %56 = OpIAdd %uint %55 %uint_3 + %57 = OpAccessChain %_ptr_Workgroup_float %vs %56 + %58 = OpLoad %float %57 + %59 = OpAccessChain %_ptr_Workgroup_float %vs %54 + %60 = OpLoad %float %59 + %61 = OpFAdd %float %60 %58 + %62 = OpAccessChain %_ptr_Workgroup_float %vs %54 + OpStore %62 %61 + OpBranch %53 + %53 = OpLabel + OpControlBarrier %uint_2 %uint_2 %uint_264 + %69 = OpLoad %uint %gl_LocalInvocationIndex + %70 = OpLoad %uint %gl_LocalInvocationIndex + %71 = OpAccessChain %_ptr_Workgroup_float %vs %70 + %72 = OpLoad %float %71 + %74 = OpAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %p %int_0 %69 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Workgroup_float %vs %int_4 + %77 = OpLoad %float %76 + %78 = OpConvertFToS %int %77 + %79 = OpBitcast %uint %78 + %81 = OpAccessChain %_ptr_Workgroup_float %vs %int_6 + %82 = OpLoad %float %81 + %83 = OpConvertFToS %int %82 + %84 = OpBitcast %uint %83 + %86 = OpAccessChain %_ptr_Workgroup_float %vs %int_8 + %87 = OpLoad %float %86 + %88 = OpConvertFToS %int %87 + %89 = OpBitcast %uint %88 + OpEmitMeshTasksEXT %79 %84 %89 %p + OpFunctionEnd diff --git a/shaders-no-opt/asm/temporary.zero-initialize.asm.frag b/shaders-no-opt/asm/temporary.zero-initialize.asm.frag new file mode 100644 index 00000000000..eccff08b331 --- /dev/null +++ b/shaders-no-opt/asm/temporary.zero-initialize.asm.frag @@ -0,0 +1,93 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 65 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vA %vB + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + 
OpName %FragColor "FragColor" + OpName %vA "vA" + OpName %vB "vB" + OpDecorate %FragColor RelaxedPrecision + OpDecorate %FragColor Location 0 + OpDecorate %vA RelaxedPrecision + OpDecorate %vA Flat + OpDecorate %vA Location 0 + OpDecorate %25 RelaxedPrecision + OpDecorate %30 RelaxedPrecision + OpDecorate %vB RelaxedPrecision + OpDecorate %vB Flat + OpDecorate %vB Location 1 + OpDecorate %38 RelaxedPrecision + OpDecorate %40 RelaxedPrecision + OpDecorate %49 RelaxedPrecision + OpDecorate %51 RelaxedPrecision + OpDecorate %53 RelaxedPrecision + OpDecorate %56 RelaxedPrecision + OpDecorate %64 RelaxedPrecision + OpDecorate %58 RelaxedPrecision + OpDecorate %57 RelaxedPrecision + OpDecorate %60 RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_int = OpTypePointer Input %int + %vA = OpVariable %_ptr_Input_int Input + %bool = OpTypeBool + %int_20 = OpConstant %int 20 + %int_50 = OpConstant %int 50 + %vB = OpVariable %_ptr_Input_int Input + %int_40 = OpConstant %int 40 + %int_60 = OpConstant %int 60 + %int_10 = OpConstant %int 10 + %float_1 = OpConstant %float 1 + %63 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + OpStore %FragColor %11 + OpBranch %17 + %17 = OpLabel + %60 = OpPhi %int %int_0 %5 %58 %20 + %57 = OpPhi %int %int_0 %5 %56 %20 + %25 = OpLoad %int %vA + %27 = OpSLessThan %bool %57 %25 + OpLoopMerge %19 %20 None + OpBranchConditional %27 %18 %19 + %18 = OpLabel + %30 = OpIAdd %int %25 %57 + %32 = OpIEqual %bool %30 %int_20 + OpSelectionMerge %34 None + OpBranchConditional %32 %33 %36 + %33 = OpLabel + OpBranch %34 + %36 = OpLabel + %38 = 
OpLoad %int %vB + %40 = OpIAdd %int %38 %57 + %42 = OpIEqual %bool %40 %int_40 + %64 = OpSelect %int %42 %int_60 %60 + OpBranch %34 + %34 = OpLabel + %58 = OpPhi %int %int_50 %33 %64 %36 + %49 = OpIAdd %int %58 %int_10 + %51 = OpLoad %v4float %FragColor + %53 = OpFAdd %v4float %51 %63 + OpStore %FragColor %53 + OpBranch %20 + %20 = OpLabel + %56 = OpIAdd %int %57 %49 + OpBranch %17 + %19 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc b/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc new file mode 100644 index 00000000000..0ec5fa90a96 --- /dev/null +++ b/shaders-no-opt/asm/tesc/array-of-block-output-initializer.asm.tesc @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 42 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %_ %patches %v2 %v3 %verts + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %vert "vert" + OpMemberName %vert 0 "v0" + OpMemberName %vert 1 "v1" + OpName %_ "" + OpName %vert_patch "vert_patch" + OpMemberName %vert_patch 0 "v2" + OpMemberName %vert_patch 1 "v3" + OpName %patches "patches" + OpName %v2 "v2" + OpName %v3 "v3" + OpName %vert2 "vert2" + OpMemberName %vert2 0 "v4" + OpMemberName %vert2 1 "v5" + OpName %verts "verts" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance 
+ OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpMemberDecorate %vert 0 Patch + OpMemberDecorate %vert 1 Patch + OpDecorate %vert Block + OpDecorate %_ Location 0 + OpMemberDecorate %vert_patch 0 Patch + OpMemberDecorate %vert_patch 1 Patch + OpDecorate %vert_patch Block + OpDecorate %patches Location 2 + OpDecorate %v2 Patch + OpDecorate %v2 Location 6 + OpDecorate %v3 Location 7 + OpDecorate %vert2 Block + OpDecorate %verts Location 8 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex %uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out_zero = OpConstantNull %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output %gl_out_zero + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %vert = OpTypeStruct %float %float +%_ptr_Output_vert = OpTypePointer Output %vert + %__zero = OpConstantNull %vert + %_ = OpVariable %_ptr_Output_vert Output %__zero + %vert_patch = OpTypeStruct %float %float + %uint_2 = OpConstant %uint 2 +%_arr_vert_patch_uint_2 = OpTypeArray %vert_patch %uint_2 +%_ptr_Output__arr_vert_patch_uint_2 = OpTypePointer Output %_arr_vert_patch_uint_2 + %patches_zero = OpConstantNull %_arr_vert_patch_uint_2 + %patches = OpVariable %_ptr_Output__arr_vert_patch_uint_2 Output %patches_zero +%_ptr_Output_float = OpTypePointer Output %float + %v2_zero = 
OpConstantNull %float + %v2 = OpVariable %_ptr_Output_float Output %v2_zero +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %v3_zero = OpConstantNull %_arr_float_uint_4 + %v3 = OpVariable %_ptr_Output__arr_float_uint_4 Output %v3_zero + %vert2 = OpTypeStruct %float %float +%_arr_vert2_uint_4 = OpTypeArray %vert2 %uint_4 +%_ptr_Output__arr_vert2_uint_4 = OpTypePointer Output %_arr_vert2_uint_4 + %verts_zero = OpConstantNull %_arr_vert2_uint_4 + %verts = OpVariable %_ptr_Output__arr_vert2_uint_4 Output %verts_zero + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc b/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc new file mode 100644 index 00000000000..7c0a638f985 --- /dev/null +++ b/shaders-no-opt/asm/tesc/copy-memory-control-point.asm.tesc @@ -0,0 +1,199 @@ +; SPIR-V +; Version: 1.0 +; Generator: Wine VKD3D Shader Compiler; 2 +; Bound: 126 +; Schema: 0 + OpCapability Tessellation + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %1 "main" %4 %30 %80 %101 %103 %108 %110 %115 %117 + OpExecutionMode %1 OutputVertices 3 + OpExecutionMode %1 Triangles + OpExecutionMode %1 SpacingEqual + OpExecutionMode %1 VertexOrderCw + OpName %1 "main" + OpName %11 "opc" + OpName %14 "cb1_struct" + OpName %16 "cb0_0" + OpName %22 "vicp" + OpName %23 "fork0" + OpName %26 "vForkInstanceId" + OpName %34 "r0" + OpName %32 "fork0_epilogue" + OpName %75 "fork1" + OpName %81 "fork1_epilogue" + OpName %101 "v0" + OpName %103 "v1" + OpName %108 "vicp0" + OpName %110 "vocp0" + OpName %115 "vicp1" + OpName %117 "vocp1" + OpDecorate %4 BuiltIn InvocationId + OpDecorate %13 ArrayStride 16 + OpDecorate %14 Block + OpMemberDecorate %14 0 Offset 0 + OpDecorate %16 DescriptorSet 0 + 
OpDecorate %16 Binding 0 + OpDecorate %30 BuiltIn TessLevelOuter + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %30 Patch + OpDecorate %80 BuiltIn TessLevelInner + OpDecorate %80 Patch + OpDecorate %80 Patch + OpDecorate %101 Location 0 + OpDecorate %103 Location 1 + OpDecorate %108 Location 2 + OpDecorate %110 Location 3 + OpDecorate %115 Location 4 + OpDecorate %117 Location 5 + %2 = OpTypeInt 32 1 + %3 = OpTypePointer Input %2 + %4 = OpVariable %3 Input + %5 = OpTypeFloat 32 + %6 = OpTypeVector %5 4 + %7 = OpTypeInt 32 0 + %8 = OpConstant %7 4 + %9 = OpTypeArray %6 %8 + %10 = OpTypePointer Private %9 + %11 = OpVariable %10 Private + %12 = OpConstant %7 1 + %13 = OpTypeArray %6 %12 + %14 = OpTypeStruct %13 + %15 = OpTypePointer Uniform %14 + %16 = OpVariable %15 Uniform + %17 = OpConstant %7 3 + %18 = OpTypeArray %6 %17 + %19 = OpConstant %7 2 + %20 = OpTypeArray %18 %19 + %21 = OpTypePointer Private %20 + %22 = OpVariable %21 Private + %24 = OpTypeVoid + %25 = OpTypeFunction %24 %7 + %28 = OpTypeArray %5 %8 + %29 = OpTypePointer Output %28 + %30 = OpVariable %29 Output + %31 = OpConstant %7 0 + %33 = OpTypePointer Function %6 + %36 = OpTypePointer Function %5 + %38 = OpTypePointer Uniform %6 + %40 = OpTypePointer Uniform %5 + %46 = OpTypePointer Private %6 + %48 = OpTypePointer Private %5 + %52 = OpVariable %46 Private + %55 = OpVariable %46 Private + %58 = OpVariable %46 Private + %60 = OpTypeFunction %24 %46 %46 %46 + %69 = OpTypePointer Output %5 + %76 = OpTypeFunction %24 + %78 = OpTypeArray %5 %19 + %79 = OpTypePointer Output %78 + %80 = OpVariable %79 Output + %89 = OpVariable %46 Private + %91 = OpTypeFunction %24 %46 + %98 = OpTypePointer Private %18 + %100 = OpTypePointer Input %18 + %101 = OpVariable %100 Input + %103 = OpVariable %100 Input + %105 = OpTypeVector %5 3 + %106 = OpTypeArray %105 %17 + %107 = OpTypePointer Input %106 + %108 = OpVariable %107 Input + %109 = OpTypePointer Output %106 + %110 = OpVariable 
%109 Output + %111 = OpTypePointer Output %105 + %112 = OpTypePointer Input %105 + %115 = OpVariable %100 Input + %116 = OpTypePointer Output %18 + %117 = OpVariable %116 Output + %118 = OpTypePointer Output %6 + %119 = OpTypePointer Input %6 + %23 = OpFunction %24 None %25 + %26 = OpFunctionParameter %7 + %27 = OpLabel + %34 = OpVariable %33 Function + %35 = OpBitcast %5 %26 + %37 = OpInBoundsAccessChain %36 %34 %31 + OpStore %37 %35 + %39 = OpAccessChain %38 %16 %31 %31 + %41 = OpInBoundsAccessChain %40 %39 %31 + %42 = OpLoad %5 %41 + %43 = OpInBoundsAccessChain %36 %34 %31 + %44 = OpLoad %5 %43 + %45 = OpBitcast %2 %44 + %47 = OpAccessChain %46 %11 %45 + %49 = OpInBoundsAccessChain %48 %47 %31 + OpStore %49 %42 + %50 = OpAccessChain %46 %11 %31 + %51 = OpLoad %6 %50 + OpStore %52 %51 + %53 = OpAccessChain %46 %11 %12 + %54 = OpLoad %6 %53 + OpStore %55 %54 + %56 = OpAccessChain %46 %11 %19 + %57 = OpLoad %6 %56 + OpStore %58 %57 + %59 = OpFunctionCall %24 %32 %52 %55 %58 + OpReturn + OpFunctionEnd + %32 = OpFunction %24 None %60 + %61 = OpFunctionParameter %46 + %62 = OpFunctionParameter %46 + %63 = OpFunctionParameter %46 + %64 = OpLabel + %65 = OpLoad %6 %61 + %66 = OpLoad %6 %62 + %67 = OpLoad %6 %63 + %68 = OpCompositeExtract %5 %65 0 + %70 = OpAccessChain %69 %30 %31 + OpStore %70 %68 + %71 = OpCompositeExtract %5 %66 0 + %72 = OpAccessChain %69 %30 %12 + OpStore %72 %71 + %73 = OpCompositeExtract %5 %67 0 + %74 = OpAccessChain %69 %30 %19 + OpStore %74 %73 + OpReturn + OpFunctionEnd + %75 = OpFunction %24 None %76 + %77 = OpLabel + %82 = OpAccessChain %38 %16 %31 %31 + %83 = OpInBoundsAccessChain %40 %82 %31 + %84 = OpLoad %5 %83 + %85 = OpAccessChain %46 %11 %17 + %86 = OpInBoundsAccessChain %48 %85 %31 + OpStore %86 %84 + %87 = OpAccessChain %46 %11 %17 + %88 = OpLoad %6 %87 + OpStore %89 %88 + %90 = OpFunctionCall %24 %81 %89 + OpReturn + OpFunctionEnd + %81 = OpFunction %24 None %91 + %92 = OpFunctionParameter %46 + %93 = OpLabel + %94 = OpLoad %6 %92 
+ %95 = OpCompositeExtract %5 %94 0 + %96 = OpAccessChain %69 %80 %31 + OpStore %96 %95 + OpReturn + OpFunctionEnd + %1 = OpFunction %24 None %76 + %97 = OpLabel + %99 = OpInBoundsAccessChain %98 %22 %31 + OpCopyMemory %99 %101 + %102 = OpInBoundsAccessChain %98 %22 %12 + OpCopyMemory %102 %103 + %104 = OpLoad %2 %4 + %113 = OpAccessChain %111 %110 %104 + %114 = OpAccessChain %112 %108 %104 + OpCopyMemory %113 %114 + %120 = OpAccessChain %118 %117 %104 + %121 = OpAccessChain %119 %115 %104 + OpCopyMemory %120 %121 + %122 = OpFunctionCall %24 %23 %31 + %123 = OpFunctionCall %24 %23 %12 + %124 = OpFunctionCall %24 %23 %19 + %125 = OpFunctionCall %24 %75 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc new file mode 100644 index 00000000000..0fd4dce256e --- /dev/null +++ b/shaders-no-opt/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc @@ -0,0 +1,248 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 2 +; Bound: 162 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %hs_main "main" %p_pos %p_1 %i_1 %_entryPointOutput_pos %_entryPointOutput %_patchConstantOutput_EdgeTess %_patchConstantOutput_InsideTess + OpExecutionMode %hs_main OutputVertices 3 + OpExecutionMode %hs_main Triangles + OpExecutionMode %hs_main SpacingFractionalOdd + OpExecutionMode %hs_main VertexOrderCw + OpSource HLSL 500 + OpName %hs_main "hs_main" + OpName %VertexOutput "VertexOutput" + OpMemberName %VertexOutput 0 "pos" + OpMemberName %VertexOutput 1 "uv" + OpName %HSOut "HSOut" + OpMemberName %HSOut 0 "pos" + OpMemberName %HSOut 1 "uv" + OpName %_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ "@hs_main(struct-VertexOutput-vf4-vf21[3];u1;" + OpName %p "p" + OpName %i "i" + OpName %HSConstantOut "HSConstantOut" + 
OpMemberName %HSConstantOut 0 "EdgeTess" + OpMemberName %HSConstantOut 1 "InsideTess" + OpName %PatchHS_struct_VertexOutput_vf4_vf21_3__ "PatchHS(struct-VertexOutput-vf4-vf21[3];" + OpName %patch "patch" + OpName %output "output" + OpName %p_0 "p" + OpName %p_pos "p.pos" + OpName %VertexOutput_0 "VertexOutput" + OpMemberName %VertexOutput_0 0 "uv" + OpName %p_1 "p" + OpName %i_0 "i" + OpName %i_1 "i" + OpName %flattenTemp "flattenTemp" + OpName %param "param" + OpName %param_0 "param" + OpName %_entryPointOutput_pos "@entryPointOutput.pos" + OpName %HSOut_0 "HSOut" + OpMemberName %HSOut_0 0 "uv" + OpName %_entryPointOutput "@entryPointOutput" + OpName %_patchConstantResult "@patchConstantResult" + OpName %param_1 "param" + OpName %_patchConstantOutput_EdgeTess "@patchConstantOutput.EdgeTess" + OpName %_patchConstantOutput_InsideTess "@patchConstantOutput.InsideTess" + OpName %output_0 "output" + OpDecorate %p_pos BuiltIn Position + OpDecorate %p_1 Location 0 + OpDecorate %i_1 BuiltIn InvocationId + OpDecorate %_entryPointOutput_pos BuiltIn Position + OpDecorate %_entryPointOutput Location 0 + OpDecorate %_patchConstantOutput_EdgeTess Patch + OpDecorate %_patchConstantOutput_EdgeTess BuiltIn TessLevelOuter + OpDecorate %_patchConstantOutput_InsideTess Patch + OpDecorate %_patchConstantOutput_InsideTess BuiltIn TessLevelInner + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v2float = OpTypeVector %float 2 +%VertexOutput = OpTypeStruct %v4float %v2float + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 +%_arr_VertexOutput_uint_3 = OpTypeArray %VertexOutput %uint_3 +%_ptr_Function__arr_VertexOutput_uint_3 = OpTypePointer Function %_arr_VertexOutput_uint_3 +%_ptr_Function_uint = OpTypePointer Function %uint + %HSOut = OpTypeStruct %v4float %v2float + %16 = OpTypeFunction %HSOut %_ptr_Function__arr_VertexOutput_uint_3 %_ptr_Function_uint +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%HSConstantOut 
= OpTypeStruct %_arr_float_uint_3 %float + %23 = OpTypeFunction %HSConstantOut %_ptr_Function__arr_VertexOutput_uint_3 +%_ptr_Function_HSOut = OpTypePointer Function %HSOut + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %int_1 = OpConstant %int 1 +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 + %p_pos = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%_ptr_Input_v4float = OpTypePointer Input %v4float +%VertexOutput_0 = OpTypeStruct %v2float +%_arr_VertexOutput_0_uint_3 = OpTypeArray %VertexOutput_0 %uint_3 +%_ptr_Input__arr_VertexOutput_0_uint_3 = OpTypePointer Input %_arr_VertexOutput_0_uint_3 + %p_1 = OpVariable %_ptr_Input__arr_VertexOutput_0_uint_3 Input +%_ptr_Input_v2float = OpTypePointer Input %v2float + %int_2 = OpConstant %int 2 +%_ptr_Input_uint = OpTypePointer Input %uint + %i_1 = OpVariable %_ptr_Input_uint Input +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_entryPointOutput_pos = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %HSOut_0 = OpTypeStruct %v2float +%_arr_HSOut_0_uint_3 = OpTypeArray %HSOut_0 %uint_3 +%_ptr_Output__arr_HSOut_0_uint_3 = OpTypePointer Output %_arr_HSOut_0_uint_3 +%_entryPointOutput = OpVariable %_ptr_Output__arr_HSOut_0_uint_3 Output +%_ptr_Output_v2float = OpTypePointer Output %v2float + %uint_2 = OpConstant %uint 2 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool +%_ptr_Function_HSConstantOut = OpTypePointer Function %HSConstantOut + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_patchConstantOutput_EdgeTess = OpVariable %_ptr_Output__arr_float_uint_4 Output +%_ptr_Function_float = 
OpTypePointer Function %float +%_ptr_Output_float = OpTypePointer Output %float +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_patchConstantOutput_InsideTess = OpVariable %_ptr_Output__arr_float_uint_2 Output + %float_1 = OpConstant %float 1 + %hs_main = OpFunction %void None %3 + %5 = OpLabel + %p_0 = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function + %i_0 = OpVariable %_ptr_Function_uint Function +%flattenTemp = OpVariable %_ptr_Function_HSOut Function + %param = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function + %param_0 = OpVariable %_ptr_Function_uint Function +%_patchConstantResult = OpVariable %_ptr_Function_HSConstantOut Function + %param_1 = OpVariable %_ptr_Function__arr_VertexOutput_uint_3 Function + %50 = OpAccessChain %_ptr_Input_v4float %p_pos %int_0 + %51 = OpLoad %v4float %50 + %52 = OpAccessChain %_ptr_Function_v4float %p_0 %int_0 %int_0 + OpStore %52 %51 + %58 = OpAccessChain %_ptr_Input_v2float %p_1 %int_0 %int_0 + %59 = OpLoad %v2float %58 + %60 = OpAccessChain %_ptr_Function_v2float %p_0 %int_0 %int_1 + OpStore %60 %59 + %61 = OpAccessChain %_ptr_Input_v4float %p_pos %int_1 + %62 = OpLoad %v4float %61 + %63 = OpAccessChain %_ptr_Function_v4float %p_0 %int_1 %int_0 + OpStore %63 %62 + %64 = OpAccessChain %_ptr_Input_v2float %p_1 %int_1 %int_0 + %65 = OpLoad %v2float %64 + %66 = OpAccessChain %_ptr_Function_v2float %p_0 %int_1 %int_1 + OpStore %66 %65 + %68 = OpAccessChain %_ptr_Input_v4float %p_pos %int_2 + %69 = OpLoad %v4float %68 + %70 = OpAccessChain %_ptr_Function_v4float %p_0 %int_2 %int_0 + OpStore %70 %69 + %71 = OpAccessChain %_ptr_Input_v2float %p_1 %int_2 %int_0 + %72 = OpLoad %v2float %71 + %73 = OpAccessChain %_ptr_Function_v2float %p_0 %int_2 %int_1 + OpStore %73 %72 + %77 = OpLoad %uint %i_1 + OpStore %i_0 %77 + %80 = OpLoad %_arr_VertexOutput_uint_3 %p_0 + OpStore %param %80 + %82 = OpLoad %uint %i_0 + OpStore %param_0 %82 + %83 = 
OpFunctionCall %HSOut %_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ %param %param_0 + OpStore %flattenTemp %83 + %86 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_0 + %87 = OpLoad %v4float %86 + %94 = OpLoad %uint %i_1 + %89 = OpAccessChain %_ptr_Output_v4float %_entryPointOutput_pos %94 + OpStore %89 %87 + %95 = OpAccessChain %_ptr_Function_v2float %flattenTemp %int_1 + %96 = OpLoad %v2float %95 + %98 = OpAccessChain %_ptr_Output_v2float %_entryPointOutput %94 %int_0 + OpStore %98 %96 + OpControlBarrier %uint_2 %uint_1 %uint_0 + %102 = OpLoad %uint %i_1 + %104 = OpIEqual %bool %102 %int_0 + OpSelectionMerge %106 None + OpBranchConditional %104 %105 %106 + %105 = OpLabel + %110 = OpLoad %_arr_VertexOutput_uint_3 %p_0 + OpStore %param_1 %110 + %111 = OpFunctionCall %HSConstantOut %PatchHS_struct_VertexOutput_vf4_vf21_3__ %param_1 + OpStore %_patchConstantResult %111 + %117 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_0 + %118 = OpLoad %float %117 + %120 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_0 + OpStore %120 %118 + %121 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_1 + %122 = OpLoad %float %121 + %123 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_1 + OpStore %123 %122 + %124 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_0 %int_2 + %125 = OpLoad %float %124 + %126 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_EdgeTess %int_2 + OpStore %126 %125 + %130 = OpAccessChain %_ptr_Function_float %_patchConstantResult %int_1 + %131 = OpLoad %float %130 + %132 = OpAccessChain %_ptr_Output_float %_patchConstantOutput_InsideTess %int_0 + OpStore %132 %131 + OpBranch %106 + %106 = OpLabel + OpReturn + OpFunctionEnd +%_hs_main_struct_VertexOutput_vf4_vf21_3__u1_ = OpFunction %HSOut None %16 + %p = OpFunctionParameter %_ptr_Function__arr_VertexOutput_uint_3 + %i = OpFunctionParameter %_ptr_Function_uint + %20 = OpLabel + %output = 
OpVariable %_ptr_Function_HSOut Function + %31 = OpLoad %uint %i + %33 = OpAccessChain %_ptr_Function_v4float %p %31 %int_0 + %34 = OpLoad %v4float %33 + %35 = OpAccessChain %_ptr_Function_v4float %output %int_0 + OpStore %35 %34 + %37 = OpLoad %uint %i + %39 = OpAccessChain %_ptr_Function_v2float %p %37 %int_1 + %40 = OpLoad %v2float %39 + %41 = OpAccessChain %_ptr_Function_v2float %output %int_1 + OpStore %41 %40 + %42 = OpLoad %HSOut %output + OpReturnValue %42 + OpFunctionEnd +%PatchHS_struct_VertexOutput_vf4_vf21_3__ = OpFunction %HSConstantOut None %23 + %patch = OpFunctionParameter %_ptr_Function__arr_VertexOutput_uint_3 + %26 = OpLabel + %output_0 = OpVariable %_ptr_Function_HSConstantOut Function + %135 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %136 = OpLoad %v2float %135 + %137 = OpCompositeConstruct %v2float %float_1 %float_1 + %138 = OpFAdd %v2float %137 %136 + %139 = OpCompositeExtract %float %138 0 + %140 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_0 + OpStore %140 %139 + %141 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %142 = OpLoad %v2float %141 + %143 = OpCompositeConstruct %v2float %float_1 %float_1 + %144 = OpFAdd %v2float %143 %142 + %145 = OpCompositeExtract %float %144 0 + %146 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_1 + OpStore %146 %145 + %147 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %148 = OpLoad %v2float %147 + %149 = OpCompositeConstruct %v2float %float_1 %float_1 + %150 = OpFAdd %v2float %149 %148 + %151 = OpCompositeExtract %float %150 0 + %152 = OpAccessChain %_ptr_Function_float %output_0 %int_0 %int_2 + OpStore %152 %151 + %153 = OpAccessChain %_ptr_Function_v2float %patch %int_0 %int_1 + %154 = OpLoad %v2float %153 + %155 = OpCompositeConstruct %v2float %float_1 %float_1 + %156 = OpFAdd %v2float %155 %154 + %157 = OpCompositeExtract %float %156 0 + %158 = OpAccessChain %_ptr_Function_float %output_0 %int_1 + OpStore %158 %157 + %159 = OpLoad 
%HSConstantOut %output_0 + OpReturnValue %159 + OpFunctionEnd diff --git a/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc b/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc new file mode 100644 index 00000000000..95fd147e7be --- /dev/null +++ b/shaders-no-opt/asm/tesc/tess-level-initializer.asm.tesc @@ -0,0 +1,87 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 47 +; Schema: 0 + OpCapability Tessellation + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %main "main" %gl_out %gl_InvocationID %gl_TessLevelInner %gl_TessLevelOuter + OpExecutionMode %main OutputVertices 4 + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %gl_out "gl_out" + OpName %gl_InvocationID "gl_InvocationID" + OpName %gl_TessLevelInner "gl_TessLevelInner" + OpName %gl_TessLevelOuter "gl_TessLevelOuter" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorate %gl_TessLevelInner Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 + %uint_4 = OpConstant %uint 4 +%_arr_gl_PerVertex_uint_4 = OpTypeArray %gl_PerVertex 
%uint_4 +%_ptr_Output__arr_gl_PerVertex_uint_4 = OpTypePointer Output %_arr_gl_PerVertex_uint_4 + %gl_out = OpVariable %_ptr_Output__arr_gl_PerVertex_uint_4 Output + %int = OpTypeInt 32 1 +%_ptr_Input_int = OpTypePointer Input %int +%gl_InvocationID = OpVariable %_ptr_Input_int Input + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %22 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %uint_2 = OpConstant %uint 2 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 + %inner_zero = OpConstantNull %_arr_float_uint_2 +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output %inner_zero +%_ptr_Output_float = OpTypePointer Output %float + %int_1 = OpConstant %int 1 + %float_2 = OpConstant %float 2 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 + %outer_zero = OpConstantNull %_arr_float_uint_4 +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output %outer_zero + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %int_2 = OpConstant %int 2 + %float_5 = OpConstant %float 5 + %int_3 = OpConstant %int 3 + %float_6 = OpConstant %float 6 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %int %gl_InvocationID + %24 = OpAccessChain %_ptr_Output_v4float %gl_out %19 %int_0 + OpStore %24 %22 + %30 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_0 + OpStore %30 %float_1 + %33 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %int_1 + OpStore %33 %float_2 + %38 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 + OpStore %38 %float_3 + %40 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 + OpStore %40 %float_4 + %43 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_2 + OpStore %43 %float_5 + %46 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter 
%int_3 + OpStore %46 %float_6 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert b/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert new file mode 100644 index 00000000000..a431e6a7174 --- /dev/null +++ b/shaders-no-opt/asm/vert/block-struct-initializer.asm.vert @@ -0,0 +1,37 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 13 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %foo + OpSource GLSL 450 + OpName %main "main" + OpName %Vert "Vert" + OpMemberName %Vert 0 "a" + OpMemberName %Vert 1 "b" + OpName %_ "" + OpName %Foo "Foo" + OpMemberName %Foo 0 "c" + OpMemberName %Foo 1 "d" + OpName %foo "foo" + OpDecorate %Vert Block + OpDecorate %_ Location 0 + OpDecorate %foo Location 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Vert = OpTypeStruct %float %float +%_ptr_Output_Vert = OpTypePointer Output %Vert + %zero_vert = OpConstantNull %Vert + %_ = OpVariable %_ptr_Output_Vert Output %zero_vert + %Foo = OpTypeStruct %float %float +%_ptr_Output_Foo = OpTypePointer Output %Foo +%zero_foo = OpConstantNull %Foo + %foo = OpVariable %_ptr_Output_Foo Output %zero_foo + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert b/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert new file mode 100644 index 00000000000..aaa68662e5d --- /dev/null +++ b/shaders-no-opt/asm/vert/builtin-output-initializer.asm.vert @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName 
%gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %zero = OpConstantNull %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output %zero + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %17 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %19 %17 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert b/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert new file mode 100644 index 00000000000..94a883c1ed1 --- /dev/null +++ b/shaders-no-opt/asm/vert/complex-link-by-name.asm.vert @@ -0,0 +1,119 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %output_location_0 %output_location_2 %output_location_3 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Struct_vec4" + OpMemberName %Foo 0 "m0" + OpName %c "c" + OpName %Foo_0 "Struct_vec4" + OpMemberName %Foo_0 0 "m0" + OpName 
%Bar "Struct_vec4" + OpMemberName %Bar 0 "m0" + OpName %UBO "UBO" + OpMemberName %UBO 0 "m0" + OpMemberName %UBO 1 "m1" + OpName %ubo_binding_0 "ubo_binding_0" + OpName %Bar_0 "Struct_vec4" + OpMemberName %Bar_0 0 "m0" + OpName %b "b" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %VertexOut "VertexOut" + OpMemberName %VertexOut 0 "m0" + OpMemberName %VertexOut 1 "m1" + OpName %output_location_0 "output_location_0" + OpName %output_location_2 "output_location_2" + OpName %output_location_3 "output_location_3" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Bar 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %ubo_binding_0 DescriptorSet 0 + OpDecorate %ubo_binding_0 Binding 0 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %VertexOut Block + OpDecorate %output_location_0 Location 0 + OpDecorate %output_location_2 Location 2 + OpDecorate %output_location_3 Location 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %Foo = OpTypeStruct %v4float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %v4float + %Bar = OpTypeStruct %v4float + %UBO = OpTypeStruct %Foo_0 %Bar +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO +%ubo_binding_0 = OpVariable %_ptr_Uniform_UBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %Bar_0 = OpTypeStruct %v4float +%_ptr_Function_Bar_0 = 
OpTypePointer Function %Bar_0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_Bar = OpTypePointer Uniform %Bar + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %VertexOut = OpTypeStruct %Foo %Bar_0 +%_ptr_Output_VertexOut = OpTypePointer Output %VertexOut +%output_location_0 = OpVariable %_ptr_Output_VertexOut Output +%_ptr_Output_Foo = OpTypePointer Output %Foo +%_ptr_Output_Bar_0 = OpTypePointer Output %Bar_0 +%output_location_2 = OpVariable %_ptr_Output_Foo Output +%output_location_3 = OpVariable %_ptr_Output_Bar_0 Output + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_Foo Function + %b = OpVariable %_ptr_Function_Bar_0 Function + %19 = OpAccessChain %_ptr_Uniform_Foo_0 %ubo_binding_0 %int_0 + %20 = OpLoad %Foo_0 %19 + %21 = OpCompositeExtract %v4float %20 0 + %23 = OpAccessChain %_ptr_Function_v4float %c %int_0 + OpStore %23 %21 + %29 = OpAccessChain %_ptr_Uniform_Bar %ubo_binding_0 %int_1 + %30 = OpLoad %Bar %29 + %31 = OpCompositeExtract %v4float %30 0 + %32 = OpAccessChain %_ptr_Function_v4float %b %int_0 + OpStore %32 %31 + %39 = OpAccessChain %_ptr_Function_v4float %c %int_0 + %40 = OpLoad %v4float %39 + %41 = OpAccessChain %_ptr_Function_v4float %b %int_0 + %42 = OpLoad %v4float %41 + %43 = OpFAdd %v4float %40 %42 + %45 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %45 %43 + %49 = OpLoad %Foo %c + %51 = OpAccessChain %_ptr_Output_Foo %output_location_0 %int_0 + OpStore %51 %49 + %52 = OpLoad %Bar_0 %b + %54 = OpAccessChain %_ptr_Output_Bar_0 %output_location_0 %int_1 + OpStore %54 %52 + %56 = OpLoad %Foo %c + OpStore %output_location_2 %56 + %58 = OpLoad %Bar_0 %b + OpStore %output_location_3 %58 + OpReturn + 
OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert b/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert new file mode 100644 index 00000000000..94a883c1ed1 --- /dev/null +++ b/shaders-no-opt/asm/vert/complex-link-by-name.force-flattened-io.legacy.asm.vert @@ -0,0 +1,119 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 59 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ %output_location_0 %output_location_2 %output_location_3 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Struct_vec4" + OpMemberName %Foo 0 "m0" + OpName %c "c" + OpName %Foo_0 "Struct_vec4" + OpMemberName %Foo_0 0 "m0" + OpName %Bar "Struct_vec4" + OpMemberName %Bar 0 "m0" + OpName %UBO "UBO" + OpMemberName %UBO 0 "m0" + OpMemberName %UBO 1 "m1" + OpName %ubo_binding_0 "ubo_binding_0" + OpName %Bar_0 "Struct_vec4" + OpMemberName %Bar_0 0 "m0" + OpName %b "b" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpName %VertexOut "VertexOut" + OpMemberName %VertexOut 0 "m0" + OpMemberName %VertexOut 1 "m1" + OpName %output_location_0 "output_location_0" + OpName %output_location_2 "output_location_2" + OpName %output_location_3 "output_location_3" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Bar 0 Offset 0 + OpMemberDecorate %UBO 0 Offset 0 + OpMemberDecorate %UBO 1 Offset 16 + OpDecorate %UBO Block + OpDecorate %ubo_binding_0 DescriptorSet 0 + OpDecorate %ubo_binding_0 Binding 0 + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 
BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + OpDecorate %VertexOut Block + OpDecorate %output_location_0 Location 0 + OpDecorate %output_location_2 Location 2 + OpDecorate %output_location_3 Location 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %Foo = OpTypeStruct %v4float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %v4float + %Bar = OpTypeStruct %v4float + %UBO = OpTypeStruct %Foo_0 %Bar +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO +%ubo_binding_0 = OpVariable %_ptr_Uniform_UBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %Bar_0 = OpTypeStruct %v4float +%_ptr_Function_Bar_0 = OpTypePointer Function %Bar_0 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_Bar = OpTypePointer Uniform %Bar + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output +%_ptr_Output_v4float = OpTypePointer Output %v4float + %VertexOut = OpTypeStruct %Foo %Bar_0 +%_ptr_Output_VertexOut = OpTypePointer Output %VertexOut +%output_location_0 = OpVariable %_ptr_Output_VertexOut Output +%_ptr_Output_Foo = OpTypePointer Output %Foo +%_ptr_Output_Bar_0 = OpTypePointer Output %Bar_0 +%output_location_2 = OpVariable %_ptr_Output_Foo Output +%output_location_3 = OpVariable %_ptr_Output_Bar_0 Output + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_Foo Function + %b = OpVariable %_ptr_Function_Bar_0 Function + %19 = OpAccessChain %_ptr_Uniform_Foo_0 %ubo_binding_0 %int_0 + %20 = OpLoad %Foo_0 %19 + %21 = OpCompositeExtract %v4float %20 0 + %23 = OpAccessChain %_ptr_Function_v4float %c %int_0 + 
OpStore %23 %21 + %29 = OpAccessChain %_ptr_Uniform_Bar %ubo_binding_0 %int_1 + %30 = OpLoad %Bar %29 + %31 = OpCompositeExtract %v4float %30 0 + %32 = OpAccessChain %_ptr_Function_v4float %b %int_0 + OpStore %32 %31 + %39 = OpAccessChain %_ptr_Function_v4float %c %int_0 + %40 = OpLoad %v4float %39 + %41 = OpAccessChain %_ptr_Function_v4float %b %int_0 + %42 = OpLoad %v4float %41 + %43 = OpFAdd %v4float %40 %42 + %45 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + OpStore %45 %43 + %49 = OpLoad %Foo %c + %51 = OpAccessChain %_ptr_Output_Foo %output_location_0 %int_0 + OpStore %51 %49 + %52 = OpLoad %Bar_0 %b + %54 = OpAccessChain %_ptr_Output_Bar_0 %output_location_0 %int_1 + OpStore %54 %52 + %56 = OpLoad %Foo %c + OpStore %output_location_2 %56 + %58 = OpLoad %Bar_0 %b + OpStore %output_location_3 %58 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert b/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert new file mode 100644 index 00000000000..d5a1b41146c --- /dev/null +++ b/shaders-no-opt/asm/vert/constant-composite-extract.asm.vert @@ -0,0 +1,66 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 22 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %main "main" %_ + OpSource GLSL 450 + OpName %main "main" + OpName %gl_PerVertex "gl_PerVertex" + OpMemberName %gl_PerVertex 0 "gl_Position" + OpMemberName %gl_PerVertex 1 "gl_PointSize" + OpMemberName %gl_PerVertex 2 "gl_ClipDistance" + OpMemberName %gl_PerVertex 3 "gl_CullDistance" + OpName %_ "" + OpMemberDecorate %gl_PerVertex 0 BuiltIn Position + OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize + OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance + OpMemberDecorate %gl_PerVertex 3 BuiltIn CullDistance + OpDecorate %gl_PerVertex Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector 
%float 4 + %m4float = OpTypeMatrix %v4float 4 + %uint = OpTypeInt 32 0 + %uint_1 = OpConstant %uint 1 +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1 +%_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex + %_ = OpVariable %_ptr_Output_gl_PerVertex Output + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %float_5 = OpConstant %float 5 + %float_6 = OpConstant %float 6 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %vec0 = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_4 + %vec1 = OpConstantComposite %v4float %float_5 %float_6 %float_7 %float_8 + %cmat = OpConstantComposite %m4float %vec0 %vec1 %vec0 %vec1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %main = OpFunction %void None %3 + %5 = OpLabel + %21 = OpAccessChain %_ptr_Output_v4float %_ %int_0 + %e0 = OpCompositeExtract %float %vec0 0 + %e1 = OpCompositeExtract %float %vec0 1 + %e2 = OpCompositeExtract %float %vec0 2 + %e3 = OpCompositeExtract %float %vec0 3 + %m13 = OpCompositeExtract %float %cmat 1 3 + %m21 = OpCompositeExtract %float %cmat 2 1 + %e_front = OpCompositeConstruct %v4float %e0 %e1 %e2 %e3 + %e_back = OpCompositeConstruct %v4float %e3 %e2 %m13 %m21 + %m0 = OpCompositeExtract %v4float %cmat 2 + %m1 = OpCompositeExtract %v4float %cmat 3 + %sum0 = OpFAdd %v4float %m0 %m1 + %sum1 = OpFAdd %v4float %e_front %e_back + %sum = OpFAdd %v4float %sum0 %sum1 + OpStore %21 %sum + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert b/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert new file mode 100644 index 00000000000..38c3de909cf --- /dev/null +++ b/shaders-no-opt/asm/vert/debug-printf.asm.vk.nocompat.vert @@ -0,0 +1,29 @@ + OpCapability Shader + OpExtension 
"SPV_KHR_non_semantic_info" + %1 = OpExtInstImport "NonSemantic.DebugPrintf" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %vert "main" %gl_Position + %4 = OpString "Foo %f %f" + OpSource HLSL 600 + OpName %vert "vert" + OpDecorate %gl_Position BuiltIn Position + %float = OpTypeFloat 32 + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_0 = OpConstant %float 0 + %v4float = OpTypeVector %float 4 + %9 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %12 = OpTypeFunction %void + %13 = OpTypeFunction %v4float +%gl_Position = OpVariable %_ptr_Output_v4float Output +%_ptr_Function_v4float = OpTypePointer Function %v4float + %vert = OpFunction %void None %12 + %15 = OpLabel + %16 = OpVariable %_ptr_Function_v4float Function + %17 = OpExtInst %void %1 1 %4 %float_1 %float_2 + OpStore %16 %9 + OpStore %gl_Position %9 + OpReturn + OpFunctionEnd diff --git a/shaders/comp/bitcast-16bit-1.invalid.comp b/shaders-no-opt/comp/bitcast-16bit-1.invalid.comp similarity index 100% rename from shaders/comp/bitcast-16bit-1.invalid.comp rename to shaders-no-opt/comp/bitcast-16bit-1.invalid.comp diff --git a/shaders/comp/bitcast-16bit-2.invalid.comp b/shaders-no-opt/comp/bitcast-16bit-2.invalid.comp similarity index 100% rename from shaders/comp/bitcast-16bit-2.invalid.comp rename to shaders-no-opt/comp/bitcast-16bit-2.invalid.comp diff --git a/shaders-no-opt/comp/glsl.std450.comp b/shaders-no-opt/comp/glsl.std450.comp new file mode 100644 index 00000000000..a17a82b82af --- /dev/null +++ b/shaders-no-opt/comp/glsl.std450.comp @@ -0,0 +1,129 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float res; + int ires; + uint ures; + + vec4 f32; + ivec4 s32; + uvec4 u32; + + mat2 m2; + mat3 m3; + mat4 m4; +}; + +void main() +{ + float tmp; + vec2 v2; + vec3 v3; + vec4 v4; + int itmp; + + res = round(f32.x); + res = 
roundEven(f32.x); + res = trunc(f32.x); + res = abs(f32.x); + ires = abs(s32.x); + res = sign(f32.x); + ires = sign(s32.x); + res = floor(f32.x); + res = ceil(f32.x); + res = fract(f32.x); + res = radians(f32.x); + res = degrees(f32.x); + res = sin(f32.x); + res = cos(f32.x); + res = tan(f32.x); + res = asin(f32.x); + res = acos(f32.x); + res = atan(f32.x); + res = sinh(f32.x); + res = cosh(f32.x); + res = tanh(f32.x); + res = asinh(f32.x); + res = acosh(f32.x); + res = atanh(f32.x); + res = atan(f32.x, f32.y); + res = pow(f32.x, f32.y); + res = exp(f32.x); + res = log(f32.x); + res = exp2(f32.x); + res = log2(f32.x); + res = sqrt(f32.x); + res = inversesqrt(f32.x); + + res = length(f32.x); + res = distance(f32.x, f32.y); + res = normalize(f32.x); + res = faceforward(f32.x, f32.y, f32.z); + res = reflect(f32.x, f32.y); + res = refract(f32.x, f32.y, f32.z); + + res = length(f32.xy); + res = distance(f32.xy, f32.zw); + v2 = normalize(f32.xy); + v2 = faceforward(f32.xy, f32.yz, f32.zw); + v2 = reflect(f32.xy, f32.zw); + v2 = refract(f32.xy, f32.yz, f32.w); + + v3 = cross(f32.xyz, f32.yzw); + + res = determinant(m2); + res = determinant(m3); + res = determinant(m4); + m2 = inverse(m2); + m3 = inverse(m3); + m4 = inverse(m4); + + res = modf(f32.x, tmp); + // ModfStruct + + res = min(f32.x, f32.y); + ures = min(u32.x, u32.y); + ires = min(s32.x, s32.y); + res = max(f32.x, f32.y); + ures = max(u32.x, u32.y); + ires = max(s32.x, s32.y); + + res = clamp(f32.x, f32.y, f32.z); + ures = clamp(u32.x, u32.y, u32.z); + ires = clamp(s32.x, s32.y, s32.z); + + res = mix(f32.x, f32.y, f32.z); + res = step(f32.x, f32.y); + res = smoothstep(f32.x, f32.y, f32.z); + res = fma(f32.x, f32.y, f32.z); + + res = frexp(f32.x, itmp); + // FrexpStruct + res = ldexp(f32.x, itmp); + + ures = packSnorm4x8(f32); + ures = packUnorm4x8(f32); + ures = packSnorm2x16(f32.xy); + ures = packUnorm2x16(f32.xy); + ures = packHalf2x16(f32.xy); + // packDouble2x32 + + v2 = unpackSnorm2x16(u32.x); + v2 = 
unpackUnorm2x16(u32.x); + v2 = unpackHalf2x16(u32.x); + v4 = unpackSnorm4x8(u32.x); + v4 = unpackUnorm4x8(u32.x); + // unpackDouble2x32 + + s32 = findLSB(s32); + s32 = findLSB(u32); + s32 = findMSB(s32); + s32 = findMSB(u32); + + // interpolateAtSample + // interpolateAtOffset + + // NMin, NMax, NClamp +} diff --git a/shaders-no-opt/comp/illegal-struct-name.asm.comp b/shaders-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 00000000000..f7a8787d3d8 --- /dev/null +++ b/shaders-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,62 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 31 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Foo" + OpMemberName %Foo 0 "abs" + OpName %f "f" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "abs" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "foo" + OpMemberName %SSBO 1 "foo2" + OpName %_ "" + OpName %linear "abs" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Foo = OpTypeStruct %float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %float + %SSBO = OpTypeStruct %Foo_0 %Foo_0 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + 
%f = OpVariable %_ptr_Function_Foo Function + %linear = OpVariable %_ptr_Function_int Function + %17 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 + %18 = OpLoad %Foo_0 %17 + %19 = OpCompositeExtract %float %18 0 + %21 = OpAccessChain %_ptr_Function_float %f %int_0 + OpStore %21 %19 + OpStore %linear %int_10 + %26 = OpLoad %Foo %f + %27 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1 + %28 = OpCompositeExtract %float %26 0 + %30 = OpAccessChain %_ptr_Uniform_float %27 %int_0 + OpStore %30 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/comp/image-load-formatted.comp b/shaders-no-opt/comp/image-load-formatted.comp new file mode 100644 index 00000000000..7fd587d99ad --- /dev/null +++ b/shaders-no-opt/comp/image-load-formatted.comp @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_shader_image_load_formatted : require +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform image2D img; + +void main() +{ + vec4 v = imageLoad(img, ivec2(gl_GlobalInvocationID.xy)); + imageStore(img, ivec2(gl_GlobalInvocationID.xy), v + 1.0); +} diff --git a/shaders/comp/inout-struct.invalid.comp b/shaders-no-opt/comp/inout-struct.invalid.comp similarity index 100% rename from shaders/comp/inout-struct.invalid.comp rename to shaders-no-opt/comp/inout-struct.invalid.comp diff --git a/shaders-no-opt/comp/int16min-literal.comp b/shaders-no-opt/comp/int16min-literal.comp new file mode 100644 index 00000000000..c1b345266d8 --- /dev/null +++ b/shaders-no-opt/comp/int16min-literal.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float16_t a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float16_t b; +}; + +void main() +{ + int16_t v = float16BitsToInt16(b); + v ^= 0x8000s; + a = int16BitsToFloat16(v); +} diff --git 
a/shaders-no-opt/comp/int64min-literal.comp b/shaders-no-opt/comp/int64min-literal.comp new file mode 100644 index 00000000000..ac20389033d --- /dev/null +++ b/shaders-no-opt/comp/int64min-literal.comp @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + double b2 = b; + int64_t v = doubleBitsToInt64(b2); + v ^= 0x8000000000000000L; + double a2 = int64BitsToDouble(v); + a = float(a2); +} diff --git a/shaders-no-opt/comp/intmin-literal.comp b/shaders-no-opt/comp/intmin-literal.comp new file mode 100644 index 00000000000..ee35cedabb9 --- /dev/null +++ b/shaders-no-opt/comp/intmin-literal.comp @@ -0,0 +1,18 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + a = intBitsToFloat(floatBitsToInt(b) ^ 0x80000000); +} diff --git a/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp b/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp new file mode 100644 index 00000000000..e916ab2408c --- /dev/null +++ b/shaders-no-opt/comp/loop-break-merge-after-inner-continue.comp @@ -0,0 +1,21 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0) buffer STO +{ + uint data[]; +} ssbo; + +void main() +{ + while(true) + { + ssbo.data[0] += 1; + if (bool(ssbo.data[2])) + { + ssbo.data[5] += 1; + continue; + } + break; + } +} diff --git a/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp b/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp new file mode 100644 index 00000000000..72998477111 --- /dev/null +++ b/shaders-no-opt/comp/loop-resolve-debug-semantics.g.comp @@ -0,0 +1,16 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer SSBO +{ 
+ int v[]; +}; + +void main() +{ + for (int i = 0; i < 4; i++) + { + v[i] += 10; + } +} diff --git a/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp b/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp new file mode 100644 index 00000000000..72998477111 --- /dev/null +++ b/shaders-no-opt/comp/loop-resolve-debug-semantics.gV.comp @@ -0,0 +1,16 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer SSBO +{ + int v[]; +}; + +void main() +{ + for (int i = 0; i < 4; i++) + { + v[i] += 10; + } +} diff --git a/shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp b/shaders-no-opt/comp/shader_ballot_nonuniform_invocations.invalid.comp similarity index 100% rename from shaders/amd/shader_ballot_nonuniform_invocations.invalid.comp rename to shaders-no-opt/comp/shader_ballot_nonuniform_invocations.invalid.comp diff --git a/shaders-no-opt/comp/specialization-constant-evaluation.comp b/shaders-no-opt/comp/specialization-constant-evaluation.comp new file mode 100644 index 00000000000..d45d021ac55 --- /dev/null +++ b/shaders-no-opt/comp/specialization-constant-evaluation.comp @@ -0,0 +1,123 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(constant_id = 0) const bool TRUE = true; +layout(constant_id = 1) const bool FALSE = false; +layout(constant_id = 2) const int SONE = 1; +layout(constant_id = 3) const int STWO = 2; +layout(constant_id = 4) const int SNEG_TWO = -2; +layout(constant_id = 5) const uint UONE = 1; +layout(constant_id = 6) const uint UTWO = 2; +layout(constant_id = 7) const int SNEG_THREE = -3; + +const uint IADD = SONE + STWO + UONE + UTWO; // 6 +const uint ISUB = UTWO - SONE; // 1 +const uint IMUL = UTWO * UTWO; // 4 +const uint UDIV = UTWO / UTWO; // 1 +const int SDIV = STWO / SNEG_TWO; // -1 +//const int SREM = STWO % SNEG_THREE; // 1 +const int SREM = 1; +const int SMOD = STWO % SNEG_THREE; // -1 +const uint UMOD = IADD % IMUL; // 2 + +const uint LSHL = IADD << ISUB; // 12 +const uint RSHL = IADD 
>> ISUB; // 3 +const int RSHA = (-int(IADD)) >> (-SDIV); // -3 + +const bool IEQ = IADD == ISUB; // false +const bool INEQ = IADD != ISUB; // true +const bool ULT = IADD < ISUB; // false +const bool ULE = IADD <= ISUB; // false +const bool UGT = IADD > ISUB; // true +const bool UGE = IADD >= ISUB; // true + +const bool SLT = SMOD < SREM; // true +const bool SLE = SMOD <= SREM; // true +const bool SGT = SMOD > SREM; // false +const bool SGE = SMOD >= SREM; // false + +const bool LOR = IEQ || SLT; // true +const bool LAND = IEQ && SLT; // false +const bool LNOT = !LOR; // false + +const uint AND = IADD & IADD; // 6 +const uint OR = IADD | ISUB; // 7 +const uint XOR = IADD ^ IADD; // 0 +const uint NOT = ~XOR; // UINT_MAX + +const bool LEQ = LAND == LNOT; // true +const bool LNEQ = LAND != LNOT; // false + +const uint SEL = IEQ ? IADD : ISUB; // 1 + +#define DUMMY_SSBO(name, bind, size) layout(std430, set = 0, binding = bind) buffer SSBO_##name { float val[size]; float dummy; } name + +// Normalize all sizes to 1 element so that the default offsets in glslang matches up with what we should be computing. +// If we do it right, we should get no layout(offset = N) expressions. +DUMMY_SSBO(IAdd, 0, IADD - 5); +DUMMY_SSBO(ISub, 1, ISUB); +DUMMY_SSBO(IMul, 2, IMUL - 3); +DUMMY_SSBO(UDiv, 3, UDIV); +DUMMY_SSBO(SDiv, 4, SDIV + 2); +DUMMY_SSBO(SRem, 5, SREM); +DUMMY_SSBO(SMod, 6, SMOD + 2); +DUMMY_SSBO(UMod, 7, UMOD - 1); +DUMMY_SSBO(LShl, 8, LSHL - 11); +DUMMY_SSBO(RShl, 9, RSHL - 2); +DUMMY_SSBO(RSha, 10, RSHA + 4); +DUMMY_SSBO(IEq, 11, IEQ ? 2 : 1); +DUMMY_SSBO(INeq, 12, INEQ ? 1 : 2); +DUMMY_SSBO(Ult, 13, ULT ? 2 : 1); +DUMMY_SSBO(Ule, 14, ULE ? 2 : 1); +DUMMY_SSBO(Ugt, 15, UGT ? 1 : 2); +DUMMY_SSBO(Uge, 16, UGE ? 1 : 2); +DUMMY_SSBO(Slt, 17, SLT ? 1 : 2); +DUMMY_SSBO(Sle, 18, SLE ? 1 : 2); +DUMMY_SSBO(Sgt, 19, SGT ? 2 : 1); +DUMMY_SSBO(Sge, 20, SGE ? 2 : 1); +DUMMY_SSBO(Lor, 21, LOR ? 1 : 2); +DUMMY_SSBO(Land, 22, LAND ? 2 : 1); +DUMMY_SSBO(Lnot, 23, LNOT ? 
2 : 1); +DUMMY_SSBO(And, 24, AND - 5); +DUMMY_SSBO(Or, 24, OR - 6); +DUMMY_SSBO(Xor, 24, XOR + 1); +DUMMY_SSBO(Not, 25, NOT - 0xfffffffeu); +DUMMY_SSBO(Leq, 26, LEQ ? 1 : 2); +DUMMY_SSBO(Lneq, 27, LNEQ ? 2 : 1); +DUMMY_SSBO(Sel, 28, SEL); + +void main() +{ + IAdd.val[0] = 0.0; + ISub.val[0] = 0.0; + IMul.val[0] = 0.0; + UDiv.val[0] = 0.0; + SDiv.val[0] = 0.0; + SRem.val[0] = 0.0; + SMod.val[0] = 0.0; + UMod.val[0] = 0.0; + LShl.val[0] = 0.0; + RShl.val[0] = 0.0; + RSha.val[0] = 0.0; + IEq.val[0] = 0.0; + INeq.val[0] = 0.0; + Ult.val[0] = 0.0; + Ule.val[0] = 0.0; + Ugt.val[0] = 0.0; + Uge.val[0] = 0.0; + Slt.val[0] = 0.0; + Sle.val[0] = 0.0; + Sgt.val[0] = 0.0; + Sge.val[0] = 0.0; + Lor.val[0] = 0.0; + Land.val[0] = 0.0; + Lnot.val[0] = 0.0; + And.val[0] = 0.0; + Or.val[0] = 0.0; + Xor.val[0] = 0.0; + Not.val[0] = 0.0; + Leq.val[0] = 0.0; + Lneq.val[0] = 0.0; + Sel.val[0] = 0.0; +} diff --git a/shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp similarity index 100% rename from shaders/vulkan/comp/struct-packing-scalar.nocompat.invalid.vk.comp rename to shaders-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp diff --git a/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp b/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp similarity index 92% rename from shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp rename to shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp index 68fc74f910d..a73a231259a 100644 --- a/shaders/vulkan/comp/subgroups.nocompat.invalid.vk.comp +++ b/shaders-no-opt/comp/subgroups.nocompat.invalid.vk.comp @@ -72,6 +72,9 @@ void main() uvec4 anded = subgroupAnd(ballot_value); uvec4 ored = subgroupOr(ballot_value); uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); + bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); 
added = subgroupInclusiveAdd(added); iadded = subgroupInclusiveAdd(iadded); @@ -117,6 +120,10 @@ void main() ored = subgroupClusteredOr(ored, 4u); xored = subgroupClusteredXor(xored, 4u); + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); + // quad vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); diff --git a/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp b/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp new file mode 100644 index 00000000000..833f43079b1 --- /dev/null +++ b/shaders-no-opt/comp/subgroups_basicvoteballot.vk.comp @@ -0,0 +1,49 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require + +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = 
subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal_bool = subgroupAllEqual(true); + bool has_equal_T = subgroupAllEqual(uvec3(5u)); +} \ No newline at end of file diff --git a/shaders-no-opt/comp/trivial-select-cast-vector.comp b/shaders-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 00000000000..c3e0922a166 --- /dev/null +++ b/shaders-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,14 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + vec3 a; + vec3 b; +}; + +void main() +{ + bvec3 c = lessThan(b, vec3(1.0)); + a = mix(vec3(1, 0, 0), vec3(0, 0, 1), c); +} diff --git a/shaders-no-opt/comp/trivial-select-matrix.spv14.comp b/shaders-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 00000000000..5ffcc3f3a49 --- /dev/null +++ b/shaders-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + mat3 a; + float b; +}; + +void main() +{ + // Scalar to Matrix + bool c = b < 1.0; + a = c ? mat3(vec3(1), vec3(1), vec3(1)) : mat3(vec3(0), vec3(0), vec3(0)); + a = c ? 
mat3(1) : mat3(0); +} diff --git a/shaders/frag/16bit-constants.frag b/shaders-no-opt/frag/16bit-constants.invalid.frag similarity index 100% rename from shaders/frag/16bit-constants.frag rename to shaders-no-opt/frag/16bit-constants.invalid.frag diff --git a/shaders/desktop-only/frag/fp16.invalid.desktop.frag b/shaders-no-opt/frag/fp16.invalid.desktop.frag similarity index 100% rename from shaders/desktop-only/frag/fp16.invalid.desktop.frag rename to shaders-no-opt/frag/fp16.invalid.desktop.frag diff --git a/shaders-no-opt/frag/frag-fully-covered.frag b/shaders-no-opt/frag/frag-fully-covered.frag new file mode 100644 index 00000000000..95cc4fc9757 --- /dev/null +++ b/shaders-no-opt/frag/frag-fully-covered.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_NV_conservative_raster_underestimation : require + +layout(location = 0) out vec4 FragColor; + +void main() +{ + if (!gl_FragFullyCoveredNV) + discard; + FragColor = vec4(1.0); +} diff --git a/shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag b/shaders-no-opt/frag/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag similarity index 100% rename from shaders/amd/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag rename to shaders-no-opt/frag/fragmentMaskFetch_subpassInput.vk.nocompat.invalid.frag diff --git a/shaders/amd/fs.invalid.frag b/shaders-no-opt/frag/fs.invalid.frag similarity index 100% rename from shaders/amd/fs.invalid.frag rename to shaders-no-opt/frag/fs.invalid.frag diff --git a/shaders-no-opt/frag/image-gather.frag b/shaders-no-opt/frag/image-gather.frag new file mode 100644 index 00000000000..b492cfbe903 --- /dev/null +++ b/shaders-no-opt/frag/image-gather.frag @@ -0,0 +1,14 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(set = 0, binding = 0) uniform sampler2D uSamp; +layout(set = 0, binding = 1) uniform sampler2DShadow uSampShadow; +layout(location = 0) in vec3 vUV; + +void main() +{ + FragColor = textureGather(uSamp, vUV.xy, 0); + FragColor += 
textureGather(uSamp, vUV.xy, 1); + FragColor += textureGather(uSampShadow, vUV.xy, vUV.z); +} diff --git a/shaders-no-opt/frag/modf-non-function-purity-analysis.frag b/shaders-no-opt/frag/modf-non-function-purity-analysis.frag new file mode 100644 index 00000000000..c1f1a1266f1 --- /dev/null +++ b/shaders-no-opt/frag/modf-non-function-purity-analysis.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +vec4 modf_inner() +{ + return modf(v, vo1); +} + +void main() +{ + vo0 = modf_inner(); +} diff --git a/shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag b/shaders-no-opt/frag/multi-dimensional.desktop.invalid.flatten_dim.frag similarity index 100% rename from shaders/flatten/multi-dimensional.desktop.invalid.flatten_dim.frag rename to shaders-no-opt/frag/multi-dimensional.desktop.invalid.flatten_dim.frag diff --git a/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag b/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag new file mode 100644 index 00000000000..452aa953a42 --- /dev/null +++ b/shaders-no-opt/frag/nonuniform-constructor.vk.nocompat.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; +layout(location = 1) flat in int vIndex; + +layout(set = 0, binding = 0) uniform texture2D uTex[]; +layout(set = 1, binding = 0) uniform sampler Immut; + +void main() +{ + FragColor = texture(nonuniformEXT(sampler2D(uTex[vIndex], Immut)), vUV); +} diff --git a/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag new file mode 100644 index 00000000000..59079fe58b4 --- /dev/null +++ b/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 
0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag b/shaders-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag similarity index 100% rename from shaders/vulkan/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag rename to shaders-no-opt/frag/scalar-block-layout-ubo-std430.vk.nocompat.invalid.frag diff --git a/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag b/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag new file mode 100644 index 00000000000..880e67e5de2 --- /dev/null +++ b/shaders-no-opt/frag/sparse-texture-clamp.desktop.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require +#extension GL_ARB_sparse_texture_clamp : require + +layout(set = 0, binding = 0) uniform sampler2D uSamp; +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; + +void main() +{ + vec4 texel; + int code; + + code = sparseTextureClampARB(uSamp, vUV, 1.0, texel, 2.0); + texel = textureClampARB(uSamp, vUV, 1.0, 2.0); + code = sparseTextureOffsetClampARB(uSamp, vUV, ivec2(1, 2), 1.0, texel, 2.0); + texel = textureOffsetClampARB(uSamp, vUV, ivec2(1, 2), 1.0, 2.0); + code = sparseTextureGradClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), 1.0, texel); + texel = textureGradClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), 1.0); + code = sparseTextureGradOffsetClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), ivec2(-1, -2), 1.0, texel); + texel = textureGradOffsetClampARB(uSamp, vUV, vec2(1.0), vec2(2.0), ivec2(-1, -2), 1.0); +} + diff --git a/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag 
b/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag new file mode 100644 index 00000000000..67cc5b42a91 --- /dev/null +++ b/shaders-no-opt/frag/sparse-texture-feedback.desktop.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_sparse_texture2 : require +#extension GL_ARB_sparse_texture_clamp : require + +layout(set = 0, binding = 0) uniform sampler2D uSamp; +layout(set = 0, binding = 1) uniform sampler2DMS uSampMS; +layout(set = 0, binding = 2, rgba8) uniform image2D uImage; +layout(set = 0, binding = 3, rgba8) uniform image2DMS uImageMS; +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec2 vUV; + +void main() +{ + vec4 texel; + bool ret; + + ret = sparseTexelsResidentARB(sparseTextureARB(uSamp, vUV, texel)); + ret = sparseTexelsResidentARB(sparseTextureARB(uSamp, vUV, texel, 1.1)); + ret = sparseTexelsResidentARB(sparseTextureLodARB(uSamp, vUV, 1.0, texel)); + ret = sparseTexelsResidentARB(sparseTextureOffsetARB(uSamp, vUV, ivec2(1, 1), texel)); + ret = sparseTexelsResidentARB(sparseTextureOffsetARB(uSamp, vUV, ivec2(2, 2), texel, 0.5)); + ret = sparseTexelsResidentARB(sparseTexelFetchARB(uSamp, ivec2(vUV), 1, texel)); + ret = sparseTexelsResidentARB(sparseTexelFetchARB(uSampMS, ivec2(vUV), 2, texel)); + ret = sparseTexelsResidentARB(sparseTexelFetchOffsetARB(uSamp, ivec2(vUV), 1, ivec2(2, 3), texel)); + ret = sparseTexelsResidentARB(sparseTextureLodOffsetARB(uSamp, vUV, 1.5, ivec2(2, 3), texel)); + ret = sparseTexelsResidentARB(sparseTextureGradARB(uSamp, vUV, vec2(1.0), vec2(3.0), texel)); + ret = sparseTexelsResidentARB(sparseTextureGradOffsetARB(uSamp, vUV, vec2(1.0), vec2(3.0), ivec2(-2, -3), texel)); + ret = sparseTexelsResidentARB(sparseTextureClampARB(uSamp, vUV, 4.0, texel)); + ret = sparseTexelsResidentARB(sparseImageLoadARB(uImage, ivec2(vUV), texel)); + ret = sparseTexelsResidentARB(sparseImageLoadARB(uImageMS, ivec2(vUV), 1, texel)); +} diff --git a/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag 
b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag new file mode 100644 index 00000000000..621457a14ad --- /dev/null +++ b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.frag @@ -0,0 +1,12 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec3 FragColor; +layout(location = 1) out vec4 FragColor2; + +void main() +{ + FragColor.rgb = subpassLoad(uSubpass0).rgb + subpassLoad(uSubpass1).rgb; +} diff --git a/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag new file mode 100644 index 00000000000..621457a14ad --- /dev/null +++ b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.framebuffer-fetch-noncoherent.frag @@ -0,0 +1,12 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec3 FragColor; +layout(location = 1) out vec4 FragColor2; + +void main() +{ + FragColor.rgb = subpassLoad(uSubpass0).rgb + subpassLoad(uSubpass1).rgb; +} diff --git a/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag new file mode 100644 index 00000000000..621457a14ad --- /dev/null +++ b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.frag @@ -0,0 +1,12 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump 
subpassInput uSubpass1; +layout(location = 0) out vec3 FragColor; +layout(location = 1) out vec4 FragColor2; + +void main() +{ + FragColor.rgb = subpassLoad(uSubpass0).rgb + subpassLoad(uSubpass1).rgb; +} diff --git a/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag new file mode 100644 index 00000000000..621457a14ad --- /dev/null +++ b/shaders-no-opt/frag/subpass-input.framebuffer-fetch.nocompat.legacy.framebuffer-fetch-noncoherent.frag @@ -0,0 +1,12 @@ +#version 310 es +precision mediump float; + +layout(input_attachment_index = 0, set = 0, binding = 0) uniform mediump subpassInput uSubpass0; +layout(input_attachment_index = 1, set = 0, binding = 1) uniform mediump subpassInput uSubpass1; +layout(location = 0) out vec3 FragColor; +layout(location = 1) out vec4 FragColor2; + +void main() +{ + FragColor.rgb = subpassLoad(uSubpass0).rgb + subpassLoad(uSubpass1).rgb; +} diff --git a/shaders-no-opt/frag/texture-gather-offsets.frag b/shaders-no-opt/frag/texture-gather-offsets.frag new file mode 100644 index 00000000000..52d79097464 --- /dev/null +++ b/shaders-no-opt/frag/texture-gather-offsets.frag @@ -0,0 +1,14 @@ +#version 460 core +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec2 inUv; + +layout(location = 0) out vec4 outColor; + +layout(set=0, binding=0) uniform sampler2D Image0; + +void main(void) +{ + const ivec2 offs[4] = {ivec2(0,0), ivec2(1,0), ivec2(1,1), ivec2(0,1)}; + outColor = textureGatherOffsets(Image0, inUv, offs); +} diff --git a/shaders-no-opt/frag/texture-gather-uint-component.asm.frag b/shaders-no-opt/frag/texture-gather-uint-component.asm.frag new file mode 100644 index 00000000000..b4d9509ab49 --- /dev/null +++ b/shaders-no-opt/frag/texture-gather-uint-component.asm.frag @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference 
Front End; 10 +; Bound: 22 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %vUV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uSamp "uSamp" + OpName %vUV "vUV" + OpDecorate %FragColor Location 0 + OpDecorate %uSamp DescriptorSet 0 + OpDecorate %uSamp Binding 0 + OpDecorate %vUV Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %10 = OpTypeImage %float 2D 0 0 0 1 Unknown + %11 = OpTypeSampledImage %10 +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %uSamp = OpVariable %_ptr_UniformConstant_11 UniformConstant + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input + %int = OpTypeInt 32 0 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %11 %uSamp + %18 = OpLoad %v2float %vUV + %21 = OpImageGather %v4float %14 %18 %int_1 + OpStore %FragColor %21 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/frag/texture1d-emulation.es.frag b/shaders-no-opt/frag/texture1d-emulation.es.frag new file mode 100644 index 00000000000..1ad99932b60 --- /dev/null +++ b/shaders-no-opt/frag/texture1d-emulation.es.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler1D uSamp; +layout(set = 0, binding = 1) uniform sampler1DShadow uSampShadow; +layout(set = 0, binding = 2) uniform sampler1DArray uSampArray; +layout(set = 0, binding = 3) uniform sampler1DArrayShadow uSampArrayShadow; +layout(set = 0, binding = 4, r32f) uniform image1D uImage; +layout(location = 0) in vec4 vUV; +layout(location = 0) out vec4 FragColor; + +void main() +{ + // 1D + FragColor = 
texture(uSamp, vUV.x); + FragColor += textureProj(uSamp, vUV.xy); + FragColor += texelFetch(uSamp, int(vUV.x), 0); + + // 1D Shadow + FragColor += texture(uSampShadow, vUV.xyz); + FragColor += textureProj(uSampShadow, vUV); + + // 1D Array + FragColor = texture(uSampArray, vUV.xy); + FragColor += texelFetch(uSampArray, ivec2(vUV.xy), 0); + + // 1D Array Shadow + FragColor += texture(uSampArrayShadow, vUV.xyz); + + // 1D images + FragColor += imageLoad(uImage, int(vUV.x)); + imageStore(uImage, int(vUV.x), FragColor); +} diff --git a/shaders-no-opt/frag/texture1d-emulation.legacy.frag b/shaders-no-opt/frag/texture1d-emulation.legacy.frag new file mode 100644 index 00000000000..9ebd81e3338 --- /dev/null +++ b/shaders-no-opt/frag/texture1d-emulation.legacy.frag @@ -0,0 +1,17 @@ +#version 450 + +layout(set = 0, binding = 0) uniform sampler1D uSamp; +layout(set = 0, binding = 1) uniform sampler1DShadow uSampShadow; +layout(location = 0) in vec4 vUV; +layout(location = 0) out vec4 FragColor; + +void main() +{ + // 1D + FragColor = texture(uSamp, vUV.x); + FragColor += textureProj(uSamp, vUV.xy); + + // 1D Shadow + FragColor += texture(uSampShadow, vUV.xyz); + FragColor += textureProj(uSampShadow, vUV); +} diff --git a/shaders-no-opt/frag/variables.zero-initialize.frag b/shaders-no-opt/frag/variables.zero-initialize.frag new file mode 100644 index 00000000000..41da8001f47 --- /dev/null +++ b/shaders-no-opt/frag/variables.zero-initialize.frag @@ -0,0 +1,21 @@ +#version 450 + +layout(location = 0) in vec4 vColor; +layout(location = 0) out vec4 FragColor; + +int uninit_int; +ivec4 uninit_vector; +mat4 uninit_matrix; + +struct Foo { int a; }; +Foo uninit_foo; + +void main() +{ + int uninit_function_int; + if (vColor.x > 10.0) + uninit_function_int = 10; + else + uninit_function_int = 20; + FragColor = vColor; +} diff --git a/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag 
b/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag new file mode 100644 index 00000000000..d2bd15a9785 --- /dev/null +++ b/shaders-no-opt/legacy/frag/switch-single-case-multiple-exit-cfg.legacy.asm.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 54 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord %_GLF_color + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 310 + OpName %main "main" + OpName %gl_FragCoord "gl_FragCoord" + OpName %_GLF_color "_GLF_color" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_GLF_color Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %bool = OpTypeBool + %v2float = OpTypeVector %float 2 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %_GLF_color = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %52 = OpUndef %v2float + %main = OpFunction %void None %3 + %5 = OpLabel + OpSelectionMerge %9 None + OpSwitch %int_0 %8 + %8 = OpLabel + %17 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %18 = OpLoad %float %17 + %22 = OpFOrdNotEqual %bool %18 %18 + OpSelectionMerge %24 None + OpBranchConditional %22 %23 %24 + %23 = OpLabel + OpBranch %9 + %24 = OpLabel + %33 = OpCompositeExtract %float %52 1 + %51 = OpCompositeInsert %v2float %33 %52 1 + OpBranch %9 + %9 = OpLabel + %53 = OpPhi %v2float %52 %23 %51 %24 + %42 = OpCompositeExtract %float %53 0 + %43 = OpCompositeExtract %float %53 1 + %48 = OpCompositeConstruct %v4float %42 %43 %float_1 %float_1 + OpStore 
%_GLF_color %48 + OpReturn + OpFunctionEnd diff --git a/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task b/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task new file mode 100644 index 00000000000..3fcb7147114 --- /dev/null +++ b/shaders-no-opt/task/task-shader-basic-2.vk.spv14.nocompat.task @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; +taskPayloadSharedEXT Payload p; +shared float vs[24]; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12]; + barrier(); + if (gl_LocalInvocationIndex < 6) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6]; + barrier(); + if (gl_LocalInvocationIndex < 3) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3]; + barrier(); + + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + if (vs[5] > 20.0) + { + EmitMeshTasksEXT(int(vs[4]), int(vs[6]), int(vs[8])); + } + else + { + EmitMeshTasksEXT(int(vs[6]), 10, 50u); + } +} diff --git a/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task b/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task new file mode 100644 index 00000000000..6e97160309a --- /dev/null +++ b/shaders-no-opt/task/task-shader-basic.vk.spv14.nocompat.task @@ -0,0 +1,28 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; +taskPayloadSharedEXT Payload p; +shared float vs[24]; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12]; + barrier(); + if (gl_LocalInvocationIndex < 6) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6]; + barrier(); + if 
(gl_LocalInvocationIndex < 3) + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3]; + barrier(); + + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + EmitMeshTasksEXT(int(vs[4]), int(vs[6]), int(vs[8])); +} diff --git a/shaders-no-opt/vert/io-blocks.force-flattened-io.vert b/shaders-no-opt/vert/io-blocks.force-flattened-io.vert new file mode 100644 index 00000000000..e308a9f2891 --- /dev/null +++ b/shaders-no-opt/vert/io-blocks.force-flattened-io.vert @@ -0,0 +1,25 @@ +#version 450 + +struct Foo +{ + vec4 bar[2]; + vec4 baz[2]; +}; + +layout(location = 0) out Vertex +{ + Foo foo; + Foo foo2; +}; + +layout(location = 8) out Foo foo3; + +void main() +{ + foo.bar[0] = vec4(1.0); + foo.baz[1] = vec4(2.0); + foo2.bar[0] = vec4(3.0); + foo2.baz[1] = vec4(4.0); + foo3.bar[0] = vec4(5.0); + foo3.baz[1] = vec4(6.0); +} diff --git a/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag b/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag new file mode 100644 index 00000000000..8aee6d35909 --- /dev/null +++ b/shaders-no-opt/vulkan/frag/shading-rate.vk.nocompat.frag @@ -0,0 +1,9 @@ +#version 450 +#extension GL_EXT_fragment_shading_rate : require + +layout(location = 0) out uint FragColor; + +void main() +{ + FragColor = gl_ShadingRateEXT; +} diff --git a/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag b/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag new file mode 100644 index 00000000000..6d3987a886d --- /dev/null +++ b/shaders-no-opt/vulkan/frag/ubo-offset-out-of-order.vk.nocompat.frag @@ -0,0 +1,16 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +layout(std140, binding = 0) uniform UBO +{ + layout(offset = 16) mat4 m; + layout(offset = 0) vec4 v; +}; + +layout(location = 0) in vec4 vColor; + +void main() +{ + FragColor = m * vColor + v; +} diff --git a/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag 
b/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag new file mode 100644 index 00000000000..9a8d9d20b25 --- /dev/null +++ b/shaders-no-opt/vulkan/frag/volatile-helper-invocation.vk.nocompat.spv16.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +layout(location = 0) out float FragColor; + +void main() +{ + FragColor = float(gl_HelperInvocation); + demote; + FragColor = float(gl_HelperInvocation); +} diff --git a/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert b/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert new file mode 100644 index 00000000000..95ac8d64453 --- /dev/null +++ b/shaders-no-opt/vulkan/vert/primitive-shading-rate.vk.nocompat.vert @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_fragment_shading_rate : require + +void main() +{ + gl_PrimitiveShadingRateEXT = 3; + gl_Position = vec4(1.0); +} diff --git a/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp b/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp new file mode 100644 index 00000000000..caca050ad3f --- /dev/null +++ b/shaders-reflection/asm/comp/pointer-to-array-of-physical-pointer.asm.comp @@ -0,0 +1,51 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 17 +; Schema: 0 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_EXT_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 460 + OpSourceExtension "GL_EXT_buffer_reference" + OpSourceExtension "GL_EXT_buffer_reference2" + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_int64" + OpName %main "main" + OpName %Params "Params" + OpMemberName %Params 0 "x" + OpMemberName %Params 1 "y" + OpName %IntBuf "IntBuf" + OpMemberName %IntBuf 0 "v" + OpName %_ "" + 
OpDecorate %_arr_7_uint_3 ArrayStride 16 + OpMemberDecorate %Params 0 Offset 0 + OpMemberDecorate %Params 1 Offset 16 + OpDecorate %Params Block + OpMemberDecorate %IntBuf 0 Offset 0 + OpDecorate %IntBuf Block + OpDecorate %_arr__ptr_PhysicalStorageBuffer_IntBuf_uint_3 ArrayStride 16 + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_IntBuf PhysicalStorageBuffer + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 +%_arr_7_uint_3 = OpTypeArray %_ptr_PhysicalStorageBuffer_IntBuf %uint_3 +%ptr_array_ptr = OpTypePointer PhysicalStorageBuffer %_arr_7_uint_3 + %Params = OpTypeStruct %float %ptr_array_ptr + %int = OpTypeInt 32 1 + %IntBuf = OpTypeStruct %int +%_ptr_PhysicalStorageBuffer_IntBuf = OpTypePointer PhysicalStorageBuffer %IntBuf +%_arr__ptr_PhysicalStorageBuffer_IntBuf_uint_3 = OpTypeArray %_ptr_PhysicalStorageBuffer_IntBuf %uint_3 +%_ptr_Uniform_Params = OpTypePointer Uniform %Params + %_ = OpVariable %_ptr_Uniform_Params Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-reflection/comp/array-of-physical-pointer.comp b/shaders-reflection/comp/array-of-physical-pointer.comp new file mode 100644 index 00000000000..992f6f90891 --- /dev/null +++ b/shaders-reflection/comp/array-of-physical-pointer.comp @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable +#extension GL_EXT_buffer_reference2 : enable +layout(buffer_reference, std430, buffer_reference_align = 4) buffer IntBuf +{ + int v; +}; +layout(std140, binding = 0) uniform Params +{ + float x; + IntBuf y[3]; +}; +void main() +{ +} diff --git a/shaders-reflection/comp/function-pointer.invalid.asm.comp b/shaders-reflection/comp/function-pointer.invalid.asm.comp new file mode 100644 index 00000000000..440f3311ef3 --- /dev/null +++ 
b/shaders-reflection/comp/function-pointer.invalid.asm.comp @@ -0,0 +1,19 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 7 +; Schema: 0 +OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %2 "main" +OpExecutionMode %2 LocalSize 1 1 1 +OpSource GLSL 450 +OpName %2 "main" +%3 = OpTypeVoid +%4 = OpTypeFunction %3 +%5 = OpTypePointer Private %4 +%2 = OpFunction %3 None %4 +%6 = OpLabel +OpReturn +OpFunctionEnd diff --git a/shaders-reflection/comp/out-of-order-block-offsets.comp b/shaders-reflection/comp/out-of-order-block-offsets.comp new file mode 100644 index 00000000000..da5c86eef46 --- /dev/null +++ b/shaders-reflection/comp/out-of-order-block-offsets.comp @@ -0,0 +1,12 @@ +#version 450 + +layout(set = 0, binding = 0) buffer SSBO +{ + layout(offset = 8) uint foo; + layout(offset = 4) uint bar; +}; + +void main() +{ + bar = foo; +} diff --git a/shaders-reflection/comp/physical-pointer.comp b/shaders-reflection/comp/physical-pointer.comp new file mode 100644 index 00000000000..ecd1e287d23 --- /dev/null +++ b/shaders-reflection/comp/physical-pointer.comp @@ -0,0 +1,15 @@ +#version 460 +#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable +#extension GL_EXT_buffer_reference2 : enable +layout(buffer_reference, std430, buffer_reference_align = 4) buffer IntBuf +{ + int v; +}; +layout(std140, binding = 0) uniform Params +{ + float x; + IntBuf y; +}; +void main() +{ +} diff --git a/shaders-reflection/comp/workgroup-size-spec-constant.comp b/shaders-reflection/comp/workgroup-size-spec-constant.comp new file mode 100644 index 00000000000..376a3516220 --- /dev/null +++ b/shaders-reflection/comp/workgroup-size-spec-constant.comp @@ -0,0 +1,13 @@ +#version 450 + +layout(local_size_x_id = 10, local_size_y_id = 40, local_size_z_id = 60) in; + +layout(std430, set = 0, binding = 0) buffer SSBO +{ + vec4 v; +}; + +void main() +{ + v = vec4(10.0); +} diff --git 
a/shaders-reflection/vert/array-size-reflection.vert b/shaders-reflection/vert/array-size-reflection.vert new file mode 100644 index 00000000000..24a4a43831f --- /dev/null +++ b/shaders-reflection/vert/array-size-reflection.vert @@ -0,0 +1,13 @@ +#version 450 +layout(constant_id = 0) const int ARR_SIZE = 1; + +layout(binding = 0, set = 1, std140) uniform u_ +{ + vec4 u_0[ARR_SIZE]; +}; + +void main() +{ + gl_Position = u_0[0]; +} + diff --git a/shaders-reflection/vert/stride-reflection.vert b/shaders-reflection/vert/stride-reflection.vert new file mode 100644 index 00000000000..6e7d96df44d --- /dev/null +++ b/shaders-reflection/vert/stride-reflection.vert @@ -0,0 +1,14 @@ +#version 450 + +layout(binding = 0, set = 0, std140) uniform U +{ + vec4 v[4]; + mat4 c[4]; + layout(row_major) mat4 r[4]; +}; + +void main() +{ + gl_Position = v[0]; +} + diff --git a/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag b/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag new file mode 100644 index 00000000000..fae211f278d --- /dev/null +++ b/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag @@ -0,0 +1,1087 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 572 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %Main "main" %in_var_TEXCOORD0 %in_var_TEXCOORD7 %in_var_TEXCOORD8 %gl_FragCoord %gl_FrontFacing %out_var_SV_Target0 + OpExecutionMode %Main OriginUpperLeft + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + 
OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 
"View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + 
OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + 
OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName 
%type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_MobileDirectionalLight "type.MobileDirectionalLight" + OpMemberName %type_MobileDirectionalLight 0 "MobileDirectionalLight_DirectionalLightColor" + OpMemberName %type_MobileDirectionalLight 1 "MobileDirectionalLight_DirectionalLightDirectionAndShadowTransition" + OpMemberName %type_MobileDirectionalLight 2 "MobileDirectionalLight_DirectionalLightShadowSize" + OpMemberName 
%type_MobileDirectionalLight 3 "MobileDirectionalLight_DirectionalLightDistanceFadeMAD" + OpMemberName %type_MobileDirectionalLight 4 "MobileDirectionalLight_DirectionalLightShadowDistances" + OpMemberName %type_MobileDirectionalLight 5 "MobileDirectionalLight_DirectionalLightScreenToShadow" + OpName %MobileDirectionalLight "MobileDirectionalLight" + OpName %type_2d_image "type.2d.image" + OpName %MobileDirectionalLight_DirectionalLightShadowTexture "MobileDirectionalLight_DirectionalLightShadowTexture" + OpName %type_sampler "type.sampler" + OpName %MobileDirectionalLight_DirectionalLightShadowSampler "MobileDirectionalLight_DirectionalLightShadowSampler" + OpName %Material_Texture2D_0 "Material_Texture2D_0" + OpName %Material_Texture2D_0Sampler "Material_Texture2D_0Sampler" + OpName %Material_Texture2D_1 "Material_Texture2D_1" + OpName %Material_Texture2D_1Sampler "Material_Texture2D_1Sampler" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "NumDynamicPointLights" + OpMemberName %type__Globals 1 "LightPositionAndInvRadius" + OpMemberName %type__Globals 2 "LightColorAndFalloffExponent" + OpMemberName %type__Globals 3 "MobileReflectionParams" + OpName %_Globals "$Globals" + OpName %type_cube_image "type.cube.image" + OpName %ReflectionCubemap "ReflectionCubemap" + OpName %ReflectionCubemapSampler "ReflectionCubemapSampler" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %in_var_TEXCOORD8 "in.var.TEXCOORD8" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %Main "Main" + OpName %type_sampled_image "type.sampled.image" + OpName %type_sampled_image_0 "type.sampled.image" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %in_var_TEXCOORD8 UserSemantic "TEXCOORD8" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorate 
%gl_FrontFacing BuiltIn FrontFacing + OpDecorateString %gl_FrontFacing UserSemantic "SV_IsFrontFace" + OpDecorate %gl_FrontFacing Flat + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %in_var_TEXCOORD7 Location 1 + OpDecorate %in_var_TEXCOORD8 Location 2 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %MobileDirectionalLight DescriptorSet 0 + OpDecorate %MobileDirectionalLight Binding 1 + OpDecorate %MobileDirectionalLight_DirectionalLightShadowTexture DescriptorSet 0 + OpDecorate %MobileDirectionalLight_DirectionalLightShadowTexture Binding 0 + OpDecorate %MobileDirectionalLight_DirectionalLightShadowSampler DescriptorSet 0 + OpDecorate %MobileDirectionalLight_DirectionalLightShadowSampler Binding 0 + OpDecorate %Material_Texture2D_0 DescriptorSet 0 + OpDecorate %Material_Texture2D_0 Binding 1 + OpDecorate %Material_Texture2D_0Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_0Sampler Binding 1 + OpDecorate %Material_Texture2D_1 DescriptorSet 0 + OpDecorate %Material_Texture2D_1 Binding 2 + OpDecorate %Material_Texture2D_1Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_1Sampler Binding 2 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 2 + OpDecorate %ReflectionCubemap DescriptorSet 0 + OpDecorate %ReflectionCubemap Binding 3 + OpDecorate %ReflectionCubemapSampler DescriptorSet 0 + OpDecorate %ReflectionCubemapSampler Binding 3 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + 
OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + 
OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + 
OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate 
%type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + 
OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64 + OpMemberDecorate %type_MobileDirectionalLight 0 Offset 0 + OpMemberDecorate %type_MobileDirectionalLight 1 Offset 16 + OpMemberDecorate %type_MobileDirectionalLight 2 
Offset 32 + OpMemberDecorate %type_MobileDirectionalLight 3 Offset 48 + OpMemberDecorate %type_MobileDirectionalLight 4 Offset 64 + OpMemberDecorate %type_MobileDirectionalLight 5 Offset 80 + OpMemberDecorate %type_MobileDirectionalLight 5 MatrixStride 16 + OpMemberDecorate %type_MobileDirectionalLight 5 ColMajor + OpDecorate %type_MobileDirectionalLight Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 80 + OpMemberDecorate %type__Globals 3 Offset 144 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_4 = OpConstant %int 4 + %float_0 = OpConstant %float 0 + %int_3 = OpConstant %int 3 + %47 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %float_1 = OpConstant %float 1 + %int_0 = OpConstant %int 0 + %bool = OpTypeBool + %int_5 = OpConstant %int 5 + %52 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 +%float_0_999989986 = OpConstant %float 0.999989986 +%float_65000 = OpConstant %float 65000 + %55 = OpConstantComposite %v3float %float_65000 %float_65000 %float_65000 +%float_0_318309873 = OpConstant %float 0.318309873 + %57 = OpConstantComposite %v3float %float_0_318309873 %float_0_318309873 %float_0_318309873 +%float_65500 = OpConstant %float 65500 + %float_0_5 = OpConstant %float 0.5 + %60 = OpConstantComposite %v2float %float_0_5 %float_0_5 + %float_2 = OpConstant %float 2 + %float_n2 = OpConstant %float -2 + %63 = OpConstantComposite %v2float %float_2 %float_n2 + %64 = OpConstantComposite %v3float %float_1 %float_1 %float_1 +%float_0_119999997 = OpConstant %float 0.119999997 + %float_n1 = 
OpConstant %float -1 +%float_n0_0274999999 = OpConstant %float -0.0274999999 + %68 = OpConstantComposite %v2float %float_n1 %float_n0_0274999999 +%float_0_0425000004 = OpConstant %float 0.0425000004 + %70 = OpConstantComposite %v2float %float_1 %float_0_0425000004 +%float_n9_27999973 = OpConstant %float -9.27999973 + %72 = OpConstantComposite %v2float %float_1 %float_1 + %float_0_25 = OpConstant %float 0.25 + %float_16 = OpConstant %float 16 + %int_31 = OpConstant %int 31 + %int_56 = OpConstant %int 56 + %int_57 = OpConstant %int 57 + %int_64 = OpConstant %int 64 + %int_65 = OpConstant %int 65 + %int_66 = OpConstant %int 66 + %int_67 = OpConstant %int 67 + %int_88 = OpConstant %int 88 + %int_135 = OpConstant %int 135 + %int_139 = OpConstant %int 139 +%mat3v3float = OpTypeMatrix %v3float 3 + %86 = OpConstantComposite %v2float %float_2 %float_2 +%float_0_300000012 = OpConstant %float 0.300000012 + %88 = OpConstantComposite %v3float %float_0_300000012 %float_0_300000012 %float_1 + %float_20 = OpConstant %float 20 + %90 = OpConstantComposite %v2float %float_20 %float_20 +%float_0_400000006 = OpConstant %float 0.400000006 + %float_24 = OpConstant %float 24 +%float_0_294999987 = OpConstant %float 0.294999987 +%float_0_660000026 = OpConstant %float 0.660000026 +%float_0_699999988 = OpConstant %float 0.699999988 +%float_65504 = OpConstant %float 65504 +%float_1_20000005 = OpConstant %float 1.20000005 + %98 = OpConstantComposite %v3float %float_2 %float_2 %float_2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float 
%mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4 +%type_MobileDirectionalLight = OpTypeStruct %v4float %v4float %v4float %v4float %v4float %_arr_mat4v4float_uint_4 +%_ptr_Uniform_type_MobileDirectionalLight = OpTypePointer Uniform %type_MobileDirectionalLight +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type__Globals = OpTypeStruct %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %v4float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_cube_image = OpTypeImage %float Cube 2 0 0 1 Unknown 
+%_ptr_UniformConstant_type_cube_image = OpTypePointer UniformConstant %type_cube_image +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_bool = OpTypePointer Input %bool +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %110 = OpTypeFunction %void +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_int = OpTypePointer Uniform %int +%type_sampled_image = OpTypeSampledImage %type_cube_image +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%type_sampled_image_0 = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%MobileDirectionalLight = OpVariable %_ptr_Uniform_type_MobileDirectionalLight Uniform +%MobileDirectionalLight_DirectionalLightShadowTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%MobileDirectionalLight_DirectionalLightShadowSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%Material_Texture2D_0 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_0Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%Material_Texture2D_1 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_1Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%ReflectionCubemap = OpVariable %_ptr_UniformConstant_type_cube_image UniformConstant +%ReflectionCubemapSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD8 = OpVariable %_ptr_Input_v4float Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float 
Input +%gl_FrontFacing = OpVariable %_ptr_Input_bool Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %117 = OpConstantComposite %v3float %float_1 %float_0 %float_0 + %118 = OpConstantComposite %v3float %float_0 %float_1 %float_0 + %119 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %120 = OpConstantComposite %mat3v3float %117 %118 %119 + %float_10 = OpConstant %float 10 + %122 = OpConstantComposite %v2float %float_10 %float_10 + %float_5 = OpConstant %float 5 + %124 = OpConstantComposite %v2float %float_5 %float_5 +%float_0_00066666666 = OpConstant %float 0.00066666666 + %float_n0_5 = OpConstant %float -0.5 + %127 = OpConstantComposite %v2float %float_n0_5 %float_n0_5 + %128 = OpConstantComposite %v2float %float_0_5 %float_n0_5 + %float_1_5 = OpConstant %float 1.5 + %130 = OpConstantComposite %v2float %float_1_5 %float_n0_5 + %131 = OpConstantComposite %v2float %float_n0_5 %float_0_5 + %132 = OpConstantComposite %v2float %float_1_5 %float_0_5 + %133 = OpConstantComposite %v2float %float_n0_5 %float_1_5 + %134 = OpConstantComposite %v2float %float_0_5 %float_1_5 + %135 = OpConstantComposite %v2float %float_1_5 %float_1_5 + %136 = OpUndef %v3float + %137 = OpUndef %v4float + %138 = OpUndef %float + %139 = OpUndef %v3float + %Main = OpFunction %void None %110 + %140 = OpLabel + %141 = OpLoad %v2float %in_var_TEXCOORD0 + %142 = OpLoad %v4float %in_var_TEXCOORD7 + %143 = OpLoad %v4float %in_var_TEXCOORD8 + %144 = OpLoad %v4float %gl_FragCoord + %145 = OpAccessChain %_ptr_Uniform_v3float %View %int_31 + %146 = OpLoad %v3float %145 + %147 = OpAccessChain %_ptr_Uniform_v4float %View %int_56 + %148 = OpLoad %v4float %147 + %149 = OpAccessChain %_ptr_Uniform_v4float %View %int_57 + %150 = OpLoad %v4float %149 + %151 = OpAccessChain %_ptr_Uniform_v4float %View %int_64 + %152 = OpLoad %v4float %151 + %153 = OpAccessChain %_ptr_Uniform_v4float %View %int_65 + %154 = OpLoad %v4float %153 + %155 = OpAccessChain %_ptr_Uniform_v4float %View 
%int_66 + %156 = OpLoad %v4float %155 + %157 = OpAccessChain %_ptr_Uniform_v2float %View %int_67 + %158 = OpLoad %v2float %157 + %159 = OpAccessChain %_ptr_Uniform_float %View %int_88 + %160 = OpLoad %float %159 + %161 = OpAccessChain %_ptr_Uniform_v4float %View %int_135 + %162 = OpLoad %v4float %161 + %163 = OpAccessChain %_ptr_Uniform_float %View %int_139 + %164 = OpLoad %float %163 + %165 = OpVectorShuffle %v2float %144 %144 0 1 + %166 = OpVectorShuffle %v2float %148 %148 0 1 + %167 = OpFSub %v2float %165 %166 + %168 = OpVectorShuffle %v2float %150 %150 2 3 + %169 = OpFMul %v2float %167 %168 + %170 = OpFSub %v2float %169 %60 + %171 = OpFMul %v2float %170 %63 + %172 = OpCompositeExtract %float %171 0 + %173 = OpCompositeExtract %float %171 1 + %174 = OpCompositeConstruct %v4float %172 %173 %138 %float_1 + %175 = OpCompositeExtract %float %144 3 + %176 = OpCompositeConstruct %v4float %175 %175 %175 %175 + %177 = OpFMul %v4float %174 %176 + %178 = OpVectorShuffle %v3float %143 %143 0 1 2 + %179 = OpFSub %v3float %178 %146 + %180 = OpFNegate %v3float %178 + %181 = OpExtInst %v3float %1 Normalize %180 + %182 = OpFMul %v2float %141 %60 + %183 = OpFMul %v2float %141 %122 + %184 = OpLoad %type_2d_image %Material_Texture2D_0 + %185 = OpLoad %type_sampler %Material_Texture2D_0Sampler + %186 = OpSampledImage %type_sampled_image_0 %184 %185 + %187 = OpImageSampleImplicitLod %v4float %186 %183 None + %188 = OpVectorShuffle %v2float %187 %187 0 1 + %189 = OpFMul %v2float %188 %86 + %190 = OpFSub %v2float %189 %72 + %191 = OpDot %float %190 %190 + %192 = OpFSub %float %float_1 %191 + %193 = OpExtInst %float %1 FClamp %192 %float_0 %float_1 + %194 = OpExtInst %float %1 Sqrt %193 + %195 = OpCompositeExtract %float %190 0 + %196 = OpCompositeExtract %float %190 1 + %197 = OpCompositeConstruct %v4float %195 %196 %194 %float_1 + %198 = OpVectorShuffle %v3float %197 %197 0 1 2 + %199 = OpFMul %v3float %198 %88 + %200 = OpVectorShuffle %v3float %156 %156 0 1 2 + %201 = 
OpCompositeExtract %float %156 3 + %202 = OpCompositeConstruct %v3float %201 %201 %201 + %203 = OpFMul %v3float %199 %202 + %204 = OpFAdd %v3float %203 %200 + %205 = OpMatrixTimesVector %v3float %120 %204 + %206 = OpExtInst %v3float %1 Normalize %205 + %207 = OpFNegate %v3float %181 + %208 = OpDot %float %206 %181 + %209 = OpCompositeConstruct %v3float %208 %208 %208 + %210 = OpFMul %v3float %206 %209 + %211 = OpFMul %v3float %210 %98 + %212 = OpFAdd %v3float %207 %211 + %213 = OpFMul %v2float %141 %90 + %214 = OpLoad %type_2d_image %Material_Texture2D_1 + %215 = OpLoad %type_sampler %Material_Texture2D_1Sampler + %216 = OpSampledImage %type_sampled_image_0 %214 %215 + %217 = OpImageSampleImplicitLod %v4float %216 %213 None + %218 = OpCompositeExtract %float %217 0 + %219 = OpExtInst %float %1 FMix %float_0_400000006 %float_1 %218 + %220 = OpFSub %float %float_1 %219 + %221 = OpFMul %v2float %141 %124 + %222 = OpSampledImage %type_sampled_image_0 %214 %215 + %223 = OpImageSampleImplicitLod %v4float %222 %221 None + %224 = OpCompositeExtract %float %177 3 + %225 = OpFSub %float %224 %float_24 + %226 = OpFMul %float %225 %float_0_00066666666 + %227 = OpExtInst %float %1 FMax %226 %float_0 + %228 = OpExtInst %float %1 FMin %227 %float_1 + %229 = OpCompositeExtract %float %223 1 + %230 = OpExtInst %float %1 FMix %229 %float_1 %228 + %231 = OpExtInst %float %1 FMix %219 %220 %230 + %232 = OpSampledImage %type_sampled_image_0 %214 %215 + %233 = OpImageSampleImplicitLod %v4float %232 %182 None + %234 = OpExtInst %float %1 FMix %229 %float_0 %228 + %235 = OpCompositeExtract %float %233 1 + %236 = OpFAdd %float %235 %234 + %237 = OpExtInst %float %1 FMix %236 %float_0_5 %float_0_5 + %238 = OpExtInst %float %1 FMix %float_0_294999987 %float_0_660000026 %237 + %239 = OpFMul %float %238 %float_0_5 + %240 = OpFMul %float %231 %239 + %241 = OpExtInst %float %1 FMix %float_0 %float_0_5 %235 + %242 = OpExtInst %float %1 FMix %float_0_699999988 %float_1 %229 + %243 = OpExtInst 
%float %1 FMix %242 %float_1 %228 + %244 = OpFAdd %float %241 %243 + %245 = OpExtInst %float %1 FMax %244 %float_0 + %246 = OpExtInst %float %1 FMin %245 %float_1 + %247 = OpCompositeConstruct %v3float %240 %240 %240 + %248 = OpExtInst %v3float %1 FClamp %247 %47 %64 + %249 = OpCompositeExtract %float %158 1 + %250 = OpFMul %float %246 %249 + %251 = OpCompositeExtract %float %158 0 + %252 = OpFAdd %float %250 %251 + %253 = OpExtInst %float %1 FClamp %252 %float_0_119999997 %float_1 + %254 = OpExtInst %float %1 FMax %208 %float_0 + %255 = OpCompositeConstruct %v2float %253 %253 + %256 = OpFMul %v2float %255 %68 + %257 = OpFAdd %v2float %256 %70 + %258 = OpCompositeExtract %float %257 0 + %259 = OpFMul %float %258 %258 + %260 = OpFMul %float %float_n9_27999973 %254 + %261 = OpExtInst %float %1 Exp2 %260 + %262 = OpExtInst %float %1 FMin %259 %261 + %263 = OpFMul %float %262 %258 + %264 = OpCompositeExtract %float %257 1 + %265 = OpFAdd %float %263 %264 + %266 = OpCompositeExtract %float %152 3 + %267 = OpCompositeConstruct %v3float %266 %266 %266 + %268 = OpFMul %v3float %248 %267 + %269 = OpVectorShuffle %v3float %152 %152 0 1 2 + %270 = OpFAdd %v3float %268 %269 + %271 = OpCompositeExtract %float %154 3 + %272 = OpFMul %float %265 %271 + %273 = OpCompositeConstruct %v3float %272 %272 %272 + %274 = OpVectorShuffle %v3float %154 %154 0 1 2 + %275 = OpFAdd %v3float %273 %274 + %276 = OpCompositeExtract %float %275 0 + %277 = OpExtInst %float %1 FClamp %float_1 %float_0 %float_1 + %278 = OpLoad %type_2d_image %MobileDirectionalLight_DirectionalLightShadowTexture + %279 = OpLoad %type_sampler %MobileDirectionalLight_DirectionalLightShadowSampler + %280 = OpAccessChain %_ptr_Uniform_v4float %MobileDirectionalLight %int_1 + %281 = OpAccessChain %_ptr_Uniform_float %MobileDirectionalLight %int_1 %int_3 + %282 = OpLoad %float %281 + %283 = OpAccessChain %_ptr_Uniform_v4float %MobileDirectionalLight %int_2 + %284 = OpLoad %v4float %283 + OpBranch %285 + %285 = OpLabel + %286 
= OpPhi %int %int_0 %140 %287 %288 + %289 = OpSLessThan %bool %286 %int_2 + OpLoopMerge %290 %288 None + OpBranchConditional %289 %291 %290 + %291 = OpLabel + %292 = OpBitcast %uint %286 + %293 = OpAccessChain %_ptr_Uniform_float %MobileDirectionalLight %int_4 %292 + %294 = OpLoad %float %293 + %295 = OpFOrdLessThan %bool %224 %294 + OpSelectionMerge %288 None + OpBranchConditional %295 %296 %288 + %296 = OpLabel + %297 = OpCompositeExtract %float %177 0 + %298 = OpCompositeExtract %float %177 1 + %299 = OpCompositeConstruct %v4float %297 %298 %224 %float_1 + %300 = OpAccessChain %_ptr_Uniform_mat4v4float %MobileDirectionalLight %int_5 %286 + %301 = OpLoad %mat4v4float %300 + %302 = OpMatrixTimesVector %v4float %301 %299 + OpBranch %290 + %288 = OpLabel + %287 = OpIAdd %int %286 %int_1 + OpBranch %285 + %290 = OpLabel + %303 = OpPhi %v4float %52 %285 %302 %296 + %304 = OpCompositeExtract %float %303 2 + %305 = OpFOrdGreaterThan %bool %304 %float_0 + OpSelectionMerge %306 None + OpBranchConditional %305 %307 %306 + %307 = OpLabel + %308 = OpExtInst %float %1 FMin %304 %float_0_999989986 + %309 = OpVectorShuffle %v2float %303 %303 0 1 + %310 = OpVectorShuffle %v2float %284 %284 0 1 + %311 = OpFMul %v2float %309 %310 + %312 = OpExtInst %v2float %1 Fract %311 + %313 = OpExtInst %v2float %1 Floor %311 + %314 = OpFAdd %v2float %313 %127 + %315 = OpVectorShuffle %v2float %284 %284 2 3 + %316 = OpFMul %v2float %314 %315 + %317 = OpSampledImage %type_sampled_image_0 %278 %279 + %318 = OpImageSampleExplicitLod %v4float %317 %316 Lod %float_0 + %319 = OpCompositeExtract %float %318 0 + %320 = OpCompositeInsert %v3float %319 %139 0 + %321 = OpFAdd %v2float %313 %128 + %322 = OpFMul %v2float %321 %315 + %323 = OpSampledImage %type_sampled_image_0 %278 %279 + %324 = OpImageSampleExplicitLod %v4float %323 %322 Lod %float_0 + %325 = OpCompositeExtract %float %324 0 + %326 = OpCompositeInsert %v3float %325 %320 1 + %327 = OpFAdd %v2float %313 %130 + %328 = OpFMul %v2float %327 %315 
+ %329 = OpSampledImage %type_sampled_image_0 %278 %279 + %330 = OpImageSampleExplicitLod %v4float %329 %328 Lod %float_0 + %331 = OpCompositeExtract %float %330 0 + %332 = OpCompositeInsert %v3float %331 %326 2 + %333 = OpFMul %float %308 %282 + %334 = OpFSub %float %333 %float_1 + %335 = OpCompositeConstruct %v3float %282 %282 %282 + %336 = OpFMul %v3float %332 %335 + %337 = OpCompositeConstruct %v3float %334 %334 %334 + %338 = OpFSub %v3float %336 %337 + %339 = OpExtInst %v3float %1 FClamp %338 %47 %64 + %340 = OpFAdd %v2float %313 %131 + %341 = OpFMul %v2float %340 %315 + %342 = OpSampledImage %type_sampled_image_0 %278 %279 + %343 = OpImageSampleExplicitLod %v4float %342 %341 Lod %float_0 + %344 = OpCompositeExtract %float %343 0 + %345 = OpCompositeInsert %v3float %344 %139 0 + %346 = OpFAdd %v2float %313 %60 + %347 = OpFMul %v2float %346 %315 + %348 = OpSampledImage %type_sampled_image_0 %278 %279 + %349 = OpImageSampleExplicitLod %v4float %348 %347 Lod %float_0 + %350 = OpCompositeExtract %float %349 0 + %351 = OpCompositeInsert %v3float %350 %345 1 + %352 = OpFAdd %v2float %313 %132 + %353 = OpFMul %v2float %352 %315 + %354 = OpSampledImage %type_sampled_image_0 %278 %279 + %355 = OpImageSampleExplicitLod %v4float %354 %353 Lod %float_0 + %356 = OpCompositeExtract %float %355 0 + %357 = OpCompositeInsert %v3float %356 %351 2 + %358 = OpFMul %v3float %357 %335 + %359 = OpFSub %v3float %358 %337 + %360 = OpExtInst %v3float %1 FClamp %359 %47 %64 + %361 = OpFAdd %v2float %313 %133 + %362 = OpFMul %v2float %361 %315 + %363 = OpSampledImage %type_sampled_image_0 %278 %279 + %364 = OpImageSampleExplicitLod %v4float %363 %362 Lod %float_0 + %365 = OpCompositeExtract %float %364 0 + %366 = OpCompositeInsert %v3float %365 %139 0 + %367 = OpFAdd %v2float %313 %134 + %368 = OpFMul %v2float %367 %315 + %369 = OpSampledImage %type_sampled_image_0 %278 %279 + %370 = OpImageSampleExplicitLod %v4float %369 %368 Lod %float_0 + %371 = OpCompositeExtract %float %370 0 + %372 
= OpCompositeInsert %v3float %371 %366 1 + %373 = OpFAdd %v2float %313 %135 + %374 = OpFMul %v2float %373 %315 + %375 = OpSampledImage %type_sampled_image_0 %278 %279 + %376 = OpImageSampleExplicitLod %v4float %375 %374 Lod %float_0 + %377 = OpCompositeExtract %float %376 0 + %378 = OpCompositeInsert %v3float %377 %372 2 + %379 = OpFMul %v3float %378 %335 + %380 = OpFSub %v3float %379 %337 + %381 = OpExtInst %v3float %1 FClamp %380 %47 %64 + %382 = OpCompositeExtract %float %339 0 + %383 = OpCompositeExtract %float %312 0 + %384 = OpFSub %float %float_1 %383 + %385 = OpFMul %float %382 %384 + %386 = OpCompositeExtract %float %360 0 + %387 = OpFMul %float %386 %384 + %388 = OpCompositeExtract %float %381 0 + %389 = OpFMul %float %388 %384 + %390 = OpCompositeExtract %float %339 1 + %391 = OpFAdd %float %385 %390 + %392 = OpCompositeExtract %float %360 1 + %393 = OpFAdd %float %387 %392 + %394 = OpCompositeExtract %float %381 1 + %395 = OpFAdd %float %389 %394 + %396 = OpCompositeExtract %float %339 2 + %397 = OpFMul %float %396 %383 + %398 = OpFAdd %float %391 %397 + %399 = OpCompositeInsert %v3float %398 %136 0 + %400 = OpCompositeExtract %float %360 2 + %401 = OpFMul %float %400 %383 + %402 = OpFAdd %float %393 %401 + %403 = OpCompositeInsert %v3float %402 %399 1 + %404 = OpCompositeExtract %float %381 2 + %405 = OpFMul %float %404 %383 + %406 = OpFAdd %float %395 %405 + %407 = OpCompositeInsert %v3float %406 %403 2 + %408 = OpCompositeExtract %float %312 1 + %409 = OpFSub %float %float_1 %408 + %410 = OpCompositeConstruct %v3float %409 %float_1 %408 + %411 = OpDot %float %407 %410 + %412 = OpFMul %float %float_0_25 %411 + %413 = OpExtInst %float %1 FClamp %412 %float_0 %float_1 + %414 = OpAccessChain %_ptr_Uniform_float %MobileDirectionalLight %int_3 %int_0 + %415 = OpLoad %float %414 + %416 = OpFMul %float %224 %415 + %417 = OpAccessChain %_ptr_Uniform_float %MobileDirectionalLight %int_3 %int_1 + %418 = OpLoad %float %417 + %419 = OpFAdd %float %416 %418 + %420 
= OpExtInst %float %1 FClamp %419 %float_0 %float_1 + %421 = OpFMul %float %420 %420 + %422 = OpExtInst %float %1 FMix %413 %float_1 %421 + OpBranch %306 + %306 = OpLabel + %423 = OpPhi %float %float_1 %290 %422 %307 + %424 = OpLoad %v4float %280 + %425 = OpVectorShuffle %v3float %424 %424 0 1 2 + %426 = OpDot %float %206 %425 + %427 = OpExtInst %float %1 FMax %float_0 %426 + %428 = OpFAdd %v3float %181 %425 + %429 = OpExtInst %v3float %1 Normalize %428 + %430 = OpDot %float %206 %429 + %431 = OpExtInst %float %1 FMax %float_0 %430 + %432 = OpFMul %float %423 %427 + %433 = OpCompositeConstruct %v3float %432 %432 %432 + %434 = OpAccessChain %_ptr_Uniform_v4float %MobileDirectionalLight %int_0 + %435 = OpLoad %v4float %434 + %436 = OpVectorShuffle %v3float %435 %435 0 1 2 + %437 = OpFMul %v3float %433 %436 + %438 = OpFMul %float %253 %float_0_25 + %439 = OpFAdd %float %438 %float_0_25 + %440 = OpExtInst %v3float %1 Cross %206 %429 + %441 = OpDot %float %440 %440 + %442 = OpFMul %float %253 %253 + %443 = OpFMul %float %431 %442 + %444 = OpFMul %float %443 %443 + %445 = OpFAdd %float %441 %444 + %446 = OpFDiv %float %442 %445 + %447 = OpFMul %float %446 %446 + %448 = OpExtInst %float %1 FMin %447 %float_65504 + %449 = OpFMul %float %439 %448 + %450 = OpFMul %float %276 %449 + %451 = OpCompositeConstruct %v3float %450 %450 %450 + %452 = OpFAdd %v3float %270 %451 + %453 = OpFMul %v3float %437 %452 + %454 = OpAccessChain %_ptr_Uniform_float %_Globals %int_3 %int_3 + %455 = OpLoad %float %454 + %456 = OpFOrdGreaterThan %bool %455 %float_0 + %457 = OpSelect %float %456 %float_1 %float_0 + %458 = OpFOrdNotEqual %bool %457 %float_0 + %459 = OpSelect %float %458 %455 %164 + %460 = OpExtInst %float %1 Log2 %253 + %461 = OpFMul %float %float_1_20000005 %460 + %462 = OpFSub %float %float_1 %461 + %463 = OpFSub %float %459 %float_1 + %464 = OpFSub %float %463 %462 + %465 = OpLoad %type_cube_image %ReflectionCubemap + %466 = OpLoad %type_sampler %ReflectionCubemapSampler + %467 = 
OpSampledImage %type_sampled_image %465 %466 + %468 = OpImageSampleExplicitLod %v4float %467 %212 Lod %464 + OpSelectionMerge %469 None + OpBranchConditional %458 %470 %471 + %471 = OpLabel + %472 = OpVectorShuffle %v3float %468 %468 0 1 2 + %473 = OpCompositeExtract %float %468 3 + %474 = OpFMul %float %473 %float_16 + %475 = OpCompositeConstruct %v3float %474 %474 %474 + %476 = OpFMul %v3float %472 %475 + %477 = OpFMul %v3float %476 %476 + OpBranch %469 + %470 = OpLabel + %478 = OpVectorShuffle %v3float %468 %468 0 1 2 + %479 = OpVectorShuffle %v3float %162 %162 0 1 2 + %480 = OpFMul %v3float %478 %479 + OpBranch %469 + %469 = OpLabel + %481 = OpPhi %v3float %477 %471 %480 %470 + %482 = OpCompositeConstruct %v3float %277 %277 %277 + %483 = OpFMul %v3float %481 %482 + %484 = OpCompositeConstruct %v3float %276 %276 %276 + %485 = OpFMul %v3float %483 %484 + %486 = OpFAdd %v3float %453 %485 + OpBranch %487 + %487 = OpLabel + %488 = OpPhi %v3float %486 %469 %489 %490 + %491 = OpPhi %int %int_0 %469 %492 %490 + %493 = OpAccessChain %_ptr_Uniform_int %_Globals %int_0 + %494 = OpLoad %int %493 + %495 = OpSLessThan %bool %491 %494 + OpLoopMerge %496 %490 None + OpBranchConditional %495 %497 %496 + %497 = OpLabel + %498 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_1 %491 + %499 = OpLoad %v4float %498 + %500 = OpVectorShuffle %v3float %499 %499 0 1 2 + %501 = OpFSub %v3float %500 %179 + %502 = OpDot %float %501 %501 + %503 = OpExtInst %float %1 InverseSqrt %502 + %504 = OpCompositeConstruct %v3float %503 %503 %503 + %505 = OpFMul %v3float %501 %504 + %506 = OpFAdd %v3float %181 %505 + %507 = OpExtInst %v3float %1 Normalize %506 + %508 = OpDot %float %206 %505 + %509 = OpExtInst %float %1 FMax %float_0 %508 + %510 = OpDot %float %206 %507 + %511 = OpExtInst %float %1 FMax %float_0 %510 + %512 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_2 %491 + %513 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %491 %int_3 + %514 = OpLoad %float %513 + %515 = 
OpFOrdEqual %bool %514 %float_0 + OpSelectionMerge %490 None + OpBranchConditional %515 %516 %517 + %517 = OpLabel + %518 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %491 %int_3 + %519 = OpLoad %float %518 + %520 = OpCompositeConstruct %v3float %519 %519 %519 + %521 = OpFMul %v3float %501 %520 + %522 = OpDot %float %521 %521 + %523 = OpExtInst %float %1 FClamp %522 %float_0 %float_1 + %524 = OpFSub %float %float_1 %523 + %525 = OpExtInst %float %1 Pow %524 %514 + OpBranch %490 + %516 = OpLabel + %526 = OpFAdd %float %502 %float_1 + %527 = OpFDiv %float %float_1 %526 + %528 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %491 %int_3 + %529 = OpLoad %float %528 + %530 = OpFMul %float %529 %529 + %531 = OpFMul %float %502 %530 + %532 = OpFMul %float %531 %531 + %533 = OpFSub %float %float_1 %532 + %534 = OpExtInst %float %1 FClamp %533 %float_0 %float_1 + %535 = OpFMul %float %534 %534 + %536 = OpFMul %float %527 %535 + OpBranch %490 + %490 = OpLabel + %537 = OpPhi %float %525 %517 %536 %516 + %538 = OpFMul %float %537 %509 + %539 = OpCompositeConstruct %v3float %538 %538 %538 + %540 = OpLoad %v4float %512 + %541 = OpVectorShuffle %v3float %540 %540 0 1 2 + %542 = OpFMul %v3float %539 %541 + %543 = OpFMul %v3float %542 %57 + %544 = OpExtInst %v3float %1 Cross %206 %507 + %545 = OpDot %float %544 %544 + %546 = OpFMul %float %511 %442 + %547 = OpFMul %float %546 %546 + %548 = OpFAdd %float %545 %547 + %549 = OpFDiv %float %442 %548 + %550 = OpFMul %float %549 %549 + %551 = OpExtInst %float %1 FMin %550 %float_65504 + %552 = OpFMul %float %439 %551 + %553 = OpFMul %float %276 %552 + %554 = OpCompositeConstruct %v3float %553 %553 %553 + %555 = OpFAdd %v3float %270 %554 + %556 = OpFMul %v3float %543 %555 + %557 = OpExtInst %v3float %1 FMin %55 %556 + %489 = OpFAdd %v3float %488 %557 + %492 = OpIAdd %int %491 %int_1 + OpBranch %487 + %496 = OpLabel + %558 = OpExtInst %v3float %1 FMax %47 %47 + %559 = OpFAdd %v3float %488 %558 + %560 = OpFAdd %v3float %270 %484 
+ %561 = OpCompositeConstruct %v3float %160 %160 %160 + %562 = OpExtInst %v3float %1 FMix %559 %560 %561 + %563 = OpCompositeExtract %float %142 3 + %564 = OpCompositeConstruct %v3float %563 %563 %563 + %565 = OpFMul %v3float %562 %564 + %566 = OpVectorShuffle %v3float %142 %142 0 1 2 + %567 = OpFAdd %v3float %565 %566 + %568 = OpVectorShuffle %v4float %137 %567 4 5 6 3 + %569 = OpCompositeExtract %float %143 3 + %570 = OpExtInst %float %1 FMin %569 %float_65500 + %571 = OpCompositeInsert %v4float %570 %568 3 + OpStore %out_var_SV_Target0 %571 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag b/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag new file mode 100644 index 00000000000..eba220ba4e7 --- /dev/null +++ b/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag @@ -0,0 +1,878 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 353 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPixelShader "main" %gl_FragCoord %in_var_TEXCOORD6 %in_var_TEXCOORD7 %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_TEXCOORD0 %in_var_PRIMITIVE_ID %gl_FrontFacing %gl_FragDepth %out_var_SV_Target0 + OpExecutionMode %MainPixelShader OriginUpperLeft + OpExecutionMode %MainPixelShader DepthReplacing + OpExecutionMode %MainPixelShader DepthLess + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 
"View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + 
OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName 
%type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 
"View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 
"View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName 
%type_PrimitiveDither "type.PrimitiveDither" + OpMemberName %type_PrimitiveDither 0 "PrimitiveDither_LODFactor" + OpName %PrimitiveDither "PrimitiveDither" + OpName %type_PrimitiveFade "type.PrimitiveFade" + OpMemberName %type_PrimitiveFade 0 "PrimitiveFade_FadeTimeScaleBias" + OpName %PrimitiveFade "PrimitiveFade" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %type_2d_image "type.2d.image" + OpName %Material_Texture2D_0 "Material_Texture2D_0" + OpName %type_sampler "type.sampler" + OpName %Material_Texture2D_0Sampler "Material_Texture2D_0Sampler" + OpName %Material_Texture2D_3 "Material_Texture2D_3" + OpName %Material_Texture2D_3Sampler "Material_Texture2D_3Sampler" + OpName %in_var_TEXCOORD6 "in.var.TEXCOORD6" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPixelShader "MainPixelShader" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorateString %in_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorate %in_var_PRIMITIVE_ID Flat + OpDecorate %gl_FrontFacing BuiltIn FrontFacing + OpDecorateString %gl_FrontFacing UserSemantic "SV_IsFrontFace" + OpDecorate 
%gl_FrontFacing Flat + OpDecorate %gl_FragDepth BuiltIn FragDepth + OpDecorateString %gl_FragDepth UserSemantic "SV_DepthLessEqual" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD6 Location 0 + OpDecorate %in_var_TEXCOORD7 Location 1 + OpDecorate %in_var_TEXCOORD10_centroid Location 2 + OpDecorate %in_var_TEXCOORD11_centroid Location 3 + OpDecorate %in_var_TEXCOORD0 Location 4 + OpDecorate %in_var_PRIMITIVE_ID Location 5 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %PrimitiveDither DescriptorSet 0 + OpDecorate %PrimitiveDither Binding 1 + OpDecorate %PrimitiveFade DescriptorSet 0 + OpDecorate %PrimitiveFade Binding 2 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 3 + OpDecorate %Material_Texture2D_0 DescriptorSet 0 + OpDecorate %Material_Texture2D_0 Binding 0 + OpDecorate %Material_Texture2D_0Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_0Sampler Binding 0 + OpDecorate %Material_Texture2D_3 DescriptorSet 0 + OpDecorate %Material_Texture2D_3 Binding 1 + OpDecorate %Material_Texture2D_3Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_3Sampler Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + 
OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + 
OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + 
OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate 
%type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + 
OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpMemberDecorate 
%type_PrimitiveDither 0 Offset 0 + OpDecorate %type_PrimitiveDither Block + OpMemberDecorate %type_PrimitiveFade 0 Offset 0 + OpDecorate %type_PrimitiveFade Block + OpDecorate %_arr_v4float_uint_9 ArrayStride 16 + OpDecorate %_arr_v4float_uint_3 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 144 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 +%float_0_00100000005 = OpConstant %float 0.00100000005 + %int_2 = OpConstant %int 2 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float_0 = OpConstant %float 0 + %49 = OpConstantComposite %v2float %float_0 %float_0 + %float_1 = OpConstant %float 1 + %int_4 = OpConstant %int 4 + %int_11 = OpConstant %int 11 +%float_0_249500006 = OpConstant %float 0.249500006 + %54 = OpConstantComposite %v2float %float_0_249500006 %float_0_249500006 +%float_0_499992371 = OpConstant %float 0.499992371 + %56 = OpConstantComposite %v2float %float_0_499992371 %float_0_499992371 + %int_32 = OpConstant %int 32 + %int_53 = OpConstant %int 53 + %int_57 = OpConstant %int 57 + %int_80 = OpConstant %int 80 + %int_82 = OpConstant %int 82 + %int_98 = OpConstant %int 98 + %uint_1 = OpConstant %uint 1 +%mat3v3float = OpTypeMatrix %v3float 3 + %float_2 = OpConstant %float 2 + %float_n1 = OpConstant %float -1 + %67 = OpConstantComposite %v2float %float_n1 %float_n1 + %bool = OpTypeBool + %float_n0_5 = OpConstant %float -0.5 + %70 = OpConstantComposite %v3float %float_0 %float_0 %float_1 +%float_0_333299994 = OpConstant %float 0.333299994 + %uint_5 = OpConstant %uint 5 +%float_347_834503 = OpConstant %float 347.834503 +%float_3343_28369 = OpConstant %float 3343.28369 + 
%75 = OpConstantComposite %v2float %float_347_834503 %float_3343_28369 + %float_1000 = OpConstant %float 1000 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_PrimitiveDither = OpTypeStruct %float 
+%_ptr_Uniform_type_PrimitiveDither = OpTypePointer Uniform %type_PrimitiveDither +%type_PrimitiveFade = OpTypeStruct %v2float +%_ptr_Uniform_type_PrimitiveFade = OpTypePointer Uniform %type_PrimitiveFade + %uint_9 = OpConstant %uint 9 +%_arr_v4float_uint_9 = OpTypeArray %v4float %uint_9 + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%type_Material = OpTypeStruct %_arr_v4float_uint_9 %_arr_v4float_uint_3 +%_ptr_Uniform_type_Material = OpTypePointer Uniform %type_Material +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_ptr_Input__arr_v4float_uint_1 = OpTypePointer Input %_arr_v4float_uint_1 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_bool = OpTypePointer Input %bool +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %93 = OpTypeFunction %void +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%PrimitiveDither = OpVariable %_ptr_Uniform_type_PrimitiveDither Uniform +%PrimitiveFade = OpVariable %_ptr_Uniform_type_PrimitiveFade Uniform + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%Material_Texture2D_0 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_0Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%Material_Texture2D_3 = OpVariable %_ptr_UniformConstant_type_2d_image 
UniformConstant +%Material_Texture2D_3Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD6 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr_v4float_uint_1 Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input_uint Input +%gl_FrontFacing = OpVariable %_ptr_Input_bool Input +%gl_FragDepth = OpVariable %_ptr_Output_float Output +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %98 = OpUndef %float + %99 = OpConstantNull %v2float +%float_0_015625 = OpConstant %float 0.015625 + %101 = OpConstantComposite %v2float %float_0_015625 %float_0_015625 +%float_0_166666672 = OpConstant %float 0.166666672 + %103 = OpUndef %float + %104 = OpConstantNull %v3float +%MainPixelShader = OpFunction %void None %93 + %105 = OpLabel + %106 = OpLoad %v4float %gl_FragCoord + %107 = OpLoad %v4float %in_var_TEXCOORD6 + %108 = OpLoad %v4float %in_var_TEXCOORD7 + %109 = OpLoad %v4float %in_var_TEXCOORD10_centroid + %110 = OpLoad %v4float %in_var_TEXCOORD11_centroid + %111 = OpLoad %_arr_v4float_uint_1 %in_var_TEXCOORD0 + %112 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_4 + %113 = OpLoad %mat4v4float %112 + %114 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_11 + %115 = OpLoad %mat4v4float %114 + %116 = OpAccessChain %_ptr_Uniform_v3float %View %int_32 + %117 = OpLoad %v3float %116 + %118 = OpAccessChain %_ptr_Uniform_v4float %View %int_53 + %119 = OpLoad %v4float %118 + %120 = OpAccessChain %_ptr_Uniform_v4float %View %int_57 + %121 = OpLoad %v4float %120 + %122 = OpAccessChain %_ptr_Uniform_float %View %int_80 + %123 = OpLoad %float %122 + %124 = OpCompositeExtract %v4float %111 0 + %125 = OpVectorShuffle %v2float %99 %124 2 3 + %126 = 
OpVectorShuffle %v3float %109 %109 0 1 2 + %127 = OpVectorShuffle %v3float %110 %110 0 1 2 + %128 = OpExtInst %v3float %1 Cross %127 %126 + %129 = OpCompositeExtract %float %110 3 + %130 = OpCompositeConstruct %v3float %129 %129 %129 + %131 = OpFMul %v3float %128 %130 + %132 = OpCompositeConstruct %mat3v3float %126 %131 %127 + %133 = OpVectorShuffle %v2float %106 %106 0 1 + %134 = OpVectorShuffle %v2float %121 %121 0 1 + %135 = OpFSub %v2float %133 %134 + %136 = OpCompositeExtract %float %106 2 + %137 = OpCompositeConstruct %v4float %103 %103 %136 %float_1 + %138 = OpCompositeExtract %float %106 3 + %139 = OpCompositeConstruct %v4float %138 %138 %138 %138 + %140 = OpFMul %v4float %137 %139 + %141 = OpCompositeExtract %float %106 0 + %142 = OpCompositeExtract %float %106 1 + %143 = OpCompositeConstruct %v4float %141 %142 %136 %float_1 + %144 = OpMatrixTimesVector %v4float %115 %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpCompositeExtract %float %144 3 + %147 = OpCompositeConstruct %v3float %146 %146 %146 + %148 = OpFDiv %v3float %145 %147 + %149 = OpFSub %v3float %148 %117 + %150 = OpFNegate %v3float %148 + %151 = OpExtInst %v3float %1 Normalize %150 + %152 = OpVectorTimesMatrix %v3float %151 %132 + %153 = OpVectorShuffle %v2float %152 %152 0 1 + %154 = OpFMul %v2float %153 %67 + %155 = OpCompositeExtract %float %152 2 + %156 = OpCompositeConstruct %v2float %155 %155 + %157 = OpFDiv %v2float %154 %156 + %158 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_0 + %159 = OpLoad %float %158 + %160 = OpCompositeConstruct %v2float %159 %159 + %161 = OpFMul %v2float %160 %157 + %162 = OpDot %float %151 %127 + %163 = OpExtInst %float %1 FAbs %162 + %164 = OpExtInst %float %1 FMax %163 %float_0 + %165 = OpExtInst %float %1 FMin %164 %float_1 + %166 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_1 + %167 = OpLoad %float %166 + %168 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_2 + %169 = OpLoad %float 
%168 + %170 = OpExtInst %float %1 FMix %167 %169 %165 + %171 = OpExtInst %float %1 Floor %170 + %172 = OpFDiv %float %float_1 %170 + %173 = OpCompositeConstruct %v2float %172 %172 + %174 = OpFMul %v2float %161 %173 + %175 = OpDPdx %v2float %125 + %176 = OpDPdy %v2float %125 + %177 = OpLoad %type_2d_image %Material_Texture2D_0 + %178 = OpLoad %type_sampler %Material_Texture2D_0Sampler + OpBranch %179 + %179 = OpLabel + %180 = OpPhi %float %float_1 %105 %181 %182 + %183 = OpPhi %v2float %49 %105 %184 %182 + %185 = OpPhi %int %int_0 %105 %186 %182 + %187 = OpPhi %float %float_1 %105 %188 %182 + %189 = OpPhi %float %float_1 %105 %180 %182 + %190 = OpConvertSToF %float %185 + %191 = OpFAdd %float %171 %float_2 + %192 = OpFOrdLessThan %bool %190 %191 + OpLoopMerge %193 %182 None + OpBranchConditional %192 %194 %193 + %194 = OpLabel + %195 = OpFAdd %v2float %125 %183 + %196 = OpSampledImage %type_sampled_image %177 %178 + %197 = OpImageSampleExplicitLod %v4float %196 %195 Grad %175 %176 + %188 = OpCompositeExtract %float %197 1 + %198 = OpFOrdLessThan %bool %180 %188 + OpSelectionMerge %182 None + OpBranchConditional %198 %199 %182 + %199 = OpLabel + %200 = OpFSub %float %189 %187 + %201 = OpFSub %float %188 %180 + %202 = OpFAdd %float %200 %201 + %203 = OpFDiv %float %201 %202 + %204 = OpFMul %float %189 %203 + %205 = OpFSub %float %float_1 %203 + %206 = OpFMul %float %180 %205 + %207 = OpFAdd %float %204 %206 + %208 = OpCompositeConstruct %v2float %203 %203 + %209 = OpFMul %v2float %208 %174 + %210 = OpFSub %v2float %183 %209 + OpBranch %193 + %182 = OpLabel + %181 = OpFSub %float %180 %172 + %184 = OpFAdd %v2float %183 %174 + %186 = OpIAdd %int %185 %int_1 + OpBranch %179 + %193 = OpLabel + %211 = OpPhi %float %98 %179 %207 %199 + %212 = OpPhi %v2float %183 %179 %210 %199 + %213 = OpVectorShuffle %v2float %212 %104 0 1 + %214 = OpFAdd %v2float %125 %213 + %215 = OpAccessChain %_ptr_Uniform_float %View %int_82 + %216 = OpLoad %float %215 + %217 = OpSampledImage 
%type_sampled_image %177 %178 + %218 = OpImageSampleImplicitLod %v4float %217 %214 Bias %216 + %219 = OpCompositeExtract %float %218 0 + %220 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_2 %int_1 + %221 = OpLoad %float %220 + %222 = OpFMul %float %219 %221 + %223 = OpFSub %float %float_1 %222 + %224 = OpExtInst %float %1 FMax %223 %float_0 + %225 = OpExtInst %float %1 FMin %224 %float_1 + %226 = OpAccessChain %_ptr_Uniform_float %View %int_98 %int_0 + %227 = OpLoad %float %226 + %228 = OpCompositeConstruct %v2float %227 %227 + %229 = OpFAdd %v2float %135 %228 + %230 = OpCompositeExtract %float %229 0 + %231 = OpConvertFToU %uint %230 + %232 = OpCompositeExtract %float %229 1 + %233 = OpConvertFToU %uint %232 + %234 = OpIMul %uint %uint_2 %233 + %235 = OpIAdd %uint %231 %234 + %236 = OpUMod %uint %235 %uint_5 + %237 = OpConvertUToF %float %236 + %238 = OpFMul %v2float %135 %101 + %239 = OpLoad %type_2d_image %Material_Texture2D_3 + %240 = OpLoad %type_sampler %Material_Texture2D_3Sampler + %241 = OpSampledImage %type_sampled_image %239 %240 + %242 = OpImageSampleImplicitLod %v4float %241 %238 Bias %216 + %243 = OpCompositeExtract %float %242 0 + %244 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_2 %int_2 + %245 = OpLoad %float %244 + %246 = OpFMul %float %243 %245 + %247 = OpFAdd %float %237 %246 + %248 = OpFMul %float %247 %float_0_166666672 + %249 = OpFAdd %float %225 %248 + %250 = OpFAdd %float %249 %float_n0_5 + %251 = OpCompositeExtract %float %218 2 + %252 = OpFAdd %float %251 %250 + %253 = OpSampledImage %type_sampled_image %239 %240 + %254 = OpImageSampleImplicitLod %v4float %253 %238 Bias %216 + %255 = OpCompositeExtract %float %254 0 + %256 = OpFAdd %float %237 %255 + %257 = OpFMul %float %256 %float_0_166666672 + %258 = OpAccessChain %_ptr_Uniform_float %PrimitiveFade %int_0 %int_0 + %259 = OpLoad %float %258 + %260 = OpFMul %float %123 %259 + %261 = OpAccessChain %_ptr_Uniform_float %PrimitiveFade %int_0 %int_1 + %262 = OpLoad 
%float %261 + %263 = OpFAdd %float %260 %262 + %264 = OpExtInst %float %1 FClamp %263 %float_0 %float_1 + %265 = OpFAdd %float %264 %257 + %266 = OpFAdd %float %265 %float_n0_5 + %267 = OpFMul %float %252 %266 + %268 = OpFSub %float %float_1 %211 + %269 = OpFMul %float %268 %159 + %270 = OpCompositeExtract %float %212 0 + %271 = OpCompositeExtract %float %212 1 + %272 = OpCompositeConstruct %v3float %270 %271 %269 + %273 = OpDot %float %272 %272 + %274 = OpExtInst %float %1 Sqrt %273 + %275 = OpDPdx %v2float %125 + %276 = OpExtInst %v2float %1 FAbs %275 + %277 = OpDot %float %276 %276 + %278 = OpExtInst %float %1 Sqrt %277 + %279 = OpDPdx %v3float %149 + %280 = OpDot %float %279 %279 + %281 = OpExtInst %float %1 Sqrt %280 + %282 = OpFDiv %float %278 %281 + %283 = OpDPdy %v2float %125 + %284 = OpExtInst %v2float %1 FAbs %283 + %285 = OpDot %float %284 %284 + %286 = OpExtInst %float %1 Sqrt %285 + %287 = OpDPdy %v3float %149 + %288 = OpDot %float %287 %287 + %289 = OpExtInst %float %1 Sqrt %288 + %290 = OpFDiv %float %286 %289 + %291 = OpExtInst %float %1 FMax %282 %290 + %292 = OpCompositeExtract %v4float %113 0 + %293 = OpVectorShuffle %v3float %292 %292 0 1 2 + %294 = OpCompositeExtract %v4float %113 1 + %295 = OpVectorShuffle %v3float %294 %294 0 1 2 + %296 = OpCompositeExtract %v4float %113 2 + %297 = OpVectorShuffle %v3float %296 %296 0 1 2 + %298 = OpCompositeConstruct %mat3v3float %293 %295 %297 + %299 = OpMatrixTimesVector %v3float %298 %70 + %300 = OpDot %float %299 %151 + %301 = OpExtInst %float %1 FAbs %300 + %302 = OpFDiv %float %291 %301 + %303 = OpFDiv %float %274 %302 + %304 = OpAccessChain %_ptr_Uniform_float %PrimitiveDither %int_0 + %305 = OpLoad %float %304 + %306 = OpFOrdNotEqual %bool %305 %float_0 + OpSelectionMerge %307 None + OpBranchConditional %306 %308 %307 + %308 = OpLabel + %309 = OpExtInst %float %1 FAbs %305 + %310 = OpFOrdGreaterThan %bool %309 %float_0_00100000005 + OpSelectionMerge %311 None + OpBranchConditional %310 %312 %311 + 
%312 = OpLabel + %313 = OpExtInst %v2float %1 Floor %133 + %314 = OpDot %float %313 %75 + %315 = OpExtInst %float %1 Cos %314 + %316 = OpFMul %float %315 %float_1000 + %317 = OpExtInst %float %1 Fract %316 + %318 = OpFOrdLessThan %bool %305 %float_0 + %319 = OpFAdd %float %305 %float_1 + %320 = OpFOrdGreaterThan %bool %319 %317 + %321 = OpFOrdLessThan %bool %305 %317 + %322 = OpSelect %bool %318 %320 %321 + %323 = OpSelect %float %322 %float_1 %float_0 + %324 = OpFSub %float %323 %float_0_00100000005 + %325 = OpFOrdLessThan %bool %324 %float_0 + OpSelectionMerge %326 None + OpBranchConditional %325 %327 %326 + %327 = OpLabel + OpKill + %326 = OpLabel + OpBranch %311 + %311 = OpLabel + OpBranch %307 + %307 = OpLabel + %328 = OpFSub %float %267 %float_0_333299994 + %329 = OpFOrdLessThan %bool %328 %float_0 + OpSelectionMerge %330 None + OpBranchConditional %329 %331 %330 + %331 = OpLabel + OpKill + %330 = OpLabel + %332 = OpCompositeExtract %float %140 2 + %333 = OpCompositeExtract %float %140 3 + %334 = OpFAdd %float %333 %303 + %335 = OpFDiv %float %332 %334 + %336 = OpExtInst %float %1 FMin %335 %136 + %337 = OpVectorShuffle %v2float %107 %107 0 1 + %338 = OpCompositeExtract %float %107 3 + %339 = OpCompositeConstruct %v2float %338 %338 + %340 = OpFDiv %v2float %337 %339 + %341 = OpVectorShuffle %v2float %119 %119 0 1 + %342 = OpFSub %v2float %340 %341 + %343 = OpVectorShuffle %v2float %108 %108 0 1 + %344 = OpCompositeExtract %float %108 3 + %345 = OpCompositeConstruct %v2float %344 %344 + %346 = OpFDiv %v2float %343 %345 + %347 = OpVectorShuffle %v2float %119 %119 2 3 + %348 = OpFSub %v2float %346 %347 + %349 = OpFSub %v2float %342 %348 + %350 = OpFMul %v2float %349 %54 + %351 = OpFAdd %v2float %350 %56 + %352 = OpVectorShuffle %v4float %351 %49 0 1 2 3 + OpStore %gl_FragDepth %336 + OpStore %out_var_SV_Target0 %352 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag 
b/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag new file mode 100644 index 00000000000..eba220ba4e7 --- /dev/null +++ b/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag @@ -0,0 +1,878 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 353 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPixelShader "main" %gl_FragCoord %in_var_TEXCOORD6 %in_var_TEXCOORD7 %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_TEXCOORD0 %in_var_PRIMITIVE_ID %gl_FrontFacing %gl_FragDepth %out_var_SV_Target0 + OpExecutionMode %MainPixelShader OriginUpperLeft + OpExecutionMode %MainPixelShader DepthReplacing + OpExecutionMode %MainPixelShader DepthLess + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 
"View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName 
%type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + 
OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName 
%type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 
157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_PrimitiveDither "type.PrimitiveDither" + OpMemberName %type_PrimitiveDither 0 "PrimitiveDither_LODFactor" + OpName %PrimitiveDither "PrimitiveDither" + OpName %type_PrimitiveFade "type.PrimitiveFade" + OpMemberName %type_PrimitiveFade 0 "PrimitiveFade_FadeTimeScaleBias" + OpName %PrimitiveFade "PrimitiveFade" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %type_2d_image "type.2d.image" + OpName %Material_Texture2D_0 "Material_Texture2D_0" + OpName %type_sampler "type.sampler" + OpName 
%Material_Texture2D_0Sampler "Material_Texture2D_0Sampler" + OpName %Material_Texture2D_3 "Material_Texture2D_3" + OpName %Material_Texture2D_3Sampler "Material_Texture2D_3Sampler" + OpName %in_var_TEXCOORD6 "in.var.TEXCOORD6" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPixelShader "MainPixelShader" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorateString %in_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorate %in_var_PRIMITIVE_ID Flat + OpDecorate %gl_FrontFacing BuiltIn FrontFacing + OpDecorateString %gl_FrontFacing UserSemantic "SV_IsFrontFace" + OpDecorate %gl_FrontFacing Flat + OpDecorate %gl_FragDepth BuiltIn FragDepth + OpDecorateString %gl_FragDepth UserSemantic "SV_DepthLessEqual" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD6 Location 0 + OpDecorate %in_var_TEXCOORD7 Location 1 + OpDecorate %in_var_TEXCOORD10_centroid Location 2 + OpDecorate %in_var_TEXCOORD11_centroid Location 3 + OpDecorate %in_var_TEXCOORD0 Location 4 + OpDecorate %in_var_PRIMITIVE_ID Location 5 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %PrimitiveDither DescriptorSet 0 + OpDecorate %PrimitiveDither Binding 
1 + OpDecorate %PrimitiveFade DescriptorSet 0 + OpDecorate %PrimitiveFade Binding 2 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 3 + OpDecorate %Material_Texture2D_0 DescriptorSet 0 + OpDecorate %Material_Texture2D_0 Binding 0 + OpDecorate %Material_Texture2D_0Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_0Sampler Binding 0 + OpDecorate %Material_Texture2D_3 DescriptorSet 0 + OpDecorate %Material_Texture2D_3 Binding 1 + OpDecorate %Material_Texture2D_3Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_3Sampler Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + 
OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 
37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 
2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + 
OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate 
%type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpMemberDecorate %type_PrimitiveDither 0 Offset 0 + OpDecorate %type_PrimitiveDither Block + OpMemberDecorate %type_PrimitiveFade 0 Offset 0 + OpDecorate %type_PrimitiveFade Block + OpDecorate %_arr_v4float_uint_9 ArrayStride 16 + OpDecorate %_arr_v4float_uint_3 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 144 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = 
OpConstant %uint 4 + %v2int = OpTypeVector %int 2 +%float_0_00100000005 = OpConstant %float 0.00100000005 + %int_2 = OpConstant %int 2 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float_0 = OpConstant %float 0 + %49 = OpConstantComposite %v2float %float_0 %float_0 + %float_1 = OpConstant %float 1 + %int_4 = OpConstant %int 4 + %int_11 = OpConstant %int 11 +%float_0_249500006 = OpConstant %float 0.249500006 + %54 = OpConstantComposite %v2float %float_0_249500006 %float_0_249500006 +%float_0_499992371 = OpConstant %float 0.499992371 + %56 = OpConstantComposite %v2float %float_0_499992371 %float_0_499992371 + %int_32 = OpConstant %int 32 + %int_53 = OpConstant %int 53 + %int_57 = OpConstant %int 57 + %int_80 = OpConstant %int 80 + %int_82 = OpConstant %int 82 + %int_98 = OpConstant %int 98 + %uint_1 = OpConstant %uint 1 +%mat3v3float = OpTypeMatrix %v3float 3 + %float_2 = OpConstant %float 2 + %float_n1 = OpConstant %float -1 + %67 = OpConstantComposite %v2float %float_n1 %float_n1 + %bool = OpTypeBool + %float_n0_5 = OpConstant %float -0.5 + %70 = OpConstantComposite %v3float %float_0 %float_0 %float_1 +%float_0_333299994 = OpConstant %float 0.333299994 + %uint_5 = OpConstant %uint 5 +%float_347_834503 = OpConstant %float 347.834503 +%float_3343_28369 = OpConstant %float 3343.28369 + %75 = OpConstantComposite %v2float %float_347_834503 %float_3343_28369 + %float_1000 = OpConstant %float 1000 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float 
%mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_PrimitiveDither = OpTypeStruct %float +%_ptr_Uniform_type_PrimitiveDither = OpTypePointer Uniform %type_PrimitiveDither +%type_PrimitiveFade = OpTypeStruct %v2float +%_ptr_Uniform_type_PrimitiveFade = OpTypePointer Uniform %type_PrimitiveFade + %uint_9 = OpConstant %uint 9 +%_arr_v4float_uint_9 = OpTypeArray %v4float %uint_9 + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%type_Material = OpTypeStruct %_arr_v4float_uint_9 %_arr_v4float_uint_3 +%_ptr_Uniform_type_Material = OpTypePointer Uniform %type_Material +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler 
+%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_ptr_Input__arr_v4float_uint_1 = OpTypePointer Input %_arr_v4float_uint_1 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_bool = OpTypePointer Input %bool +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %93 = OpTypeFunction %void +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%PrimitiveDither = OpVariable %_ptr_Uniform_type_PrimitiveDither Uniform +%PrimitiveFade = OpVariable %_ptr_Uniform_type_PrimitiveFade Uniform + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%Material_Texture2D_0 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_0Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%Material_Texture2D_3 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_3Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD6 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr_v4float_uint_1 Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input_uint Input +%gl_FrontFacing = OpVariable %_ptr_Input_bool Input +%gl_FragDepth = OpVariable %_ptr_Output_float Output +%out_var_SV_Target0 = OpVariable 
%_ptr_Output_v4float Output + %98 = OpUndef %float + %99 = OpConstantNull %v2float +%float_0_015625 = OpConstant %float 0.015625 + %101 = OpConstantComposite %v2float %float_0_015625 %float_0_015625 +%float_0_166666672 = OpConstant %float 0.166666672 + %103 = OpUndef %float + %104 = OpConstantNull %v3float +%MainPixelShader = OpFunction %void None %93 + %105 = OpLabel + %106 = OpLoad %v4float %gl_FragCoord + %107 = OpLoad %v4float %in_var_TEXCOORD6 + %108 = OpLoad %v4float %in_var_TEXCOORD7 + %109 = OpLoad %v4float %in_var_TEXCOORD10_centroid + %110 = OpLoad %v4float %in_var_TEXCOORD11_centroid + %111 = OpLoad %_arr_v4float_uint_1 %in_var_TEXCOORD0 + %112 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_4 + %113 = OpLoad %mat4v4float %112 + %114 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_11 + %115 = OpLoad %mat4v4float %114 + %116 = OpAccessChain %_ptr_Uniform_v3float %View %int_32 + %117 = OpLoad %v3float %116 + %118 = OpAccessChain %_ptr_Uniform_v4float %View %int_53 + %119 = OpLoad %v4float %118 + %120 = OpAccessChain %_ptr_Uniform_v4float %View %int_57 + %121 = OpLoad %v4float %120 + %122 = OpAccessChain %_ptr_Uniform_float %View %int_80 + %123 = OpLoad %float %122 + %124 = OpCompositeExtract %v4float %111 0 + %125 = OpVectorShuffle %v2float %99 %124 2 3 + %126 = OpVectorShuffle %v3float %109 %109 0 1 2 + %127 = OpVectorShuffle %v3float %110 %110 0 1 2 + %128 = OpExtInst %v3float %1 Cross %127 %126 + %129 = OpCompositeExtract %float %110 3 + %130 = OpCompositeConstruct %v3float %129 %129 %129 + %131 = OpFMul %v3float %128 %130 + %132 = OpCompositeConstruct %mat3v3float %126 %131 %127 + %133 = OpVectorShuffle %v2float %106 %106 0 1 + %134 = OpVectorShuffle %v2float %121 %121 0 1 + %135 = OpFSub %v2float %133 %134 + %136 = OpCompositeExtract %float %106 2 + %137 = OpCompositeConstruct %v4float %103 %103 %136 %float_1 + %138 = OpCompositeExtract %float %106 3 + %139 = OpCompositeConstruct %v4float %138 %138 %138 %138 + %140 = OpFMul %v4float %137 
%139 + %141 = OpCompositeExtract %float %106 0 + %142 = OpCompositeExtract %float %106 1 + %143 = OpCompositeConstruct %v4float %141 %142 %136 %float_1 + %144 = OpMatrixTimesVector %v4float %115 %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpCompositeExtract %float %144 3 + %147 = OpCompositeConstruct %v3float %146 %146 %146 + %148 = OpFDiv %v3float %145 %147 + %149 = OpFSub %v3float %148 %117 + %150 = OpFNegate %v3float %148 + %151 = OpExtInst %v3float %1 Normalize %150 + %152 = OpVectorTimesMatrix %v3float %151 %132 + %153 = OpVectorShuffle %v2float %152 %152 0 1 + %154 = OpFMul %v2float %153 %67 + %155 = OpCompositeExtract %float %152 2 + %156 = OpCompositeConstruct %v2float %155 %155 + %157 = OpFDiv %v2float %154 %156 + %158 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_0 + %159 = OpLoad %float %158 + %160 = OpCompositeConstruct %v2float %159 %159 + %161 = OpFMul %v2float %160 %157 + %162 = OpDot %float %151 %127 + %163 = OpExtInst %float %1 FAbs %162 + %164 = OpExtInst %float %1 FMax %163 %float_0 + %165 = OpExtInst %float %1 FMin %164 %float_1 + %166 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_1 + %167 = OpLoad %float %166 + %168 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_2 + %169 = OpLoad %float %168 + %170 = OpExtInst %float %1 FMix %167 %169 %165 + %171 = OpExtInst %float %1 Floor %170 + %172 = OpFDiv %float %float_1 %170 + %173 = OpCompositeConstruct %v2float %172 %172 + %174 = OpFMul %v2float %161 %173 + %175 = OpDPdx %v2float %125 + %176 = OpDPdy %v2float %125 + %177 = OpLoad %type_2d_image %Material_Texture2D_0 + %178 = OpLoad %type_sampler %Material_Texture2D_0Sampler + OpBranch %179 + %179 = OpLabel + %180 = OpPhi %float %float_1 %105 %181 %182 + %183 = OpPhi %v2float %49 %105 %184 %182 + %185 = OpPhi %int %int_0 %105 %186 %182 + %187 = OpPhi %float %float_1 %105 %188 %182 + %189 = OpPhi %float %float_1 %105 %180 %182 + %190 = OpConvertSToF %float %185 + %191 = OpFAdd 
%float %171 %float_2 + %192 = OpFOrdLessThan %bool %190 %191 + OpLoopMerge %193 %182 None + OpBranchConditional %192 %194 %193 + %194 = OpLabel + %195 = OpFAdd %v2float %125 %183 + %196 = OpSampledImage %type_sampled_image %177 %178 + %197 = OpImageSampleExplicitLod %v4float %196 %195 Grad %175 %176 + %188 = OpCompositeExtract %float %197 1 + %198 = OpFOrdLessThan %bool %180 %188 + OpSelectionMerge %182 None + OpBranchConditional %198 %199 %182 + %199 = OpLabel + %200 = OpFSub %float %189 %187 + %201 = OpFSub %float %188 %180 + %202 = OpFAdd %float %200 %201 + %203 = OpFDiv %float %201 %202 + %204 = OpFMul %float %189 %203 + %205 = OpFSub %float %float_1 %203 + %206 = OpFMul %float %180 %205 + %207 = OpFAdd %float %204 %206 + %208 = OpCompositeConstruct %v2float %203 %203 + %209 = OpFMul %v2float %208 %174 + %210 = OpFSub %v2float %183 %209 + OpBranch %193 + %182 = OpLabel + %181 = OpFSub %float %180 %172 + %184 = OpFAdd %v2float %183 %174 + %186 = OpIAdd %int %185 %int_1 + OpBranch %179 + %193 = OpLabel + %211 = OpPhi %float %98 %179 %207 %199 + %212 = OpPhi %v2float %183 %179 %210 %199 + %213 = OpVectorShuffle %v2float %212 %104 0 1 + %214 = OpFAdd %v2float %125 %213 + %215 = OpAccessChain %_ptr_Uniform_float %View %int_82 + %216 = OpLoad %float %215 + %217 = OpSampledImage %type_sampled_image %177 %178 + %218 = OpImageSampleImplicitLod %v4float %217 %214 Bias %216 + %219 = OpCompositeExtract %float %218 0 + %220 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_2 %int_1 + %221 = OpLoad %float %220 + %222 = OpFMul %float %219 %221 + %223 = OpFSub %float %float_1 %222 + %224 = OpExtInst %float %1 FMax %223 %float_0 + %225 = OpExtInst %float %1 FMin %224 %float_1 + %226 = OpAccessChain %_ptr_Uniform_float %View %int_98 %int_0 + %227 = OpLoad %float %226 + %228 = OpCompositeConstruct %v2float %227 %227 + %229 = OpFAdd %v2float %135 %228 + %230 = OpCompositeExtract %float %229 0 + %231 = OpConvertFToU %uint %230 + %232 = OpCompositeExtract %float %229 1 + 
%233 = OpConvertFToU %uint %232 + %234 = OpIMul %uint %uint_2 %233 + %235 = OpIAdd %uint %231 %234 + %236 = OpUMod %uint %235 %uint_5 + %237 = OpConvertUToF %float %236 + %238 = OpFMul %v2float %135 %101 + %239 = OpLoad %type_2d_image %Material_Texture2D_3 + %240 = OpLoad %type_sampler %Material_Texture2D_3Sampler + %241 = OpSampledImage %type_sampled_image %239 %240 + %242 = OpImageSampleImplicitLod %v4float %241 %238 Bias %216 + %243 = OpCompositeExtract %float %242 0 + %244 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_2 %int_2 + %245 = OpLoad %float %244 + %246 = OpFMul %float %243 %245 + %247 = OpFAdd %float %237 %246 + %248 = OpFMul %float %247 %float_0_166666672 + %249 = OpFAdd %float %225 %248 + %250 = OpFAdd %float %249 %float_n0_5 + %251 = OpCompositeExtract %float %218 2 + %252 = OpFAdd %float %251 %250 + %253 = OpSampledImage %type_sampled_image %239 %240 + %254 = OpImageSampleImplicitLod %v4float %253 %238 Bias %216 + %255 = OpCompositeExtract %float %254 0 + %256 = OpFAdd %float %237 %255 + %257 = OpFMul %float %256 %float_0_166666672 + %258 = OpAccessChain %_ptr_Uniform_float %PrimitiveFade %int_0 %int_0 + %259 = OpLoad %float %258 + %260 = OpFMul %float %123 %259 + %261 = OpAccessChain %_ptr_Uniform_float %PrimitiveFade %int_0 %int_1 + %262 = OpLoad %float %261 + %263 = OpFAdd %float %260 %262 + %264 = OpExtInst %float %1 FClamp %263 %float_0 %float_1 + %265 = OpFAdd %float %264 %257 + %266 = OpFAdd %float %265 %float_n0_5 + %267 = OpFMul %float %252 %266 + %268 = OpFSub %float %float_1 %211 + %269 = OpFMul %float %268 %159 + %270 = OpCompositeExtract %float %212 0 + %271 = OpCompositeExtract %float %212 1 + %272 = OpCompositeConstruct %v3float %270 %271 %269 + %273 = OpDot %float %272 %272 + %274 = OpExtInst %float %1 Sqrt %273 + %275 = OpDPdx %v2float %125 + %276 = OpExtInst %v2float %1 FAbs %275 + %277 = OpDot %float %276 %276 + %278 = OpExtInst %float %1 Sqrt %277 + %279 = OpDPdx %v3float %149 + %280 = OpDot %float %279 %279 + %281 = 
OpExtInst %float %1 Sqrt %280 + %282 = OpFDiv %float %278 %281 + %283 = OpDPdy %v2float %125 + %284 = OpExtInst %v2float %1 FAbs %283 + %285 = OpDot %float %284 %284 + %286 = OpExtInst %float %1 Sqrt %285 + %287 = OpDPdy %v3float %149 + %288 = OpDot %float %287 %287 + %289 = OpExtInst %float %1 Sqrt %288 + %290 = OpFDiv %float %286 %289 + %291 = OpExtInst %float %1 FMax %282 %290 + %292 = OpCompositeExtract %v4float %113 0 + %293 = OpVectorShuffle %v3float %292 %292 0 1 2 + %294 = OpCompositeExtract %v4float %113 1 + %295 = OpVectorShuffle %v3float %294 %294 0 1 2 + %296 = OpCompositeExtract %v4float %113 2 + %297 = OpVectorShuffle %v3float %296 %296 0 1 2 + %298 = OpCompositeConstruct %mat3v3float %293 %295 %297 + %299 = OpMatrixTimesVector %v3float %298 %70 + %300 = OpDot %float %299 %151 + %301 = OpExtInst %float %1 FAbs %300 + %302 = OpFDiv %float %291 %301 + %303 = OpFDiv %float %274 %302 + %304 = OpAccessChain %_ptr_Uniform_float %PrimitiveDither %int_0 + %305 = OpLoad %float %304 + %306 = OpFOrdNotEqual %bool %305 %float_0 + OpSelectionMerge %307 None + OpBranchConditional %306 %308 %307 + %308 = OpLabel + %309 = OpExtInst %float %1 FAbs %305 + %310 = OpFOrdGreaterThan %bool %309 %float_0_00100000005 + OpSelectionMerge %311 None + OpBranchConditional %310 %312 %311 + %312 = OpLabel + %313 = OpExtInst %v2float %1 Floor %133 + %314 = OpDot %float %313 %75 + %315 = OpExtInst %float %1 Cos %314 + %316 = OpFMul %float %315 %float_1000 + %317 = OpExtInst %float %1 Fract %316 + %318 = OpFOrdLessThan %bool %305 %float_0 + %319 = OpFAdd %float %305 %float_1 + %320 = OpFOrdGreaterThan %bool %319 %317 + %321 = OpFOrdLessThan %bool %305 %317 + %322 = OpSelect %bool %318 %320 %321 + %323 = OpSelect %float %322 %float_1 %float_0 + %324 = OpFSub %float %323 %float_0_00100000005 + %325 = OpFOrdLessThan %bool %324 %float_0 + OpSelectionMerge %326 None + OpBranchConditional %325 %327 %326 + %327 = OpLabel + OpKill + %326 = OpLabel + OpBranch %311 + %311 = OpLabel + OpBranch 
%307 + %307 = OpLabel + %328 = OpFSub %float %267 %float_0_333299994 + %329 = OpFOrdLessThan %bool %328 %float_0 + OpSelectionMerge %330 None + OpBranchConditional %329 %331 %330 + %331 = OpLabel + OpKill + %330 = OpLabel + %332 = OpCompositeExtract %float %140 2 + %333 = OpCompositeExtract %float %140 3 + %334 = OpFAdd %float %333 %303 + %335 = OpFDiv %float %332 %334 + %336 = OpExtInst %float %1 FMin %335 %136 + %337 = OpVectorShuffle %v2float %107 %107 0 1 + %338 = OpCompositeExtract %float %107 3 + %339 = OpCompositeConstruct %v2float %338 %338 + %340 = OpFDiv %v2float %337 %339 + %341 = OpVectorShuffle %v2float %119 %119 0 1 + %342 = OpFSub %v2float %340 %341 + %343 = OpVectorShuffle %v2float %108 %108 0 1 + %344 = OpCompositeExtract %float %108 3 + %345 = OpCompositeConstruct %v2float %344 %344 + %346 = OpFDiv %v2float %343 %345 + %347 = OpVectorShuffle %v2float %119 %119 2 3 + %348 = OpFSub %v2float %346 %347 + %349 = OpFSub %v2float %342 %348 + %350 = OpFMul %v2float %349 %54 + %351 = OpFAdd %v2float %350 %56 + %352 = OpVectorShuffle %v4float %351 %49 0 1 2 3 + OpStore %gl_FragDepth %336 + OpStore %out_var_SV_Target0 %352 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese b/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese new file mode 100644 index 00000000000..778e93d39a3 --- /dev/null +++ b/shaders-ue4-no-opt/asm/tese/ds-texcoord-array.asm.tese @@ -0,0 +1,715 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 183 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %MainDomain "main" %gl_TessLevelOuter %gl_TessLevelInner %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_TEXCOORD0 %in_var_COLOR1 %in_var_COLOR2 %in_var_VS_To_DS_Position %in_var_TEXCOORD7 %in_var_Flat_DisplacementScales 
%in_var_Flat_TessellationMultiplier %in_var_Flat_WorldDisplacementMultiplier %gl_TessCoord %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_TEXCOORD0 %out_var_COLOR1 %out_var_COLOR2 %out_var_TEXCOORD6 %out_var_TEXCOORD7 %gl_Position + OpExecutionMode %MainDomain Triangles + OpExecutionMode %MainDomain SpacingFractionalOdd + OpExecutionMode %MainDomain VertexOrderCw + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + 
OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 
"PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 
"View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + 
OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + 
OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpMemberName %type_View 179 "PrePadding_View_3048" + OpMemberName %type_View 180 "PrePadding_View_3052" + OpMemberName %type_View 181 "View_WorldToVirtualTexture" + OpMemberName %type_View 182 "View_VirtualTextureParams" + OpMemberName %type_View 183 "View_XRPassthroughCameraUVs" + OpName %View "View" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_COLOR1 "in.var.COLOR1" + OpName %in_var_COLOR2 "in.var.COLOR2" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %in_var_Flat_DisplacementScales "in.var.Flat_DisplacementScales" + OpName %in_var_Flat_TessellationMultiplier "in.var.Flat_TessellationMultiplier" + OpName %in_var_Flat_WorldDisplacementMultiplier "in.var.Flat_WorldDisplacementMultiplier" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_COLOR1 
"out.var.COLOR1" + OpName %out_var_COLOR2 "out.var.COLOR2" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %out_var_TEXCOORD7 "out.var.TEXCOORD7" + OpName %MainDomain "MainDomain" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_COLOR1 UserSemantic "COLOR1" + OpDecorateString %in_var_COLOR2 UserSemantic "COLOR2" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %in_var_Flat_DisplacementScales UserSemantic "Flat_DisplacementScales" + OpDecorateString %in_var_Flat_TessellationMultiplier UserSemantic "Flat_TessellationMultiplier" + OpDecorateString %in_var_Flat_WorldDisplacementMultiplier UserSemantic "Flat_WorldDisplacementMultiplier" + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" + OpDecorate %gl_TessCoord Patch + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_COLOR1 UserSemantic "COLOR1" + OpDecorateString %out_var_COLOR2 UserSemantic "COLOR2" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %out_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic 
"SV_POSITION" + OpDecorate %in_var_COLOR1 Location 0 + OpDecorate %in_var_COLOR2 Location 1 + OpDecorate %in_var_Flat_DisplacementScales Location 2 + OpDecorate %in_var_Flat_TessellationMultiplier Location 3 + OpDecorate %in_var_Flat_WorldDisplacementMultiplier Location 4 + OpDecorate %in_var_TEXCOORD0 Location 5 + OpDecorate %in_var_TEXCOORD10_centroid Location 6 + OpDecorate %in_var_TEXCOORD11_centroid Location 7 + OpDecorate %in_var_TEXCOORD7 Location 8 + OpDecorate %in_var_VS_To_DS_Position Location 9 + OpDecorate %out_var_TEXCOORD10_centroid Location 0 + OpDecorate %out_var_TEXCOORD11_centroid Location 1 + OpDecorate %out_var_TEXCOORD0 Location 2 + OpDecorate %out_var_COLOR1 Location 3 + OpDecorate %out_var_COLOR2 Location 4 + OpDecorate %out_var_TEXCOORD6 Location 5 + OpDecorate %out_var_TEXCOORD7 Location 6 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 
448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 
MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 
1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + 
OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate 
%type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpMemberDecorate %type_View 179 Offset 3048 + OpMemberDecorate %type_View 180 Offset 3052 + OpMemberDecorate %type_View 181 Offset 3056 + OpMemberDecorate %type_View 181 MatrixStride 16 + OpMemberDecorate %type_View 181 ColMajor + OpMemberDecorate %type_View 182 Offset 3120 + OpMemberDecorate %type_View 183 Offset 3136 + OpDecorate %type_View Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector 
%float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint_1 = OpConstant %uint 1 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float 
%v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float %float %float %mat4v4float %v4float %_arr_v4float_uint_2 +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_arr__arr_v4float_uint_1_uint_3 = OpTypeArray %_arr_v4float_uint_1 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_1_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_1_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Input__arr_v3float_uint_3 = OpTypePointer Input %_arr_v3float_uint_3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output__arr_v4float_uint_1 = OpTypePointer Output %_arr_v4float_uint_1 +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %63 = OpTypeFunction %void +%_ptr_Function_v4float = OpTypePointer Function %v4float + %bool = OpTypeBool +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float + %View = OpVariable %_ptr_Uniform_type_View Uniform +%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr__arr_v4float_uint_1_uint_3 Input 
+%in_var_COLOR1 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_COLOR2 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_Flat_DisplacementScales = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_Flat_TessellationMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_Flat_WorldDisplacementMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD0 = OpVariable %_ptr_Output__arr_v4float_uint_1 Output +%out_var_COLOR1 = OpVariable %_ptr_Output_v4float Output +%out_var_COLOR2 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD7 = OpVariable %_ptr_Output_v3float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output +%_ptr_Function__arr_v4float_uint_1 = OpTypePointer Function %_arr_v4float_uint_1 + %68 = OpUndef %v4float + %69 = OpConstantNull %v4float + %MainDomain = OpFunction %void None %63 + %70 = OpLabel + %71 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %72 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %73 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %74 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %75 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %76 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %77 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD10_centroid + %78 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD11_centroid + %79 = OpLoad %_arr__arr_v4float_uint_1_uint_3 %in_var_TEXCOORD0 + %80 = OpLoad %_arr_v4float_uint_3 %in_var_COLOR1 + %81 = OpLoad %_arr_v4float_uint_3 %in_var_COLOR2 + %82 = 
OpCompositeExtract %v4float %77 0 + %83 = OpCompositeExtract %v4float %78 0 + %84 = OpCompositeExtract %_arr_v4float_uint_1 %79 0 + %85 = OpCompositeExtract %v4float %80 0 + %86 = OpCompositeExtract %v4float %81 0 + %87 = OpCompositeExtract %v4float %77 1 + %88 = OpCompositeExtract %v4float %78 1 + %89 = OpCompositeExtract %_arr_v4float_uint_1 %79 1 + %90 = OpCompositeExtract %v4float %80 1 + %91 = OpCompositeExtract %v4float %81 1 + %92 = OpCompositeExtract %v4float %77 2 + %93 = OpCompositeExtract %v4float %78 2 + %94 = OpCompositeExtract %_arr_v4float_uint_1 %79 2 + %95 = OpCompositeExtract %v4float %80 2 + %96 = OpCompositeExtract %v4float %81 2 + %97 = OpLoad %_arr_v4float_uint_3 %in_var_VS_To_DS_Position + %98 = OpLoad %_arr_v3float_uint_3 %in_var_TEXCOORD7 + %99 = OpCompositeExtract %v4float %97 0 + %100 = OpCompositeExtract %v3float %98 0 + %101 = OpCompositeExtract %v4float %97 1 + %102 = OpCompositeExtract %v3float %98 1 + %103 = OpCompositeExtract %v4float %97 2 + %104 = OpCompositeExtract %v3float %98 2 + %105 = OpLoad %v3float %gl_TessCoord + %106 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %107 = OpLoad %mat4v4float %106 + %108 = OpCompositeExtract %float %105 0 + %109 = OpCompositeExtract %float %105 1 + %110 = OpCompositeExtract %float %105 2 + %111 = OpCompositeConstruct %v4float %108 %108 %108 %108 + %112 = OpFMul %v4float %99 %111 + %113 = OpCompositeConstruct %v4float %109 %109 %109 %109 + %114 = OpFMul %v4float %101 %113 + %115 = OpFAdd %v4float %112 %114 + %116 = OpCompositeConstruct %v4float %110 %110 %110 %110 + %117 = OpFMul %v4float %103 %116 + %118 = OpFAdd %v4float %115 %117 + OpStore %72 %84 + OpStore %71 %89 + %119 = OpVectorShuffle %v3float %82 %82 0 1 2 + %120 = OpCompositeConstruct %v3float %108 %108 %108 + %121 = OpFMul %v3float %119 %120 + %122 = OpVectorShuffle %v3float %87 %87 0 1 2 + %123 = OpCompositeConstruct %v3float %109 %109 %109 + %124 = OpFMul %v3float %122 %123 + %125 = OpFAdd %v3float %121 %124 + %126 = 
OpFMul %v4float %83 %111 + %127 = OpFMul %v4float %88 %113 + %128 = OpFAdd %v4float %126 %127 + %129 = OpFMul %v4float %85 %111 + %130 = OpFMul %v4float %90 %113 + %131 = OpFAdd %v4float %129 %130 + OpBranch %132 + %132 = OpLabel + %133 = OpPhi %int %int_0 %70 %134 %135 + %136 = OpSLessThan %bool %133 %int_1 + OpLoopMerge %137 %135 None + OpBranchConditional %136 %135 %137 + %135 = OpLabel + %138 = OpAccessChain %_ptr_Function_v4float %72 %133 + %139 = OpLoad %v4float %138 + %140 = OpFMul %v4float %139 %111 + %141 = OpAccessChain %_ptr_Function_v4float %71 %133 + %142 = OpLoad %v4float %141 + %143 = OpFMul %v4float %142 %113 + %144 = OpFAdd %v4float %140 %143 + %145 = OpAccessChain %_ptr_Function_v4float %73 %133 + OpStore %145 %144 + %134 = OpIAdd %int %133 %int_1 + OpBranch %132 + %137 = OpLabel + %146 = OpFMul %v4float %86 %111 + %147 = OpFMul %v4float %91 %113 + %148 = OpFAdd %v4float %146 %147 + %149 = OpLoad %_arr_v4float_uint_1 %73 + %150 = OpFMul %v3float %100 %120 + %151 = OpFMul %v3float %102 %123 + %152 = OpFAdd %v3float %150 %151 + OpStore %75 %149 + OpStore %74 %94 + %153 = OpVectorShuffle %v3float %125 %69 0 1 2 + %154 = OpVectorShuffle %v3float %92 %92 0 1 2 + %155 = OpCompositeConstruct %v3float %110 %110 %110 + %156 = OpFMul %v3float %154 %155 + %157 = OpFAdd %v3float %153 %156 + %158 = OpVectorShuffle %v4float %68 %157 4 5 6 3 + %159 = OpFMul %v4float %93 %116 + %160 = OpFAdd %v4float %128 %159 + %161 = OpFMul %v4float %95 %116 + %162 = OpFAdd %v4float %131 %161 + OpBranch %163 + %163 = OpLabel + %164 = OpPhi %int %int_0 %137 %165 %166 + %167 = OpSLessThan %bool %164 %int_1 + OpLoopMerge %168 %166 None + OpBranchConditional %167 %166 %168 + %166 = OpLabel + %169 = OpAccessChain %_ptr_Function_v4float %75 %164 + %170 = OpLoad %v4float %169 + %171 = OpAccessChain %_ptr_Function_v4float %74 %164 + %172 = OpLoad %v4float %171 + %173 = OpFMul %v4float %172 %116 + %174 = OpFAdd %v4float %170 %173 + %175 = OpAccessChain %_ptr_Function_v4float %76 %164 + 
OpStore %175 %174 + %165 = OpIAdd %int %164 %int_1 + OpBranch %163 + %168 = OpLabel + %176 = OpFMul %v4float %96 %116 + %177 = OpFAdd %v4float %148 %176 + %178 = OpLoad %_arr_v4float_uint_1 %76 + %179 = OpFMul %v3float %104 %155 + %180 = OpFAdd %v3float %152 %179 + %181 = OpVectorShuffle %v4float %118 %118 4 5 6 3 + %182 = OpMatrixTimesVector %v4float %107 %181 + OpStore %out_var_TEXCOORD10_centroid %158 + OpStore %out_var_TEXCOORD11_centroid %160 + OpStore %out_var_TEXCOORD0 %178 + OpStore %out_var_COLOR1 %162 + OpStore %out_var_COLOR2 %177 + OpStore %out_var_TEXCOORD6 %181 + OpStore %out_var_TEXCOORD7 %180 + OpStore %gl_Position %182 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert b/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert new file mode 100644 index 00000000000..693f16c0099 --- /dev/null +++ b/shaders-ue4-no-opt/asm/vert/loop-accesschain-writethrough.asm.invalid.vert @@ -0,0 +1,259 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 181 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %ScatterMainVS "main" %gl_VertexIndex %gl_InstanceIndex %out_var_TEXCOORD0 %out_var_TEXCOORD1 %out_var_TEXCOORD2 %out_var_TEXCOORD3 %out_var_TEXCOORD4 %out_var_TEXCOORD5 %out_var_TEXCOORD6 %gl_Position + OpSource HLSL 600 + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ViewportSize" + OpMemberName %type__Globals 1 "ScatteringScaling" + OpMemberName %type__Globals 2 "CocRadiusToCircumscribedRadius" + OpName %_Globals "$Globals" + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %ScatterDrawList "ScatterDrawList" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_TEXCOORD1 "out.var.TEXCOORD1" + OpName %out_var_TEXCOORD2 "out.var.TEXCOORD2" + OpName %out_var_TEXCOORD3 
"out.var.TEXCOORD3" + OpName %out_var_TEXCOORD4 "out.var.TEXCOORD4" + OpName %out_var_TEXCOORD5 "out.var.TEXCOORD5" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %ScatterMainVS "ScatterMainVS" + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorateString %gl_VertexIndex UserSemantic "SV_VertexID" + OpDecorate %gl_InstanceIndex BuiltIn InstanceIndex + OpDecorateString %gl_InstanceIndex UserSemantic "SV_InstanceID" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_TEXCOORD1 UserSemantic "TEXCOORD1" + OpDecorateString %out_var_TEXCOORD2 UserSemantic "TEXCOORD2" + OpDecorateString %out_var_TEXCOORD3 UserSemantic "TEXCOORD3" + OpDecorateString %out_var_TEXCOORD4 UserSemantic "TEXCOORD4" + OpDecorateString %out_var_TEXCOORD5 UserSemantic "TEXCOORD5" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %out_var_TEXCOORD0 Location 0 + OpDecorate %out_var_TEXCOORD1 Location 1 + OpDecorate %out_var_TEXCOORD2 Location 2 + OpDecorate %out_var_TEXCOORD3 Location 3 + OpDecorate %out_var_TEXCOORD4 Location 4 + OpDecorate %out_var_TEXCOORD5 Location 5 + OpDecorate %out_var_TEXCOORD6 Location 6 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 1 + OpDecorate %ScatterDrawList DescriptorSet 0 + OpDecorate %ScatterDrawList Binding 0 + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 20 + OpDecorate %type__Globals Block + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = 
OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %float_0_5 = OpConstant %float 0.5 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float_1 = OpConstant %float 1 + %uint_16 = OpConstant %uint 16 + %float_0 = OpConstant %float 0 + %uint_0 = OpConstant %uint 0 + %uint_5 = OpConstant %uint 5 + %uint_1 = OpConstant %uint 1 + %int_3 = OpConstant %int 3 + %float_n0_5 = OpConstant %float -0.5 + %int_2 = OpConstant %int 2 + %float_2 = OpConstant %float 2 + %39 = OpConstantComposite %v2float %float_2 %float_2 + %40 = OpConstantComposite %v2float %float_1 %float_1 + %41 = OpConstantComposite %v2float %float_0_5 %float_0_5 +%type__Globals = OpTypeStruct %v4float %float %float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %48 = OpTypeFunction %void +%_ptr_Function_v2float = OpTypePointer Function %v2float +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Function__arr_v4float_uint_4 = OpTypePointer Function %_arr_v4float_uint_4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Function__arr_float_uint_4 = OpTypePointer Function %_arr_float_uint_4 +%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4 +%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4 +%_ptr_Function_float = OpTypePointer Function %float + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%ScatterDrawList = OpVariable 
%_ptr_Uniform_type_StructuredBuffer_v4float Uniform +%gl_VertexIndex = OpVariable %_ptr_Input_uint Input +%gl_InstanceIndex = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD0 = OpVariable %_ptr_Output_v2float Output +%out_var_TEXCOORD1 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD2 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD3 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD4 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD5 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_v4float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output +%ScatterMainVS = OpFunction %void None %48 + %60 = OpLabel + %61 = OpVariable %_ptr_Function__arr_v4float_uint_4 Function + %62 = OpVariable %_ptr_Function__arr_float_uint_4 Function + %63 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function + %64 = OpLoad %uint %gl_VertexIndex + %65 = OpLoad %uint %gl_InstanceIndex + %66 = OpUDiv %uint %64 %uint_4 + %67 = OpIMul %uint %66 %uint_4 + %68 = OpISub %uint %64 %67 + %69 = OpIMul %uint %uint_16 %65 + %70 = OpIAdd %uint %69 %66 + OpBranch %71 + %71 = OpLabel + %72 = OpPhi %float %float_0 %60 %73 %74 + %75 = OpPhi %uint %uint_0 %60 %76 %74 + %77 = OpULessThan %bool %75 %uint_4 + OpLoopMerge %78 %74 Unroll + OpBranchConditional %77 %79 %78 + %79 = OpLabel + %80 = OpIMul %uint %uint_5 %70 + %81 = OpIAdd %uint %80 %75 + %82 = OpIAdd %uint %81 %uint_1 + %83 = OpAccessChain %_ptr_Uniform_v4float %ScatterDrawList %int_0 %82 + %84 = OpLoad %v4float %83 + %85 = OpCompositeExtract %float %84 0 + %86 = OpCompositeExtract %float %84 1 + %87 = OpCompositeExtract %float %84 2 + %88 = OpCompositeConstruct %v4float %85 %86 %87 %float_0 + %89 = OpAccessChain %_ptr_Function_v4float %61 %75 + OpStore %89 %88 + %90 = OpCompositeExtract %float %84 3 + %91 = OpAccessChain %_ptr_Function_float %62 %75 + OpStore %91 %90 + %92 = OpIEqual %bool %75 %uint_0 + OpSelectionMerge %74 None + OpBranchConditional %92 %93 
%94 + %93 = OpLabel + %95 = OpLoad %float %91 + OpBranch %74 + %94 = OpLabel + %96 = OpLoad %float %91 + %97 = OpExtInst %float %1 FMax %72 %96 + OpBranch %74 + %74 = OpLabel + %73 = OpPhi %float %95 %93 %97 %94 + %98 = OpLoad %float %91 + %99 = OpFDiv %float %float_n0_5 %98 + %100 = OpAccessChain %_ptr_Function_float %63 %75 %int_0 + OpStore %100 %99 + %101 = OpLoad %float %91 + %102 = OpFMul %float %float_0_5 %101 + %103 = OpFAdd %float %102 %float_0_5 + %104 = OpAccessChain %_ptr_Function_float %63 %75 %int_1 + OpStore %104 %103 + %76 = OpIAdd %uint %75 %uint_1 + OpBranch %71 + %78 = OpLabel + %105 = OpAccessChain %_ptr_Function_v4float %61 %int_0 + %106 = OpLoad %v4float %105 + %107 = OpCompositeExtract %float %106 0 + %108 = OpCompositeExtract %float %106 1 + %109 = OpCompositeExtract %float %106 2 + %110 = OpAccessChain %_ptr_Function_float %62 %int_0 + %111 = OpLoad %float %110 + %112 = OpCompositeConstruct %v4float %107 %108 %109 %111 + %113 = OpAccessChain %_ptr_Function_v4float %61 %int_1 + %114 = OpLoad %v4float %113 + %115 = OpCompositeExtract %float %114 0 + %116 = OpCompositeExtract %float %114 1 + %117 = OpCompositeExtract %float %114 2 + %118 = OpAccessChain %_ptr_Function_float %62 %int_1 + %119 = OpLoad %float %118 + %120 = OpCompositeConstruct %v4float %115 %116 %117 %119 + %121 = OpAccessChain %_ptr_Function_v4float %61 %int_2 + %122 = OpLoad %v4float %121 + %123 = OpCompositeExtract %float %122 0 + %124 = OpCompositeExtract %float %122 1 + %125 = OpCompositeExtract %float %122 2 + %126 = OpAccessChain %_ptr_Function_float %62 %int_2 + %127 = OpLoad %float %126 + %128 = OpCompositeConstruct %v4float %123 %124 %125 %127 + %129 = OpAccessChain %_ptr_Function_v4float %61 %int_3 + %130 = OpLoad %v4float %129 + %131 = OpCompositeExtract %float %130 0 + %132 = OpCompositeExtract %float %130 1 + %133 = OpCompositeExtract %float %130 2 + %134 = OpAccessChain %_ptr_Function_float %62 %int_3 + %135 = OpLoad %float %134 + %136 = OpCompositeConstruct 
%v4float %131 %132 %133 %135 + %137 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 + %138 = OpLoad %float %137 + %139 = OpCompositeConstruct %v2float %138 %138 + %140 = OpIMul %uint %uint_5 %70 + %141 = OpAccessChain %_ptr_Uniform_v4float %ScatterDrawList %int_0 %140 + %142 = OpLoad %v4float %141 + %143 = OpVectorShuffle %v2float %142 %142 0 1 + %144 = OpFMul %v2float %139 %143 + %145 = OpAccessChain %_ptr_Function_v2float %63 %int_0 + %146 = OpLoad %v2float %145 + %147 = OpAccessChain %_ptr_Function_v2float %63 %int_1 + %148 = OpLoad %v2float %147 + %149 = OpVectorShuffle %v4float %146 %148 0 1 2 3 + %150 = OpAccessChain %_ptr_Function_v2float %63 %int_2 + %151 = OpLoad %v2float %150 + %152 = OpAccessChain %_ptr_Function_v2float %63 %int_3 + %153 = OpLoad %v2float %152 + %154 = OpVectorShuffle %v4float %151 %153 0 1 2 3 + %155 = OpUMod %uint %68 %uint_2 + %156 = OpConvertUToF %float %155 + %157 = OpUDiv %uint %68 %uint_2 + %158 = OpConvertUToF %float %157 + %159 = OpCompositeConstruct %v2float %156 %158 + %160 = OpFMul %v2float %159 %39 + %161 = OpFSub %v2float %160 %40 + %162 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 + %163 = OpLoad %float %162 + %164 = OpFMul %float %72 %163 + %165 = OpFAdd %float %164 %float_1 + %166 = OpCompositeConstruct %v2float %165 %165 + %167 = OpFMul %v2float %166 %161 + %168 = OpFAdd %v2float %167 %144 + %169 = OpFAdd %v2float %168 %41 + %170 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_0 + %171 = OpLoad %v4float %170 + %172 = OpVectorShuffle %v2float %171 %171 2 3 + %173 = OpFMul %v2float %169 %172 + %174 = OpCompositeExtract %float %173 0 + %175 = OpFMul %float %174 %float_2 + %176 = OpFSub %float %175 %float_1 + %177 = OpCompositeExtract %float %173 1 + %178 = OpFMul %float %177 %float_2 + %179 = OpFSub %float %float_1 %178 + %180 = OpCompositeConstruct %v4float %176 %179 %float_0 %float_1 + OpStore %out_var_TEXCOORD0 %144 + OpStore %out_var_TEXCOORD1 %112 + OpStore %out_var_TEXCOORD2 %120 + OpStore 
%out_var_TEXCOORD3 %128 + OpStore %out_var_TEXCOORD4 %136 + OpStore %out_var_TEXCOORD5 %149 + OpStore %out_var_TEXCOORD6 %154 + OpStore %gl_Position %180 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/depth-compare.asm.frag b/shaders-ue4/asm/frag/depth-compare.asm.frag new file mode 100644 index 00000000000..603d4f28c46 --- /dev/null +++ b/shaders-ue4/asm/frag/depth-compare.asm.frag @@ -0,0 +1,961 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 452 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainOnePassPointLightPS "main" %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %MainOnePassPointLightPS OriginUpperLeft + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 
"View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName 
%type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 
"View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 
"View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 
"View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_2d_image "type.2d.image" + OpName %SceneTexturesStruct_SceneDepthTexture "SceneTexturesStruct_SceneDepthTexture" + OpName %type_sampler "type.sampler" + OpName %SceneTexturesStruct_SceneDepthTextureSampler "SceneTexturesStruct_SceneDepthTextureSampler" + OpName %SceneTexturesStruct_GBufferATexture "SceneTexturesStruct_GBufferATexture" + OpName %SceneTexturesStruct_GBufferBTexture "SceneTexturesStruct_GBufferBTexture" + OpName %SceneTexturesStruct_GBufferDTexture "SceneTexturesStruct_GBufferDTexture" + OpName %SceneTexturesStruct_GBufferATextureSampler "SceneTexturesStruct_GBufferATextureSampler" + OpName %SceneTexturesStruct_GBufferBTextureSampler "SceneTexturesStruct_GBufferBTextureSampler" + OpName %SceneTexturesStruct_GBufferDTextureSampler "SceneTexturesStruct_GBufferDTextureSampler" + OpName %ShadowDepthTextureSampler "ShadowDepthTextureSampler" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "SoftTransitionScale" + OpMemberName %type__Globals 1 "ShadowViewProjectionMatrices" + OpMemberName %type__Globals 2 "InvShadowmapResolution" + OpMemberName %type__Globals 3 "ShadowFadeFraction" + OpMemberName %type__Globals 4 "ShadowSharpen" + 
OpMemberName %type__Globals 5 "LightPositionAndInvRadius" + OpMemberName %type__Globals 6 "ProjectionDepthBiasParameters" + OpMemberName %type__Globals 7 "PointLightDepthBiasAndProjParameters" + OpName %_Globals "$Globals" + OpName %type_cube_image "type.cube.image" + OpName %ShadowDepthCubeTexture "ShadowDepthCubeTexture" + OpName %ShadowDepthCubeTextureSampler "ShadowDepthCubeTextureSampler" + OpName %SSProfilesTexture "SSProfilesTexture" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainOnePassPointLightPS "MainOnePassPointLightPS" + OpName %type_sampled_image "type.sampled.image" + OpName %type_sampled_image_0 "type.sampled.image" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %SceneTexturesStruct_SceneDepthTexture DescriptorSet 0 + OpDecorate %SceneTexturesStruct_SceneDepthTexture Binding 0 + OpDecorate %SceneTexturesStruct_SceneDepthTextureSampler DescriptorSet 0 + OpDecorate %SceneTexturesStruct_SceneDepthTextureSampler Binding 0 + OpDecorate %SceneTexturesStruct_GBufferATexture DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferATexture Binding 1 + OpDecorate %SceneTexturesStruct_GBufferBTexture DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferBTexture Binding 2 + OpDecorate %SceneTexturesStruct_GBufferDTexture DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferDTexture Binding 3 + OpDecorate %SceneTexturesStruct_GBufferATextureSampler DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferATextureSampler Binding 1 + OpDecorate %SceneTexturesStruct_GBufferBTextureSampler DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferBTextureSampler Binding 2 + OpDecorate %SceneTexturesStruct_GBufferDTextureSampler DescriptorSet 0 + OpDecorate %SceneTexturesStruct_GBufferDTextureSampler 
Binding 3 + OpDecorate %ShadowDepthTextureSampler DescriptorSet 0 + OpDecorate %ShadowDepthTextureSampler Binding 4 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 1 + OpDecorate %ShadowDepthCubeTexture DescriptorSet 0 + OpDecorate %ShadowDepthCubeTexture Binding 4 + OpDecorate %ShadowDepthCubeTextureSampler DescriptorSet 0 + OpDecorate %ShadowDepthCubeTextureSampler Binding 5 + OpDecorate %SSProfilesTexture DescriptorSet 0 + OpDecorate %SSProfilesTexture Binding 5 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 
+ OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 
MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 
+ OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + 
OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate 
%type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_arr_mat4v4float_uint_6 ArrayStride 64 + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 1 MatrixStride 16 + OpMemberDecorate %type__Globals 1 ColMajor + OpMemberDecorate %type__Globals 2 Offset 400 + OpMemberDecorate %type__Globals 3 Offset 404 + OpMemberDecorate %type__Globals 4 Offset 408 + OpMemberDecorate %type__Globals 5 Offset 416 + OpMemberDecorate %type__Globals 6 Offset 432 + OpMemberDecorate %type__Globals 7 Offset 448 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %float_2_5 = OpConstant %float 2.5 +%float_2_37764096 = OpConstant %float 2.37764096 +%float_0_772542 = OpConstant %float 0.772542 +%float_1_46946299 = OpConstant %float 1.46946299 +%float_n2_02254295 = OpConstant %float -2.02254295 +%float_n1_46946299 = OpConstant %float -1.46946299 +%float_n2_022542 = OpConstant %float -2.022542 +%float_n2_37764096 = OpConstant %float -2.37764096 
+%float_0_772543013 = OpConstant %float 0.772543013 + %float_1 = OpConstant %float 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 + %int_58 = OpConstant %int 58 + %int_24 = OpConstant %int 24 + %int_11 = OpConstant %int 11 + %int_5 = OpConstant %int 5 + %float_0_5 = OpConstant %float 0.5 + %int_4 = OpConstant %int 4 + %int_2 = OpConstant %int 2 + %62 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %bool = OpTypeBool + %uint_5 = OpConstant %uint 5 + %65 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %66 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %float_10 = OpConstant %float 10 + %float_5 = OpConstant %float 5 + %uint_0 = OpConstant %uint 0 + %int_23 = OpConstant %int 23 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 + %uint_16 = OpConstant %uint 16 +%float_0_150000006 = OpConstant %float 0.150000006 + %float_0_25 = OpConstant %float 0.25 + %float_2 = OpConstant %float 2 + %77 = OpConstantComposite %v3float %float_2 %float_2 %float_2 + %float_255 = OpConstant %float 255 + %uint_15 = OpConstant %uint 15 +%uint_4294967280 = OpConstant %uint 4294967280 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float 
%v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler + %uint_6 = OpConstant %uint 6 +%_arr_mat4v4float_uint_6 = OpTypeArray %mat4v4float %uint_6 +%type__Globals = OpTypeStruct %v3float %_arr_mat4v4float_uint_6 %float %float %float %v4float %v2float %v4float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_cube_image = OpTypeImage %float Cube 2 0 0 1 Unknown +%_ptr_UniformConstant_type_cube_image = OpTypePointer UniformConstant %type_cube_image + %v2int = OpTypeVector %int 2 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %91 = OpTypeFunction %void +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = 
OpTypeSampledImage %type_cube_image + %v3int = OpTypeVector %int 3 +%type_sampled_image_0 = OpTypeSampledImage %type_2d_image + %v4bool = OpTypeVector %bool 4 + %View = OpVariable %_ptr_Uniform_type_View Uniform +%SceneTexturesStruct_SceneDepthTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%SceneTexturesStruct_SceneDepthTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%SceneTexturesStruct_GBufferATexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%SceneTexturesStruct_GBufferBTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%SceneTexturesStruct_GBufferDTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%SceneTexturesStruct_GBufferATextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%SceneTexturesStruct_GBufferBTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%SceneTexturesStruct_GBufferDTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%ShadowDepthTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%ShadowDepthCubeTexture = OpVariable %_ptr_UniformConstant_type_cube_image UniformConstant +%ShadowDepthCubeTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%SSProfilesTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output +%float_0_200000003 = OpConstant %float 0.200000003 + %98 = OpConstantComposite %v3float %float_2_5 %float_2_5 %float_2_5 + %99 = OpConstantComposite %v3float %float_2_37764096 %float_2_37764096 %float_2_37764096 + %100 = OpConstantComposite %v3float %float_0_772542 %float_0_772542 %float_0_772542 + %101 = OpConstantComposite %v3float %float_1_46946299 %float_1_46946299 %float_1_46946299 + 
%102 = OpConstantComposite %v3float %float_n2_02254295 %float_n2_02254295 %float_n2_02254295 + %103 = OpConstantComposite %v3float %float_n1_46946299 %float_n1_46946299 %float_n1_46946299 + %104 = OpConstantComposite %v3float %float_n2_022542 %float_n2_022542 %float_n2_022542 + %105 = OpConstantComposite %v3float %float_n2_37764096 %float_n2_37764096 %float_n2_37764096 + %106 = OpConstantComposite %v3float %float_0_772543013 %float_0_772543013 %float_0_772543013 + %107 = OpUndef %v4float +%MainOnePassPointLightPS = OpFunction %void None %91 + %108 = OpLabel + %109 = OpLoad %v4float %gl_FragCoord + %110 = OpVectorShuffle %v2float %109 %109 0 1 + %111 = OpAccessChain %_ptr_Uniform_v4float %View %int_58 + %112 = OpLoad %v4float %111 + %113 = OpVectorShuffle %v2float %112 %112 2 3 + %114 = OpFMul %v2float %110 %113 + %115 = OpLoad %type_2d_image %SceneTexturesStruct_SceneDepthTexture + %116 = OpLoad %type_sampler %SceneTexturesStruct_SceneDepthTextureSampler + %117 = OpSampledImage %type_sampled_image_0 %115 %116 + %118 = OpImageSampleExplicitLod %v4float %117 %114 Lod %float_0 + %119 = OpCompositeExtract %float %118 0 + %120 = OpAccessChain %_ptr_Uniform_float %View %int_23 %uint_0 + %121 = OpLoad %float %120 + %122 = OpFMul %float %119 %121 + %123 = OpAccessChain %_ptr_Uniform_float %View %int_23 %uint_1 + %124 = OpLoad %float %123 + %125 = OpFAdd %float %122 %124 + %126 = OpAccessChain %_ptr_Uniform_float %View %int_23 %uint_2 + %127 = OpLoad %float %126 + %128 = OpFMul %float %119 %127 + %129 = OpAccessChain %_ptr_Uniform_float %View %int_23 %uint_3 + %130 = OpLoad %float %129 + %131 = OpFSub %float %128 %130 + %132 = OpFDiv %float %float_1 %131 + %133 = OpFAdd %float %125 %132 + %134 = OpAccessChain %_ptr_Uniform_v4float %View %int_24 + %135 = OpLoad %v4float %134 + %136 = OpVectorShuffle %v2float %135 %135 3 2 + %137 = OpFSub %v2float %114 %136 + %138 = OpVectorShuffle %v2float %135 %135 0 1 + %139 = OpFDiv %v2float %137 %138 + %140 = OpCompositeConstruct 
%v2float %133 %133 + %141 = OpFMul %v2float %139 %140 + %142 = OpCompositeExtract %float %141 0 + %143 = OpCompositeExtract %float %141 1 + %144 = OpCompositeConstruct %v4float %142 %143 %133 %float_1 + %145 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_11 + %146 = OpLoad %mat4v4float %145 + %147 = OpMatrixTimesVector %v4float %146 %144 + %148 = OpVectorShuffle %v3float %147 %147 0 1 2 + %149 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_5 + %150 = OpLoad %v4float %149 + %151 = OpVectorShuffle %v3float %150 %150 0 1 2 + %152 = OpFSub %v3float %151 %148 + %153 = OpAccessChain %_ptr_Uniform_float %_Globals %int_5 %int_3 + %154 = OpLoad %float %153 + %155 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %156 = OpAccessChain %_ptr_Uniform_float %_Globals %int_7 %int_0 + %157 = OpLoad %float %156 + %158 = OpExtInst %float %1 Length %152 + %159 = OpFMul %float %158 %154 + %160 = OpFOrdLessThan %bool %159 %float_1 + OpSelectionMerge %161 DontFlatten + OpBranchConditional %160 %162 %161 + %162 = OpLabel + %163 = OpCompositeConstruct %v3float %158 %158 %158 + %164 = OpFDiv %v3float %152 %163 + %165 = OpExtInst %v3float %1 FAbs %152 + %166 = OpCompositeExtract %float %165 0 + %167 = OpCompositeExtract %float %165 1 + %168 = OpCompositeExtract %float %165 2 + %169 = OpExtInst %float %1 FMax %167 %168 + %170 = OpExtInst %float %1 FMax %166 %169 + %171 = OpFOrdEqual %bool %170 %166 + OpSelectionMerge %172 None + OpBranchConditional %171 %173 %174 + %174 = OpLabel + %175 = OpFOrdEqual %bool %170 %167 + OpSelectionMerge %176 None + OpBranchConditional %175 %177 %178 + %178 = OpLabel + %179 = OpCompositeExtract %float %152 2 + %180 = OpFOrdEqual %bool %168 %179 + %181 = OpSelect %int %180 %int_4 %int_5 + OpBranch %176 + %177 = OpLabel + %182 = OpCompositeExtract %float %152 1 + %183 = OpFOrdEqual %bool %167 %182 + %184 = OpSelect %int %183 %int_2 %int_3 + OpBranch %176 + %176 = OpLabel + %185 = OpPhi %int %184 %177 %181 %178 + OpBranch %172 + %173 = OpLabel + 
%186 = OpCompositeExtract %float %152 0 + %187 = OpFOrdEqual %bool %166 %186 + %188 = OpSelect %int %187 %int_0 %int_1 + OpBranch %172 + %172 = OpLabel + %189 = OpPhi %int %188 %173 %185 %176 + %190 = OpCompositeExtract %float %147 0 + %191 = OpCompositeExtract %float %147 1 + %192 = OpCompositeExtract %float %147 2 + %193 = OpCompositeConstruct %v4float %190 %191 %192 %float_1 + %194 = OpAccessChain %_ptr_Uniform_mat4v4float %_Globals %int_1 %189 + %195 = OpLoad %mat4v4float %194 + %196 = OpMatrixTimesVector %v4float %195 %193 + %197 = OpCompositeExtract %float %196 2 + %198 = OpCompositeExtract %float %196 3 + %199 = OpFDiv %float %197 %198 + %200 = OpFNegate %float %157 + %201 = OpFDiv %float %200 %198 + %202 = OpLoad %type_cube_image %ShadowDepthCubeTexture + %203 = OpLoad %type_sampler %ShadowDepthCubeTextureSampler + %204 = OpFAdd %float %199 %201 + %205 = OpSampledImage %type_sampled_image %202 %203 + %206 = OpImageSampleDrefExplicitLod %float %205 %164 %204 Lod %float_0 + OpBranch %161 + %161 = OpLabel + %207 = OpPhi %float %float_1 %108 %206 %172 + %208 = OpFSub %float %207 %float_0_5 + %209 = OpAccessChain %_ptr_Uniform_float %_Globals %int_4 + %210 = OpLoad %float %209 + %211 = OpFMul %float %208 %210 + %212 = OpFAdd %float %211 %float_0_5 + %213 = OpExtInst %float %1 FClamp %212 %float_0 %float_1 + %214 = OpFMul %float %213 %213 + %215 = OpAccessChain %_ptr_Uniform_float %_Globals %int_3 + %216 = OpLoad %float %215 + %217 = OpExtInst %float %1 FMix %float_1 %214 %216 + %218 = OpExtInst %float %1 Sqrt %217 + %219 = OpCompositeInsert %v4float %218 %107 2 + %220 = OpVectorShuffle %v4float %219 %62 4 5 2 6 + %221 = OpLoad %type_2d_image %SceneTexturesStruct_GBufferATexture + %222 = OpLoad %type_sampler %SceneTexturesStruct_GBufferATextureSampler + %223 = OpSampledImage %type_sampled_image_0 %221 %222 + %224 = OpImageSampleExplicitLod %v4float %223 %114 Lod %float_0 + %225 = OpLoad %type_2d_image %SceneTexturesStruct_GBufferBTexture + %226 = OpLoad 
%type_sampler %SceneTexturesStruct_GBufferBTextureSampler + %227 = OpSampledImage %type_sampled_image_0 %225 %226 + %228 = OpImageSampleExplicitLod %v4float %227 %114 Lod %float_0 + %229 = OpLoad %type_2d_image %SceneTexturesStruct_GBufferDTexture + %230 = OpLoad %type_sampler %SceneTexturesStruct_GBufferDTextureSampler + %231 = OpSampledImage %type_sampled_image_0 %229 %230 + %232 = OpImageSampleExplicitLod %v4float %231 %114 Lod %float_0 + %233 = OpVectorShuffle %v3float %224 %224 0 1 2 + %234 = OpFMul %v3float %233 %77 + %235 = OpFSub %v3float %234 %62 + %236 = OpExtInst %v3float %1 Normalize %235 + %237 = OpCompositeExtract %float %228 3 + %238 = OpFMul %float %237 %float_255 + %239 = OpExtInst %float %1 Round %238 + %240 = OpConvertFToU %uint %239 + %241 = OpBitwiseAnd %uint %240 %uint_15 + %242 = OpBitwiseAnd %uint %240 %uint_4294967280 + %243 = OpBitwiseAnd %uint %242 %uint_16 + %244 = OpINotEqual %bool %243 %uint_0 + %245 = OpLogicalNot %bool %244 + %246 = OpCompositeConstruct %v4bool %245 %245 %245 %245 + %247 = OpSelect %v4float %246 %232 %66 + %248 = OpIEqual %bool %241 %uint_5 + OpSelectionMerge %249 None + OpBranchConditional %248 %250 %249 + %250 = OpLabel + %251 = OpLoad %v4float %155 + %252 = OpCompositeExtract %float %247 0 + %253 = OpFMul %float %252 %float_255 + %254 = OpFAdd %float %253 %float_0_5 + %255 = OpConvertFToU %uint %254 + %256 = OpBitcast %int %255 + %257 = OpCompositeConstruct %v3int %int_1 %256 %int_0 + %258 = OpVectorShuffle %v2int %257 %257 0 1 + %259 = OpLoad %type_2d_image %SSProfilesTexture + %260 = OpImageFetch %v4float %259 %258 Lod %int_0 + %261 = OpCompositeExtract %float %260 0 + %262 = OpCompositeExtract %float %260 1 + %263 = OpFMul %float %262 %float_0_5 + %264 = OpCompositeConstruct %v3float %263 %263 %263 + %265 = OpFMul %v3float %236 %264 + %266 = OpFSub %v3float %148 %265 + %267 = OpDot %float %152 %152 + %268 = OpExtInst %float %1 InverseSqrt %267 + %269 = OpCompositeConstruct %v3float %268 %268 %268 + %270 = 
OpFMul %v3float %152 %269 + %271 = OpFNegate %v3float %270 + %272 = OpDot %float %271 %236 + %273 = OpExtInst %float %1 FClamp %272 %float_0 %float_1 + %274 = OpExtInst %float %1 Pow %273 %float_1 + OpSelectionMerge %275 DontFlatten + OpBranchConditional %160 %276 %275 + %276 = OpLabel + %277 = OpCompositeConstruct %v3float %158 %158 %158 + %278 = OpFDiv %v3float %152 %277 + %279 = OpExtInst %v3float %1 Cross %278 %65 + %280 = OpExtInst %v3float %1 Normalize %279 + %281 = OpExtInst %v3float %1 Cross %280 %278 + %282 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 + %283 = OpLoad %float %282 + %284 = OpCompositeConstruct %v3float %283 %283 %283 + %285 = OpFMul %v3float %280 %284 + %286 = OpFMul %v3float %281 %284 + %287 = OpExtInst %v3float %1 FAbs %278 + %288 = OpCompositeExtract %float %287 0 + %289 = OpCompositeExtract %float %287 1 + %290 = OpCompositeExtract %float %287 2 + %291 = OpExtInst %float %1 FMax %289 %290 + %292 = OpExtInst %float %1 FMax %288 %291 + %293 = OpFOrdEqual %bool %292 %288 + OpSelectionMerge %294 None + OpBranchConditional %293 %295 %296 + %296 = OpLabel + %297 = OpFOrdEqual %bool %292 %289 + OpSelectionMerge %298 None + OpBranchConditional %297 %299 %300 + %300 = OpLabel + %301 = OpCompositeExtract %float %278 2 + %302 = OpFOrdEqual %bool %290 %301 + %303 = OpSelect %int %302 %int_4 %int_5 + OpBranch %298 + %299 = OpLabel + %304 = OpCompositeExtract %float %278 1 + %305 = OpFOrdEqual %bool %289 %304 + %306 = OpSelect %int %305 %int_2 %int_3 + OpBranch %298 + %298 = OpLabel + %307 = OpPhi %int %306 %299 %303 %300 + OpBranch %294 + %295 = OpLabel + %308 = OpCompositeExtract %float %278 0 + %309 = OpFOrdEqual %bool %288 %308 + %310 = OpSelect %int %309 %int_0 %int_1 + OpBranch %294 + %294 = OpLabel + %311 = OpPhi %int %310 %295 %307 %298 + %312 = OpCompositeExtract %float %266 0 + %313 = OpCompositeExtract %float %266 1 + %314 = OpCompositeExtract %float %266 2 + %315 = OpCompositeConstruct %v4float %312 %313 %314 %float_1 + %316 = 
OpAccessChain %_ptr_Uniform_mat4v4float %_Globals %int_1 %311 + %317 = OpLoad %mat4v4float %316 + %318 = OpMatrixTimesVector %v4float %317 %315 + %319 = OpCompositeExtract %float %318 2 + %320 = OpCompositeExtract %float %318 3 + %321 = OpFDiv %float %319 %320 + %322 = OpFDiv %float %float_10 %154 + %323 = OpFMul %float %261 %322 + %324 = OpCompositeExtract %float %251 2 + %325 = OpFMul %float %321 %324 + %326 = OpCompositeExtract %float %251 3 + %327 = OpFSub %float %325 %326 + %328 = OpFDiv %float %float_1 %327 + %329 = OpFMul %float %328 %154 + %330 = OpFMul %v3float %286 %98 + %331 = OpFAdd %v3float %278 %330 + %332 = OpLoad %type_cube_image %ShadowDepthCubeTexture + %333 = OpLoad %type_sampler %ShadowDepthTextureSampler + %334 = OpSampledImage %type_sampled_image %332 %333 + %335 = OpImageSampleExplicitLod %v4float %334 %331 Lod %float_0 + %336 = OpCompositeExtract %float %335 0 + %337 = OpFMul %float %336 %324 + %338 = OpFSub %float %337 %326 + %339 = OpFDiv %float %float_1 %338 + %340 = OpFMul %float %339 %154 + %341 = OpFSub %float %329 %340 + %342 = OpFMul %float %341 %323 + %343 = OpFOrdGreaterThan %bool %342 %float_0 + %344 = OpFAdd %float %342 %263 + %345 = OpFMul %float %342 %274 + %346 = OpFAdd %float %345 %263 + %347 = OpExtInst %float %1 FMax %float_0 %346 + %348 = OpSelect %float %343 %344 %347 + %349 = OpExtInst %float %1 FAbs %348 + %350 = OpExtInst %float %1 FClamp %349 %float_0_150000006 %float_5 + %351 = OpFAdd %float %350 %float_0_25 + %352 = OpFMul %v3float %285 %99 + %353 = OpFAdd %v3float %278 %352 + %354 = OpFMul %v3float %286 %100 + %355 = OpFAdd %v3float %353 %354 + %356 = OpSampledImage %type_sampled_image %332 %333 + %357 = OpImageSampleExplicitLod %v4float %356 %355 Lod %float_0 + %358 = OpCompositeExtract %float %357 0 + %359 = OpFMul %float %358 %324 + %360 = OpFSub %float %359 %326 + %361 = OpFDiv %float %float_1 %360 + %362 = OpFMul %float %361 %154 + %363 = OpFSub %float %329 %362 + %364 = OpFMul %float %363 %323 + %365 = 
OpFOrdGreaterThan %bool %364 %float_0 + %366 = OpFAdd %float %364 %263 + %367 = OpFMul %float %364 %274 + %368 = OpFAdd %float %367 %263 + %369 = OpExtInst %float %1 FMax %float_0 %368 + %370 = OpSelect %float %365 %366 %369 + %371 = OpExtInst %float %1 FAbs %370 + %372 = OpExtInst %float %1 FClamp %371 %float_0_150000006 %float_5 + %373 = OpFAdd %float %372 %float_0_25 + %374 = OpFAdd %float %351 %373 + %375 = OpFMul %v3float %285 %101 + %376 = OpFAdd %v3float %278 %375 + %377 = OpFMul %v3float %286 %102 + %378 = OpFAdd %v3float %376 %377 + %379 = OpSampledImage %type_sampled_image %332 %333 + %380 = OpImageSampleExplicitLod %v4float %379 %378 Lod %float_0 + %381 = OpCompositeExtract %float %380 0 + %382 = OpFMul %float %381 %324 + %383 = OpFSub %float %382 %326 + %384 = OpFDiv %float %float_1 %383 + %385 = OpFMul %float %384 %154 + %386 = OpFSub %float %329 %385 + %387 = OpFMul %float %386 %323 + %388 = OpFOrdGreaterThan %bool %387 %float_0 + %389 = OpFAdd %float %387 %263 + %390 = OpFMul %float %387 %274 + %391 = OpFAdd %float %390 %263 + %392 = OpExtInst %float %1 FMax %float_0 %391 + %393 = OpSelect %float %388 %389 %392 + %394 = OpExtInst %float %1 FAbs %393 + %395 = OpExtInst %float %1 FClamp %394 %float_0_150000006 %float_5 + %396 = OpFAdd %float %395 %float_0_25 + %397 = OpFAdd %float %374 %396 + %398 = OpFMul %v3float %285 %103 + %399 = OpFAdd %v3float %278 %398 + %400 = OpFMul %v3float %286 %104 + %401 = OpFAdd %v3float %399 %400 + %402 = OpSampledImage %type_sampled_image %332 %333 + %403 = OpImageSampleExplicitLod %v4float %402 %401 Lod %float_0 + %404 = OpCompositeExtract %float %403 0 + %405 = OpFMul %float %404 %324 + %406 = OpFSub %float %405 %326 + %407 = OpFDiv %float %float_1 %406 + %408 = OpFMul %float %407 %154 + %409 = OpFSub %float %329 %408 + %410 = OpFMul %float %409 %323 + %411 = OpFOrdGreaterThan %bool %410 %float_0 + %412 = OpFAdd %float %410 %263 + %413 = OpFMul %float %410 %274 + %414 = OpFAdd %float %413 %263 + %415 = OpExtInst 
%float %1 FMax %float_0 %414 + %416 = OpSelect %float %411 %412 %415 + %417 = OpExtInst %float %1 FAbs %416 + %418 = OpExtInst %float %1 FClamp %417 %float_0_150000006 %float_5 + %419 = OpFAdd %float %418 %float_0_25 + %420 = OpFAdd %float %397 %419 + %421 = OpFMul %v3float %285 %105 + %422 = OpFAdd %v3float %278 %421 + %423 = OpFMul %v3float %286 %106 + %424 = OpFAdd %v3float %422 %423 + %425 = OpSampledImage %type_sampled_image %332 %333 + %426 = OpImageSampleExplicitLod %v4float %425 %424 Lod %float_0 + %427 = OpCompositeExtract %float %426 0 + %428 = OpFMul %float %427 %324 + %429 = OpFSub %float %428 %326 + %430 = OpFDiv %float %float_1 %429 + %431 = OpFMul %float %430 %154 + %432 = OpFSub %float %329 %431 + %433 = OpFMul %float %432 %323 + %434 = OpFOrdGreaterThan %bool %433 %float_0 + %435 = OpFAdd %float %433 %263 + %436 = OpFMul %float %433 %274 + %437 = OpFAdd %float %436 %263 + %438 = OpExtInst %float %1 FMax %float_0 %437 + %439 = OpSelect %float %434 %435 %438 + %440 = OpExtInst %float %1 FAbs %439 + %441 = OpExtInst %float %1 FClamp %440 %float_0_150000006 %float_5 + %442 = OpFAdd %float %441 %float_0_25 + %443 = OpFAdd %float %420 %442 + %444 = OpFMul %float %443 %float_0_200000003 + OpBranch %275 + %275 = OpLabel + %445 = OpPhi %float %float_1 %250 %444 %294 + %446 = OpFMul %float %445 %float_0_200000003 + %447 = OpFSub %float %float_1 %446 + OpBranch %249 + %249 = OpLabel + %448 = OpPhi %float %float_1 %161 %447 %275 + %449 = OpExtInst %float %1 Sqrt %448 + %450 = OpSelect %float %248 %449 %218 + %451 = OpCompositeInsert %v4float %450 %220 3 + OpStore %out_var_SV_Target0 %451 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/global-constant-arrays.asm.frag b/shaders-ue4/asm/frag/global-constant-arrays.asm.frag new file mode 100644 index 00000000000..47db9ebc512 --- /dev/null +++ b/shaders-ue4/asm/frag/global-constant-arrays.asm.frag @@ -0,0 +1,3556 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 3005 +; Schema: 0 + 
OpCapability Shader + OpCapability Geometry + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPS "main" %in_var_TEXCOORD0 %gl_FragCoord %gl_Layer %out_var_SV_Target0 + OpExecutionMode %MainPS OriginUpperLeft + OpSource HLSL 600 + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "MappingPolynomial" + OpMemberName %type__Globals 1 "InverseGamma" + OpMemberName %type__Globals 2 "ColorMatrixR_ColorCurveCd1" + OpMemberName %type__Globals 3 "ColorMatrixG_ColorCurveCd3Cm3" + OpMemberName %type__Globals 4 "ColorMatrixB_ColorCurveCm2" + OpMemberName %type__Globals 5 "ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3" + OpMemberName %type__Globals 6 "ColorCurve_Ch1_Ch2" + OpMemberName %type__Globals 7 "ColorShadow_Luma" + OpMemberName %type__Globals 8 "ColorShadow_Tint1" + OpMemberName %type__Globals 9 "ColorShadow_Tint2" + OpMemberName %type__Globals 10 "FilmSlope" + OpMemberName %type__Globals 11 "FilmToe" + OpMemberName %type__Globals 12 "FilmShoulder" + OpMemberName %type__Globals 13 "FilmBlackClip" + OpMemberName %type__Globals 14 "FilmWhiteClip" + OpMemberName %type__Globals 15 "ColorScale" + OpMemberName %type__Globals 16 "OverlayColor" + OpMemberName %type__Globals 17 "WhiteTemp" + OpMemberName %type__Globals 18 "WhiteTint" + OpMemberName %type__Globals 19 "ColorSaturation" + OpMemberName %type__Globals 20 "ColorContrast" + OpMemberName %type__Globals 21 "ColorGamma" + OpMemberName %type__Globals 22 "ColorGain" + OpMemberName %type__Globals 23 "ColorOffset" + OpMemberName %type__Globals 24 "ColorSaturationShadows" + OpMemberName %type__Globals 25 "ColorContrastShadows" + OpMemberName %type__Globals 26 "ColorGammaShadows" + OpMemberName %type__Globals 27 "ColorGainShadows" + OpMemberName %type__Globals 28 "ColorOffsetShadows" + OpMemberName %type__Globals 29 "ColorSaturationMidtones" + OpMemberName %type__Globals 30 "ColorContrastMidtones" + OpMemberName %type__Globals 
31 "ColorGammaMidtones" + OpMemberName %type__Globals 32 "ColorGainMidtones" + OpMemberName %type__Globals 33 "ColorOffsetMidtones" + OpMemberName %type__Globals 34 "ColorSaturationHighlights" + OpMemberName %type__Globals 35 "ColorContrastHighlights" + OpMemberName %type__Globals 36 "ColorGammaHighlights" + OpMemberName %type__Globals 37 "ColorGainHighlights" + OpMemberName %type__Globals 38 "ColorOffsetHighlights" + OpMemberName %type__Globals 39 "ColorCorrectionShadowsMax" + OpMemberName %type__Globals 40 "ColorCorrectionHighlightsMin" + OpMemberName %type__Globals 41 "OutputDevice" + OpMemberName %type__Globals 42 "OutputGamut" + OpMemberName %type__Globals 43 "BlueCorrection" + OpMemberName %type__Globals 44 "ExpandGamut" + OpName %_Globals "$Globals" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPS "MainPS" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 NoPerspective + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorate %gl_Layer BuiltIn Layer + OpDecorateString %gl_Layer UserSemantic "SV_RenderTargetArrayIndex" + OpDecorate %gl_Layer Flat + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 0 + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 48 + OpMemberDecorate %type__Globals 4 Offset 64 + OpMemberDecorate %type__Globals 5 Offset 80 + OpMemberDecorate %type__Globals 6 Offset 96 + OpMemberDecorate %type__Globals 7 Offset 112 + OpMemberDecorate %type__Globals 8 Offset 128 + OpMemberDecorate %type__Globals 9 Offset 144 + OpMemberDecorate %type__Globals 10 Offset 160 + OpMemberDecorate 
%type__Globals 11 Offset 164 + OpMemberDecorate %type__Globals 12 Offset 168 + OpMemberDecorate %type__Globals 13 Offset 172 + OpMemberDecorate %type__Globals 14 Offset 176 + OpMemberDecorate %type__Globals 15 Offset 180 + OpMemberDecorate %type__Globals 16 Offset 192 + OpMemberDecorate %type__Globals 17 Offset 208 + OpMemberDecorate %type__Globals 18 Offset 212 + OpMemberDecorate %type__Globals 19 Offset 224 + OpMemberDecorate %type__Globals 20 Offset 240 + OpMemberDecorate %type__Globals 21 Offset 256 + OpMemberDecorate %type__Globals 22 Offset 272 + OpMemberDecorate %type__Globals 23 Offset 288 + OpMemberDecorate %type__Globals 24 Offset 304 + OpMemberDecorate %type__Globals 25 Offset 320 + OpMemberDecorate %type__Globals 26 Offset 336 + OpMemberDecorate %type__Globals 27 Offset 352 + OpMemberDecorate %type__Globals 28 Offset 368 + OpMemberDecorate %type__Globals 29 Offset 384 + OpMemberDecorate %type__Globals 30 Offset 400 + OpMemberDecorate %type__Globals 31 Offset 416 + OpMemberDecorate %type__Globals 32 Offset 432 + OpMemberDecorate %type__Globals 33 Offset 448 + OpMemberDecorate %type__Globals 34 Offset 464 + OpMemberDecorate %type__Globals 35 Offset 480 + OpMemberDecorate %type__Globals 36 Offset 496 + OpMemberDecorate %type__Globals 37 Offset 512 + OpMemberDecorate %type__Globals 38 Offset 528 + OpMemberDecorate %type__Globals 39 Offset 544 + OpMemberDecorate %type__Globals 40 Offset 548 + OpMemberDecorate %type__Globals 41 Offset 552 + OpMemberDecorate %type__Globals 42 Offset 556 + OpMemberDecorate %type__Globals 43 Offset 560 + OpMemberDecorate %type__Globals 44 Offset 564 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 +%float_0_952552378 = OpConstant %float 0.952552378 + %float_0 = OpConstant 
%float 0 + +; HACK: Needed to hack this constant since MSVC and GNU libc are off by 1 ULP when converting to string (it probably still works fine though in a roundtrip ...) +%float_9_36786018en05 = OpConstant %float 9.25 + +%float_0_343966454 = OpConstant %float 0.343966454 +%float_0_728166103 = OpConstant %float 0.728166103 +%float_n0_0721325427 = OpConstant %float -0.0721325427 +%float_1_00882518 = OpConstant %float 1.00882518 +%float_1_04981101 = OpConstant %float 1.04981101 +%float_n9_74845025en05 = OpConstant %float -9.74845025e-05 +%float_n0_495903015 = OpConstant %float -0.495903015 +%float_1_37331307 = OpConstant %float 1.37331307 +%float_0_0982400328 = OpConstant %float 0.0982400328 +%float_0_991252005 = OpConstant %float 0.991252005 +%float_0_662454188 = OpConstant %float 0.662454188 +%float_0_134004205 = OpConstant %float 0.134004205 +%float_0_156187683 = OpConstant %float 0.156187683 +%float_0_272228718 = OpConstant %float 0.272228718 +%float_0_674081743 = OpConstant %float 0.674081743 +%float_0_0536895171 = OpConstant %float 0.0536895171 +%float_n0_00557464967 = OpConstant %float -0.00557464967 +%float_0_0040607336 = OpConstant %float 0.0040607336 +%float_1_01033914 = OpConstant %float 1.01033914 +%float_1_6410234 = OpConstant %float 1.6410234 +%float_n0_324803293 = OpConstant %float -0.324803293 +%float_n0_236424699 = OpConstant %float -0.236424699 +%float_n0_663662851 = OpConstant %float -0.663662851 +%float_1_61533165 = OpConstant %float 1.61533165 +%float_0_0167563483 = OpConstant %float 0.0167563483 +%float_0_0117218941 = OpConstant %float 0.0117218941 +%float_n0_00828444213 = OpConstant %float -0.00828444213 +%float_0_988394856 = OpConstant %float 0.988394856 +%float_1_45143926 = OpConstant %float 1.45143926 +%float_n0_236510754 = OpConstant %float -0.236510754 +%float_n0_214928567 = OpConstant %float -0.214928567 +%float_n0_0765537769 = OpConstant %float -0.0765537769 +%float_1_17622972 = OpConstant %float 1.17622972 +%float_n0_0996759236 = 
OpConstant %float -0.0996759236 +%float_0_00831614807 = OpConstant %float 0.00831614807 +%float_n0_00603244966 = OpConstant %float -0.00603244966 +%float_0_997716308 = OpConstant %float 0.997716308 +%float_0_695452213 = OpConstant %float 0.695452213 +%float_0_140678704 = OpConstant %float 0.140678704 +%float_0_163869068 = OpConstant %float 0.163869068 +%float_0_0447945632 = OpConstant %float 0.0447945632 +%float_0_859671116 = OpConstant %float 0.859671116 +%float_0_0955343172 = OpConstant %float 0.0955343172 +%float_n0_00552588282 = OpConstant %float -0.00552588282 +%float_0_00402521016 = OpConstant %float 0.00402521016 +%float_1_00150073 = OpConstant %float 1.00150073 + %67 = OpConstantComposite %v3float %float_0_272228718 %float_0_674081743 %float_0_0536895171 +%float_3_2409699 = OpConstant %float 3.2409699 +%float_n1_5373832 = OpConstant %float -1.5373832 +%float_n0_498610765 = OpConstant %float -0.498610765 +%float_n0_969243646 = OpConstant %float -0.969243646 +%float_1_8759675 = OpConstant %float 1.8759675 +%float_0_0415550582 = OpConstant %float 0.0415550582 +%float_0_0556300804 = OpConstant %float 0.0556300804 +%float_n0_203976959 = OpConstant %float -0.203976959 +%float_1_05697155 = OpConstant %float 1.05697155 +%float_0_412456393 = OpConstant %float 0.412456393 +%float_0_357576102 = OpConstant %float 0.357576102 +%float_0_180437505 = OpConstant %float 0.180437505 +%float_0_212672904 = OpConstant %float 0.212672904 +%float_0_715152204 = OpConstant %float 0.715152204 +%float_0_0721750036 = OpConstant %float 0.0721750036 +%float_0_0193339009 = OpConstant %float 0.0193339009 +%float_0_119191997 = OpConstant %float 0.119191997 +%float_0_950304091 = OpConstant %float 0.950304091 +%float_1_71660841 = OpConstant %float 1.71660841 +%float_n0_355662107 = OpConstant %float -0.355662107 +%float_n0_253360093 = OpConstant %float -0.253360093 +%float_n0_666682899 = OpConstant %float -0.666682899 +%float_1_61647761 = OpConstant %float 1.61647761 +%float_0_0157685 = 
OpConstant %float 0.0157685 +%float_0_0176422 = OpConstant %float 0.0176422 +%float_n0_0427763015 = OpConstant %float -0.0427763015 +%float_0_942228675 = OpConstant %float 0.942228675 +%float_2_49339628 = OpConstant %float 2.49339628 +%float_n0_93134588 = OpConstant %float -0.93134588 +%float_n0_402694494 = OpConstant %float -0.402694494 +%float_n0_829486787 = OpConstant %float -0.829486787 +%float_1_76265967 = OpConstant %float 1.76265967 +%float_0_0236246008 = OpConstant %float 0.0236246008 +%float_0_0358507 = OpConstant %float 0.0358507 +%float_n0_0761827007 = OpConstant %float -0.0761827007 +%float_0_957014024 = OpConstant %float 0.957014024 +%float_1_01303005 = OpConstant %float 1.01303005 +%float_0_00610530982 = OpConstant %float 0.00610530982 +%float_n0_0149710001 = OpConstant %float -0.0149710001 +%float_0_00769822998 = OpConstant %float 0.00769822998 +%float_0_998165011 = OpConstant %float 0.998165011 +%float_n0_00503202993 = OpConstant %float -0.00503202993 +%float_n0_00284131011 = OpConstant %float -0.00284131011 +%float_0_00468515977 = OpConstant %float 0.00468515977 +%float_0_924507022 = OpConstant %float 0.924507022 +%float_0_987223983 = OpConstant %float 0.987223983 +%float_n0_00611326983 = OpConstant %float -0.00611326983 +%float_0_0159533005 = OpConstant %float 0.0159533005 +%float_n0_00759836007 = OpConstant %float -0.00759836007 +%float_1_00186002 = OpConstant %float 1.00186002 +%float_0_0053300201 = OpConstant %float 0.0053300201 +%float_0_00307257008 = OpConstant %float 0.00307257008 +%float_n0_00509594986 = OpConstant %float -0.00509594986 +%float_1_08168006 = OpConstant %float 1.08168006 + %float_0_5 = OpConstant %float 0.5 + %float_n1 = OpConstant %float -1 + %float_1 = OpConstant %float 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%float_0_015625 = OpConstant %float 0.015625 + %128 = OpConstantComposite %v2float %float_0_015625 %float_0_015625 + %129 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + 
%int_42 = OpConstant %int 42 + %uint_3 = OpConstant %uint 3 + %132 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %int_9 = OpConstant %int 9 + %int_3 = OpConstant %int 3 + %135 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n4 = OpConstant %float -4 + %int_44 = OpConstant %int 44 +%float_0_544169128 = OpConstant %float 0.544169128 +%float_0_239592597 = OpConstant %float 0.239592597 +%float_0_166694298 = OpConstant %float 0.166694298 +%float_0_239465594 = OpConstant %float 0.239465594 +%float_0_702153027 = OpConstant %float 0.702153027 +%float_0_058381401 = OpConstant %float 0.058381401 +%float_n0_00234390004 = OpConstant %float -0.00234390004 +%float_0_0361833982 = OpConstant %float 0.0361833982 +%float_1_05521834 = OpConstant %float 1.05521834 +%float_0_940437257 = OpConstant %float 0.940437257 +%float_n0_0183068793 = OpConstant %float -0.0183068793 +%float_0_077869609 = OpConstant %float 0.077869609 +%float_0_00837869663 = OpConstant %float 0.00837869663 +%float_0_828660011 = OpConstant %float 0.828660011 +%float_0_162961304 = OpConstant %float 0.162961304 +%float_0_00054712611 = OpConstant %float 0.00054712611 +%float_n0_000883374596 = OpConstant %float -0.000883374596 +%float_1_00033629 = OpConstant %float 1.00033629 +%float_1_06317997 = OpConstant %float 1.06317997 +%float_0_0233955998 = OpConstant %float 0.0233955998 +%float_n0_0865726024 = OpConstant %float -0.0865726024 +%float_n0_0106336996 = OpConstant %float -0.0106336996 +%float_1_20632005 = OpConstant %float 1.20632005 +%float_n0_195690006 = OpConstant %float -0.195690006 +%float_n0_000590886979 = OpConstant %float -0.000590886979 +%float_0_00105247996 = OpConstant %float 0.00105247996 +%float_0_999538004 = OpConstant %float 0.999538004 + %int_43 = OpConstant %int 43 + %int_15 = OpConstant %int 15 + %int_16 = OpConstant %int 16 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_5 = OpConstant %uint 5 + %uint_6 = OpConstant %uint 6 + %int_2 = 
OpConstant %int 2 +%mat3v3float = OpTypeMatrix %v3float 3 + %int_41 = OpConstant %int 41 +%float_0_159301758 = OpConstant %float 0.159301758 +%float_78_84375 = OpConstant %float 78.84375 +%float_0_8359375 = OpConstant %float 0.8359375 +%float_18_8515625 = OpConstant %float 18.8515625 +%float_18_6875 = OpConstant %float 18.6875 +%float_10000 = OpConstant %float 10000 +%float_0_0126833133 = OpConstant %float 0.0126833133 + %182 = OpConstantComposite %v3float %float_0_0126833133 %float_0_0126833133 %float_0_0126833133 + %183 = OpConstantComposite %v3float %float_0_8359375 %float_0_8359375 %float_0_8359375 + %184 = OpConstantComposite %v3float %float_18_8515625 %float_18_8515625 %float_18_8515625 + %185 = OpConstantComposite %v3float %float_18_6875 %float_18_6875 %float_18_6875 +%float_6_27739477 = OpConstant %float 6.27739477 + %187 = OpConstantComposite %v3float %float_6_27739477 %float_6_27739477 %float_6_27739477 + %188 = OpConstantComposite %v3float %float_10000 %float_10000 %float_10000 + %float_14 = OpConstant %float 14 +%float_0_180000007 = OpConstant %float 0.180000007 +%float_0_434017599 = OpConstant %float 0.434017599 + %192 = OpConstantComposite %v3float %float_0_434017599 %float_0_434017599 %float_0_434017599 + %193 = OpConstantComposite %v3float %float_14 %float_14 %float_14 + %194 = OpConstantComposite %v3float %float_0_180000007 %float_0_180000007 %float_0_180000007 + %int_17 = OpConstant %int 17 + %float_4000 = OpConstant %float 4000 +%float_0_312700003 = OpConstant %float 0.312700003 +%float_0_328999996 = OpConstant %float 0.328999996 + %int_18 = OpConstant %int 18 + %int_24 = OpConstant %int 24 + %int_19 = OpConstant %int 19 + %int_25 = OpConstant %int 25 + %int_20 = OpConstant %int 20 + %int_26 = OpConstant %int 26 + %int_21 = OpConstant %int 21 + %int_27 = OpConstant %int 27 + %int_22 = OpConstant %int 22 + %int_28 = OpConstant %int 28 + %int_23 = OpConstant %int 23 + %int_39 = OpConstant %int 39 + %int_34 = OpConstant %int 34 + %int_35 = 
OpConstant %int 35 + %int_36 = OpConstant %int 36 + %int_37 = OpConstant %int 37 + %int_38 = OpConstant %int 38 + %int_40 = OpConstant %int 40 + %int_29 = OpConstant %int 29 + %int_30 = OpConstant %int 30 + %int_31 = OpConstant %int 31 + %int_32 = OpConstant %int 32 + %int_33 = OpConstant %int 33 +%float_0_0500000007 = OpConstant %float 0.0500000007 + %float_1_75 = OpConstant %float 1.75 +%float_0_400000006 = OpConstant %float 0.400000006 +%float_0_0299999993 = OpConstant %float 0.0299999993 + %float_2 = OpConstant %float 2 +%float_0_959999979 = OpConstant %float 0.959999979 + %228 = OpConstantComposite %v3float %float_0_959999979 %float_0_959999979 %float_0_959999979 + %int_13 = OpConstant %int 13 + %int_11 = OpConstant %int 11 + %int_14 = OpConstant %int 14 + %int_12 = OpConstant %int 12 +%float_0_800000012 = OpConstant %float 0.800000012 + %int_10 = OpConstant %int 10 + %float_10 = OpConstant %float 10 + %float_n2 = OpConstant %float -2 + %float_3 = OpConstant %float 3 + %238 = OpConstantComposite %v3float %float_3 %float_3 %float_3 + %239 = OpConstantComposite %v3float %float_2 %float_2 %float_2 +%float_0_930000007 = OpConstant %float 0.930000007 + %241 = OpConstantComposite %v3float %float_0_930000007 %float_0_930000007 %float_0_930000007 + %int_4 = OpConstant %int 4 + %int_8 = OpConstant %int 8 + %int_7 = OpConstant %int 7 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 +%float_0_00200000009 = OpConstant %float 0.00200000009 + %248 = OpConstantComposite %v3float %float_0_00200000009 %float_0_00200000009 %float_0_00200000009 +%float_6_10351999en05 = OpConstant %float 6.10351999e-05 + %250 = OpConstantComposite %v3float %float_6_10351999en05 %float_6_10351999en05 %float_6_10351999en05 + %float_4_5 = OpConstant %float 4.5 + %252 = OpConstantComposite %v3float %float_4_5 %float_4_5 %float_4_5 +%float_0_0179999992 = OpConstant %float 0.0179999992 + %254 = OpConstantComposite %v3float %float_0_0179999992 %float_0_0179999992 %float_0_0179999992 
+%float_0_449999988 = OpConstant %float 0.449999988 + %256 = OpConstantComposite %v3float %float_0_449999988 %float_0_449999988 %float_0_449999988 +%float_1_09899998 = OpConstant %float 1.09899998 + %258 = OpConstantComposite %v3float %float_1_09899998 %float_1_09899998 %float_1_09899998 +%float_0_0989999995 = OpConstant %float 0.0989999995 + %260 = OpConstantComposite %v3float %float_0_0989999995 %float_0_0989999995 %float_0_0989999995 + %float_1_5 = OpConstant %float 1.5 + %262 = OpConstantComposite %v3float %float_1_5 %float_1_5 %float_1_5 + %263 = OpConstantComposite %v3float %float_0_159301758 %float_0_159301758 %float_0_159301758 + %264 = OpConstantComposite %v3float %float_78_84375 %float_78_84375 %float_78_84375 +%float_1_00055635 = OpConstant %float 1.00055635 + %float_7000 = OpConstant %float 7000 +%float_0_244063005 = OpConstant %float 0.244063005 +%float_99_1100006 = OpConstant %float 99.1100006 +%float_2967800 = OpConstant %float 2967800 +%float_0_237039998 = OpConstant %float 0.237039998 +%float_247_479996 = OpConstant %float 247.479996 +%float_1901800 = OpConstant %float 1901800 + %float_n3 = OpConstant %float -3 +%float_2_86999989 = OpConstant %float 2.86999989 +%float_0_275000006 = OpConstant %float 0.275000006 +%float_0_860117733 = OpConstant %float 0.860117733 +%float_0_000154118257 = OpConstant %float 0.000154118257 +%float_1_28641219en07 = OpConstant %float 1.28641219e-07 +%float_0_00084242021 = OpConstant %float 0.00084242021 +%float_7_08145137en07 = OpConstant %float 7.08145137e-07 +%float_0_317398727 = OpConstant %float 0.317398727 + +; HACK: Needed to hack this constant since MSVC and GNU libc are off by 1 ULP when converting to string (it probably still works fine though in a roundtrip ...) 
+%float_4_22806261en05 = OpConstant %float 4.25 + +%float_4_20481676en08 = OpConstant %float 4.20481676e-08 +%float_2_8974182en05 = OpConstant %float 2.8974182e-05 +%float_1_61456057en07 = OpConstant %float 1.61456057e-07 + %float_8 = OpConstant %float 8 + %float_4 = OpConstant %float 4 +%float_0_895099998 = OpConstant %float 0.895099998 +%float_0_266400009 = OpConstant %float 0.266400009 +%float_n0_161400005 = OpConstant %float -0.161400005 +%float_n0_750199974 = OpConstant %float -0.750199974 +%float_1_71350002 = OpConstant %float 1.71350002 +%float_0_0366999991 = OpConstant %float 0.0366999991 +%float_0_0388999991 = OpConstant %float 0.0388999991 +%float_n0_0684999973 = OpConstant %float -0.0684999973 +%float_1_02960002 = OpConstant %float 1.02960002 +%float_0_986992896 = OpConstant %float 0.986992896 +%float_n0_1470543 = OpConstant %float -0.1470543 +%float_0_159962699 = OpConstant %float 0.159962699 +%float_0_432305306 = OpConstant %float 0.432305306 +%float_0_518360317 = OpConstant %float 0.518360317 +%float_0_0492912009 = OpConstant %float 0.0492912009 +%float_n0_0085287001 = OpConstant %float -0.0085287001 +%float_0_040042799 = OpConstant %float 0.040042799 +%float_0_968486726 = OpConstant %float 0.968486726 +%float_5_55555534 = OpConstant %float 5.55555534 + %307 = OpConstantComposite %v3float %float_5_55555534 %float_5_55555534 %float_5_55555534 +%float_1_00000001en10 = OpConstant %float 1.00000001e-10 +%float_0_00999999978 = OpConstant %float 0.00999999978 +%float_0_666666687 = OpConstant %float 0.666666687 + %float_180 = OpConstant %float 180 + %float_360 = OpConstant %float 360 +%float_65535 = OpConstant %float 65535 + %314 = OpConstantComposite %v3float %float_65535 %float_65535 %float_65535 +%float_n4_97062206 = OpConstant %float -4.97062206 +%float_n3_02937818 = OpConstant %float -3.02937818 +%float_n2_12619996 = OpConstant %float -2.12619996 +%float_n1_51049995 = OpConstant %float -1.51049995 +%float_n1_05780005 = OpConstant %float -1.05780005 
+%float_n0_466800004 = OpConstant %float -0.466800004 +%float_0_119379997 = OpConstant %float 0.119379997 +%float_0_708813429 = OpConstant %float 0.708813429 +%float_1_29118657 = OpConstant %float 1.29118657 +%float_0_808913231 = OpConstant %float 0.808913231 +%float_1_19108677 = OpConstant %float 1.19108677 +%float_1_56830001 = OpConstant %float 1.56830001 +%float_1_9483 = OpConstant %float 1.9483 +%float_2_30830002 = OpConstant %float 2.30830002 +%float_2_63840008 = OpConstant %float 2.63840008 +%float_2_85949993 = OpConstant %float 2.85949993 +%float_2_98726082 = OpConstant %float 2.98726082 +%float_3_01273918 = OpConstant %float 3.01273918 +%float_0_179999992 = OpConstant %float 0.179999992 +%float_9_99999975en05 = OpConstant %float 9.99999975e-05 + %float_1000 = OpConstant %float 1000 +%float_0_0599999987 = OpConstant %float 0.0599999987 +%float_3_50738446en05 = OpConstant %float 3.50738446e-05 + %338 = OpConstantComposite %v3float %float_3_50738446en05 %float_3_50738446en05 %float_3_50738446en05 +%float_n2_30102992 = OpConstant %float -2.30102992 +%float_n1_93120003 = OpConstant %float -1.93120003 +%float_n1_52049994 = OpConstant %float -1.52049994 +%float_0_801995218 = OpConstant %float 0.801995218 +%float_1_19800484 = OpConstant %float 1.19800484 +%float_1_59430003 = OpConstant %float 1.59430003 +%float_1_99730003 = OpConstant %float 1.99730003 +%float_2_37829995 = OpConstant %float 2.37829995 +%float_2_76839995 = OpConstant %float 2.76839995 +%float_3_05150008 = OpConstant %float 3.05150008 +%float_3_27462935 = OpConstant %float 3.27462935 +%float_3_32743073 = OpConstant %float 3.32743073 +%float_0_00499999989 = OpConstant %float 0.00499999989 + %float_11 = OpConstant %float 11 + %float_2000 = OpConstant %float 2000 +%float_0_119999997 = OpConstant %float 0.119999997 +%float_0_00313066994 = OpConstant %float 0.00313066994 +%float_12_9200001 = OpConstant %float 12.9200001 +%float_0_416666657 = OpConstant %float 0.416666657 +%float_1_05499995 = OpConstant 
%float 1.05499995 +%float_0_0549999997 = OpConstant %float 0.0549999997 +%float_n0_166666672 = OpConstant %float -0.166666672 + %float_n0_5 = OpConstant %float -0.5 +%float_0_166666672 = OpConstant %float 0.166666672 +%float_n3_15737653 = OpConstant %float -3.15737653 +%float_n0_485249996 = OpConstant %float -0.485249996 +%float_1_84773242 = OpConstant %float 1.84773242 +%float_n0_718548238 = OpConstant %float -0.718548238 +%float_2_08103061 = OpConstant %float 2.08103061 +%float_3_6681242 = OpConstant %float 3.6681242 + %float_18 = OpConstant %float 18 + %float_7 = OpConstant %float 7 +%type__Globals = OpTypeStruct %v4float %v3float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %float %float %float %float %float %v3float %v4float %float %float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %float %float %uint %uint %float %float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %377 = OpTypeFunction %void +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 + %uint_10 = OpConstant %uint 10 +%_arr_float_uint_10 = OpTypeArray %float %uint_10 +%_arr_float_uint_6 = OpTypeArray %float %uint_6 + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %gl_Layer = 
OpVariable %_ptr_Input_uint Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output +%_ptr_Function__arr_float_uint_6 = OpTypePointer Function %_arr_float_uint_6 +%_ptr_Function__arr_float_uint_10 = OpTypePointer Function %_arr_float_uint_10 + %391 = OpUndef %v3float + %392 = OpConstantComposite %v3float %float_0_952552378 %float_0 %float_9_36786018en05 + %393 = OpConstantComposite %v3float %float_0_343966454 %float_0_728166103 %float_n0_0721325427 + %394 = OpConstantComposite %v3float %float_0 %float_0 %float_1_00882518 + %395 = OpConstantComposite %mat3v3float %392 %393 %394 + %396 = OpConstantComposite %v3float %float_1_04981101 %float_0 %float_n9_74845025en05 + %397 = OpConstantComposite %v3float %float_n0_495903015 %float_1_37331307 %float_0_0982400328 + %398 = OpConstantComposite %v3float %float_0 %float_0 %float_0_991252005 + %399 = OpConstantComposite %mat3v3float %396 %397 %398 + %400 = OpConstantComposite %v3float %float_0_662454188 %float_0_134004205 %float_0_156187683 + %401 = OpConstantComposite %v3float %float_n0_00557464967 %float_0_0040607336 %float_1_01033914 + %402 = OpConstantComposite %mat3v3float %400 %67 %401 + %403 = OpConstantComposite %v3float %float_1_6410234 %float_n0_324803293 %float_n0_236424699 + %404 = OpConstantComposite %v3float %float_n0_663662851 %float_1_61533165 %float_0_0167563483 + %405 = OpConstantComposite %v3float %float_0_0117218941 %float_n0_00828444213 %float_0_988394856 + %406 = OpConstantComposite %mat3v3float %403 %404 %405 + %407 = OpConstantComposite %v3float %float_1_45143926 %float_n0_236510754 %float_n0_214928567 + %408 = OpConstantComposite %v3float %float_n0_0765537769 %float_1_17622972 %float_n0_0996759236 + %409 = OpConstantComposite %v3float %float_0_00831614807 %float_n0_00603244966 %float_0_997716308 + %410 = OpConstantComposite %mat3v3float %407 %408 %409 + %411 = OpConstantComposite %v3float %float_0_695452213 %float_0_140678704 %float_0_163869068 + %412 = OpConstantComposite %v3float 
%float_0_0447945632 %float_0_859671116 %float_0_0955343172 + %413 = OpConstantComposite %v3float %float_n0_00552588282 %float_0_00402521016 %float_1_00150073 + %414 = OpConstantComposite %mat3v3float %411 %412 %413 + %415 = OpConstantComposite %v3float %float_3_2409699 %float_n1_5373832 %float_n0_498610765 + %416 = OpConstantComposite %v3float %float_n0_969243646 %float_1_8759675 %float_0_0415550582 + %417 = OpConstantComposite %v3float %float_0_0556300804 %float_n0_203976959 %float_1_05697155 + %418 = OpConstantComposite %mat3v3float %415 %416 %417 + %419 = OpConstantComposite %v3float %float_0_412456393 %float_0_357576102 %float_0_180437505 + %420 = OpConstantComposite %v3float %float_0_212672904 %float_0_715152204 %float_0_0721750036 + %421 = OpConstantComposite %v3float %float_0_0193339009 %float_0_119191997 %float_0_950304091 + %422 = OpConstantComposite %mat3v3float %419 %420 %421 + %423 = OpConstantComposite %v3float %float_1_71660841 %float_n0_355662107 %float_n0_253360093 + %424 = OpConstantComposite %v3float %float_n0_666682899 %float_1_61647761 %float_0_0157685 + %425 = OpConstantComposite %v3float %float_0_0176422 %float_n0_0427763015 %float_0_942228675 + %426 = OpConstantComposite %mat3v3float %423 %424 %425 + %427 = OpConstantComposite %v3float %float_2_49339628 %float_n0_93134588 %float_n0_402694494 + %428 = OpConstantComposite %v3float %float_n0_829486787 %float_1_76265967 %float_0_0236246008 + %429 = OpConstantComposite %v3float %float_0_0358507 %float_n0_0761827007 %float_0_957014024 + %430 = OpConstantComposite %mat3v3float %427 %428 %429 + %431 = OpConstantComposite %v3float %float_1_01303005 %float_0_00610530982 %float_n0_0149710001 + %432 = OpConstantComposite %v3float %float_0_00769822998 %float_0_998165011 %float_n0_00503202993 + %433 = OpConstantComposite %v3float %float_n0_00284131011 %float_0_00468515977 %float_0_924507022 + %434 = OpConstantComposite %mat3v3float %431 %432 %433 + %435 = OpConstantComposite %v3float %float_0_987223983 
%float_n0_00611326983 %float_0_0159533005 + %436 = OpConstantComposite %v3float %float_n0_00759836007 %float_1_00186002 %float_0_0053300201 + %437 = OpConstantComposite %v3float %float_0_00307257008 %float_n0_00509594986 %float_1_08168006 + %438 = OpConstantComposite %mat3v3float %435 %436 %437 + %439 = OpConstantComposite %v3float %float_0_5 %float_n1 %float_0_5 + %440 = OpConstantComposite %v3float %float_n1 %float_1 %float_0_5 + %441 = OpConstantComposite %v3float %float_0_5 %float_0 %float_0 + %442 = OpConstantComposite %mat3v3float %439 %440 %441 + %443 = OpConstantComposite %v3float %float_1 %float_0 %float_0 + %444 = OpConstantComposite %v3float %float_0 %float_1 %float_0 + %445 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %446 = OpConstantComposite %mat3v3float %443 %444 %445 +%float_n6_07624626 = OpConstant %float -6.07624626 + %448 = OpConstantComposite %v3float %float_n6_07624626 %float_n6_07624626 %float_n6_07624626 + %449 = OpConstantComposite %v3float %float_0_895099998 %float_0_266400009 %float_n0_161400005 + %450 = OpConstantComposite %v3float %float_n0_750199974 %float_1_71350002 %float_0_0366999991 + %451 = OpConstantComposite %v3float %float_0_0388999991 %float_n0_0684999973 %float_1_02960002 + %452 = OpConstantComposite %mat3v3float %449 %450 %451 + %453 = OpConstantComposite %v3float %float_0_986992896 %float_n0_1470543 %float_0_159962699 + %454 = OpConstantComposite %v3float %float_0_432305306 %float_0_518360317 %float_0_0492912009 + %455 = OpConstantComposite %v3float %float_n0_0085287001 %float_0_040042799 %float_0_968486726 + %456 = OpConstantComposite %mat3v3float %453 %454 %455 +%float_0_358299971 = OpConstant %float 0.358299971 + %458 = OpConstantComposite %v3float %float_0_544169128 %float_0_239592597 %float_0_166694298 + %459 = OpConstantComposite %v3float %float_0_239465594 %float_0_702153027 %float_0_058381401 + %460 = OpConstantComposite %v3float %float_n0_00234390004 %float_0_0361833982 %float_1_05521834 + %461 = 
OpConstantComposite %mat3v3float %458 %459 %460 + %462 = OpConstantComposite %v3float %float_0_940437257 %float_n0_0183068793 %float_0_077869609 + %463 = OpConstantComposite %v3float %float_0_00837869663 %float_0_828660011 %float_0_162961304 + %464 = OpConstantComposite %v3float %float_0_00054712611 %float_n0_000883374596 %float_1_00033629 + %465 = OpConstantComposite %mat3v3float %462 %463 %464 + %466 = OpConstantComposite %v3float %float_1_06317997 %float_0_0233955998 %float_n0_0865726024 + %467 = OpConstantComposite %v3float %float_n0_0106336996 %float_1_20632005 %float_n0_195690006 + %468 = OpConstantComposite %v3float %float_n0_000590886979 %float_0_00105247996 %float_0_999538004 + %469 = OpConstantComposite %mat3v3float %466 %467 %468 +%float_0_0533333346 = OpConstant %float 0.0533333346 +%float_0_159999996 = OpConstant %float 0.159999996 +%float_57_2957764 = OpConstant %float 57.2957764 +%float_n67_5 = OpConstant %float -67.5 + %float_67_5 = OpConstant %float 67.5 + %475 = OpConstantComposite %_arr_float_uint_6 %float_n4 %float_n4 %float_n3_15737653 %float_n0_485249996 %float_1_84773242 %float_1_84773242 + %476 = OpConstantComposite %_arr_float_uint_6 %float_n0_718548238 %float_2_08103061 %float_3_6681242 %float_4 %float_4 %float_4 + %float_n15 = OpConstant %float -15 + %float_n14 = OpConstant %float -14 + %479 = OpConstantComposite %_arr_float_uint_10 %float_n4_97062206 %float_n3_02937818 %float_n2_12619996 %float_n1_51049995 %float_n1_05780005 %float_n0_466800004 %float_0_119379997 %float_0_708813429 %float_1_29118657 %float_1_29118657 + %480 = OpConstantComposite %_arr_float_uint_10 %float_0_808913231 %float_1_19108677 %float_1_56830001 %float_1_9483 %float_2_30830002 %float_2_63840008 %float_2_85949993 %float_2_98726082 %float_3_01273918 %float_3_01273918 + %float_n12 = OpConstant %float -12 + %482 = OpConstantComposite %_arr_float_uint_10 %float_n2_30102992 %float_n2_30102992 %float_n1_93120003 %float_n1_52049994 %float_n1_05780005 %float_n0_466800004 
%float_0_119379997 %float_0_708813429 %float_1_29118657 %float_1_29118657 + %483 = OpConstantComposite %_arr_float_uint_10 %float_0_801995218 %float_1_19800484 %float_1_59430003 %float_1_99730003 %float_2_37829995 %float_2_76839995 %float_3_05150008 %float_3_27462935 %float_3_32743073 %float_3_32743073 +%float_0_0322580636 = OpConstant %float 0.0322580636 +%float_1_03225803 = OpConstant %float 1.03225803 + %486 = OpConstantComposite %v2float %float_1_03225803 %float_1_03225803 +%float_4_60443853e_09 = OpConstant %float 4.60443853e+09 +%float_2_00528435e_09 = OpConstant %float 2.00528435e+09 +%float_0_333333343 = OpConstant %float 0.333333343 + %float_5 = OpConstant %float 5 + %float_2_5 = OpConstant %float 2.5 +%float_0_0250000004 = OpConstant %float 0.0250000004 +%float_0_239999995 = OpConstant %float 0.239999995 +%float_0_0148148146 = OpConstant %float 0.0148148146 +%float_0_819999993 = OpConstant %float 0.819999993 + %496 = OpConstantComposite %v3float %float_9_99999975en05 %float_9_99999975en05 %float_9_99999975en05 +%float_0_0296296291 = OpConstant %float 0.0296296291 +%float_0_952381015 = OpConstant %float 0.952381015 + %499 = OpConstantComposite %v3float %float_0_952381015 %float_0_952381015 %float_0_952381015 + %MainPS = OpFunction %void None %377 + %500 = OpLabel + %501 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %502 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %503 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %504 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %505 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %506 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %507 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %508 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %509 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %510 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %511 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %512 = OpVariable 
%_ptr_Function__arr_float_uint_10 Function + %513 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %514 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %515 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %516 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %517 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %518 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %519 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %520 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %521 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %522 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %523 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %524 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %525 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %526 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %527 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %528 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %529 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %530 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %531 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %532 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %533 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %534 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %535 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %536 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %537 = OpLoad %v2float %in_var_TEXCOORD0 + %538 = OpLoad %uint %gl_Layer + %539 = OpFSub %v2float %537 %128 + %540 = OpFMul %v2float %539 %486 + %541 = OpCompositeExtract %float %540 0 + %542 = OpCompositeExtract %float %540 1 + %543 = OpConvertUToF %float %538 + %544 = OpFMul %float %543 %float_0_0322580636 + %545 = OpCompositeConstruct %v4float %541 %542 %544 %float_0 + %546 = OpMatrixTimesMatrix %mat3v3float %422 %434 + %547 = OpMatrixTimesMatrix 
%mat3v3float %546 %406 + %548 = OpMatrixTimesMatrix %mat3v3float %402 %438 + %549 = OpMatrixTimesMatrix %mat3v3float %548 %418 + %550 = OpMatrixTimesMatrix %mat3v3float %395 %406 + %551 = OpMatrixTimesMatrix %mat3v3float %402 %399 + %552 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_42 + %553 = OpLoad %uint %552 + OpBranch %554 + %554 = OpLabel + OpLoopMerge %555 %556 None + OpBranch %557 + %557 = OpLabel + %558 = OpMatrixTimesMatrix %mat3v3float %548 %430 + %559 = OpMatrixTimesMatrix %mat3v3float %548 %426 + %560 = OpIEqual %bool %553 %uint_1 + OpSelectionMerge %561 None + OpBranchConditional %560 %562 %563 + %563 = OpLabel + %564 = OpIEqual %bool %553 %uint_2 + OpSelectionMerge %565 None + OpBranchConditional %564 %566 %567 + %567 = OpLabel + %568 = OpIEqual %bool %553 %uint_3 + OpSelectionMerge %569 None + OpBranchConditional %568 %570 %571 + %571 = OpLabel + %572 = OpIEqual %bool %553 %uint_4 + OpSelectionMerge %573 None + OpBranchConditional %572 %574 %575 + %575 = OpLabel + OpBranch %555 + %574 = OpLabel + OpBranch %555 + %573 = OpLabel + OpUnreachable + %570 = OpLabel + OpBranch %555 + %569 = OpLabel + OpUnreachable + %566 = OpLabel + OpBranch %555 + %565 = OpLabel + OpUnreachable + %562 = OpLabel + OpBranch %555 + %561 = OpLabel + OpUnreachable + %556 = OpLabel + OpBranch %554 + %555 = OpLabel + %576 = OpPhi %mat3v3float %549 %575 %446 %574 %414 %570 %559 %566 %558 %562 + %577 = OpVectorShuffle %v3float %545 %545 0 1 2 + %578 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_41 + %579 = OpLoad %uint %578 + %580 = OpUGreaterThanEqual %bool %579 %uint_3 + OpSelectionMerge %581 None + OpBranchConditional %580 %582 %583 + %583 = OpLabel + %584 = OpFSub %v3float %577 %192 + %585 = OpFMul %v3float %584 %193 + %586 = OpExtInst %v3float %1 Exp2 %585 + %587 = OpFMul %v3float %586 %194 + %588 = OpExtInst %v3float %1 Exp2 %448 + %589 = OpFMul %v3float %588 %194 + %590 = OpFSub %v3float %587 %589 + OpBranch %581 + %582 = OpLabel + %591 = OpExtInst %v3float %1 Pow 
%577 %182 + %592 = OpFSub %v3float %591 %183 + %593 = OpExtInst %v3float %1 FMax %132 %592 + %594 = OpFMul %v3float %185 %591 + %595 = OpFSub %v3float %184 %594 + %596 = OpFDiv %v3float %593 %595 + %597 = OpExtInst %v3float %1 Pow %596 %187 + %598 = OpFMul %v3float %597 %188 + OpBranch %581 + %581 = OpLabel + %599 = OpPhi %v3float %590 %583 %598 %582 + %600 = OpAccessChain %_ptr_Uniform_float %_Globals %int_17 + %601 = OpLoad %float %600 + %602 = OpFMul %float %601 %float_1_00055635 + %603 = OpFOrdLessThanEqual %bool %602 %float_7000 + %604 = OpFDiv %float %float_4_60443853e_09 %601 + %605 = OpFSub %float %float_2967800 %604 + %606 = OpFDiv %float %605 %602 + %607 = OpFAdd %float %float_99_1100006 %606 + %608 = OpFDiv %float %607 %602 + %609 = OpFAdd %float %float_0_244063005 %608 + %610 = OpFDiv %float %float_2_00528435e_09 %601 + %611 = OpFSub %float %float_1901800 %610 + %612 = OpFDiv %float %611 %602 + %613 = OpFAdd %float %float_247_479996 %612 + %614 = OpFDiv %float %613 %602 + %615 = OpFAdd %float %float_0_237039998 %614 + %616 = OpSelect %float %603 %609 %615 + %617 = OpFMul %float %float_n3 %616 + %618 = OpFMul %float %617 %616 + %619 = OpFMul %float %float_2_86999989 %616 + %620 = OpFAdd %float %618 %619 + %621 = OpFSub %float %620 %float_0_275000006 + %622 = OpCompositeConstruct %v2float %616 %621 + %623 = OpFMul %float %float_0_000154118257 %601 + %624 = OpFAdd %float %float_0_860117733 %623 + %625 = OpFMul %float %float_1_28641219en07 %601 + %626 = OpFMul %float %625 %601 + %627 = OpFAdd %float %624 %626 + %628 = OpFMul %float %float_0_00084242021 %601 + %629 = OpFAdd %float %float_1 %628 + %630 = OpFMul %float %float_7_08145137en07 %601 + %631 = OpFMul %float %630 %601 + %632 = OpFAdd %float %629 %631 + %633 = OpFDiv %float %627 %632 + %634 = OpFMul %float %float_4_22806261en05 %601 + %635 = OpFAdd %float %float_0_317398727 %634 + %636 = OpFMul %float %float_4_20481676en08 %601 + %637 = OpFMul %float %636 %601 + %638 = OpFAdd %float %635 %637 + %639 = 
OpFMul %float %float_2_8974182en05 %601 + %640 = OpFSub %float %float_1 %639 + %641 = OpFMul %float %float_1_61456057en07 %601 + %642 = OpFMul %float %641 %601 + %643 = OpFAdd %float %640 %642 + %644 = OpFDiv %float %638 %643 + %645 = OpFMul %float %float_3 %633 + %646 = OpFMul %float %float_2 %633 + %647 = OpFMul %float %float_8 %644 + %648 = OpFSub %float %646 %647 + %649 = OpFAdd %float %648 %float_4 + %650 = OpFDiv %float %645 %649 + %651 = OpFMul %float %float_2 %644 + %652 = OpFDiv %float %651 %649 + %653 = OpCompositeConstruct %v2float %650 %652 + %654 = OpFOrdLessThan %bool %601 %float_4000 + %655 = OpCompositeConstruct %v2bool %654 %654 + %656 = OpSelect %v2float %655 %653 %622 + %657 = OpAccessChain %_ptr_Uniform_float %_Globals %int_18 + %658 = OpLoad %float %657 + %659 = OpCompositeConstruct %v2float %633 %644 + %660 = OpExtInst %v2float %1 Normalize %659 + %661 = OpCompositeExtract %float %660 1 + %662 = OpFNegate %float %661 + %663 = OpFMul %float %662 %658 + %664 = OpFMul %float %663 %float_0_0500000007 + %665 = OpFAdd %float %633 %664 + %666 = OpCompositeExtract %float %660 0 + %667 = OpFMul %float %666 %658 + %668 = OpFMul %float %667 %float_0_0500000007 + %669 = OpFAdd %float %644 %668 + %670 = OpFMul %float %float_3 %665 + %671 = OpFMul %float %float_2 %665 + %672 = OpFMul %float %float_8 %669 + %673 = OpFSub %float %671 %672 + %674 = OpFAdd %float %673 %float_4 + %675 = OpFDiv %float %670 %674 + %676 = OpFMul %float %float_2 %669 + %677 = OpFDiv %float %676 %674 + %678 = OpCompositeConstruct %v2float %675 %677 + %679 = OpFSub %v2float %678 %653 + %680 = OpFAdd %v2float %656 %679 + %681 = OpCompositeExtract %float %680 0 + %682 = OpCompositeExtract %float %680 1 + %683 = OpExtInst %float %1 FMax %682 %float_1_00000001en10 + %684 = OpFDiv %float %681 %683 + %685 = OpCompositeInsert %v3float %684 %391 0 + %686 = OpCompositeInsert %v3float %float_1 %685 1 + %687 = OpFSub %float %float_1 %681 + %688 = OpFSub %float %687 %682 + %689 = OpFDiv %float 
%688 %683 + %690 = OpCompositeInsert %v3float %689 %686 2 + %691 = OpExtInst %float %1 FMax %float_0_328999996 %float_1_00000001en10 + %692 = OpFDiv %float %float_0_312700003 %691 + %693 = OpCompositeInsert %v3float %692 %391 0 + %694 = OpCompositeInsert %v3float %float_1 %693 1 + %695 = OpFDiv %float %float_0_358299971 %691 + %696 = OpCompositeInsert %v3float %695 %694 2 + %697 = OpVectorTimesMatrix %v3float %690 %452 + %698 = OpVectorTimesMatrix %v3float %696 %452 + %699 = OpCompositeExtract %float %698 0 + %700 = OpCompositeExtract %float %697 0 + %701 = OpFDiv %float %699 %700 + %702 = OpCompositeConstruct %v3float %701 %float_0 %float_0 + %703 = OpCompositeExtract %float %698 1 + %704 = OpCompositeExtract %float %697 1 + %705 = OpFDiv %float %703 %704 + %706 = OpCompositeConstruct %v3float %float_0 %705 %float_0 + %707 = OpCompositeExtract %float %698 2 + %708 = OpCompositeExtract %float %697 2 + %709 = OpFDiv %float %707 %708 + %710 = OpCompositeConstruct %v3float %float_0 %float_0 %709 + %711 = OpCompositeConstruct %mat3v3float %702 %706 %710 + %712 = OpMatrixTimesMatrix %mat3v3float %452 %711 + %713 = OpMatrixTimesMatrix %mat3v3float %712 %456 + %714 = OpMatrixTimesMatrix %mat3v3float %422 %713 + %715 = OpMatrixTimesMatrix %mat3v3float %714 %418 + %716 = OpVectorTimesMatrix %v3float %599 %715 + %717 = OpVectorTimesMatrix %v3float %716 %547 + %718 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_9 + %719 = OpAccessChain %_ptr_Uniform_float %_Globals %int_9 %int_3 + %720 = OpLoad %float %719 + %721 = OpFOrdNotEqual %bool %720 %float_0 + OpSelectionMerge %722 None + OpBranchConditional %721 %723 %722 + %723 = OpLabel + %724 = OpDot %float %717 %67 + %725 = OpCompositeConstruct %v3float %724 %724 %724 + %726 = OpFDiv %v3float %717 %725 + %727 = OpFSub %v3float %726 %135 + %728 = OpDot %float %727 %727 + %729 = OpFMul %float %float_n4 %728 + %730 = OpExtInst %float %1 Exp2 %729 + %731 = OpFSub %float %float_1 %730 + %732 = OpAccessChain %_ptr_Uniform_float 
%_Globals %int_44 + %733 = OpLoad %float %732 + %734 = OpFMul %float %float_n4 %733 + %735 = OpFMul %float %734 %724 + %736 = OpFMul %float %735 %724 + %737 = OpExtInst %float %1 Exp2 %736 + %738 = OpFSub %float %float_1 %737 + %739 = OpFMul %float %731 %738 + %740 = OpMatrixTimesMatrix %mat3v3float %461 %406 + %741 = OpMatrixTimesMatrix %mat3v3float %549 %740 + %742 = OpVectorTimesMatrix %v3float %717 %741 + %743 = OpCompositeConstruct %v3float %739 %739 %739 + %744 = OpExtInst %v3float %1 FMix %717 %742 %743 + OpBranch %722 + %722 = OpLabel + %745 = OpPhi %v3float %717 %581 %744 %723 + %746 = OpDot %float %745 %67 + %747 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_24 + %748 = OpLoad %v4float %747 + %749 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_19 + %750 = OpLoad %v4float %749 + %751 = OpFMul %v4float %748 %750 + %752 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_25 + %753 = OpLoad %v4float %752 + %754 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_20 + %755 = OpLoad %v4float %754 + %756 = OpFMul %v4float %753 %755 + %757 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_26 + %758 = OpLoad %v4float %757 + %759 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_21 + %760 = OpLoad %v4float %759 + %761 = OpFMul %v4float %758 %760 + %762 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_27 + %763 = OpLoad %v4float %762 + %764 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_22 + %765 = OpLoad %v4float %764 + %766 = OpFMul %v4float %763 %765 + %767 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_28 + %768 = OpLoad %v4float %767 + %769 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_23 + %770 = OpLoad %v4float %769 + %771 = OpFAdd %v4float %768 %770 + %772 = OpCompositeConstruct %v3float %746 %746 %746 + %773 = OpVectorShuffle %v3float %751 %751 0 1 2 + %774 = OpCompositeExtract %float %751 3 + %775 = OpCompositeConstruct %v3float %774 %774 %774 + %776 = OpFMul %v3float %773 %775 + %777 = OpExtInst %v3float %1 FMix 
%772 %745 %776 + %778 = OpExtInst %v3float %1 FMax %132 %777 + %779 = OpFMul %v3float %778 %307 + %780 = OpVectorShuffle %v3float %756 %756 0 1 2 + %781 = OpCompositeExtract %float %756 3 + %782 = OpCompositeConstruct %v3float %781 %781 %781 + %783 = OpFMul %v3float %780 %782 + %784 = OpExtInst %v3float %1 Pow %779 %783 + %785 = OpFMul %v3float %784 %194 + %786 = OpVectorShuffle %v3float %761 %761 0 1 2 + %787 = OpCompositeExtract %float %761 3 + %788 = OpCompositeConstruct %v3float %787 %787 %787 + %789 = OpFMul %v3float %786 %788 + %790 = OpFDiv %v3float %135 %789 + %791 = OpExtInst %v3float %1 Pow %785 %790 + %792 = OpVectorShuffle %v3float %766 %766 0 1 2 + %793 = OpCompositeExtract %float %766 3 + %794 = OpCompositeConstruct %v3float %793 %793 %793 + %795 = OpFMul %v3float %792 %794 + %796 = OpFMul %v3float %791 %795 + %797 = OpVectorShuffle %v3float %771 %771 0 1 2 + %798 = OpCompositeExtract %float %771 3 + %799 = OpCompositeConstruct %v3float %798 %798 %798 + %800 = OpFAdd %v3float %797 %799 + %801 = OpFAdd %v3float %796 %800 + %802 = OpAccessChain %_ptr_Uniform_float %_Globals %int_39 + %803 = OpLoad %float %802 + %804 = OpExtInst %float %1 SmoothStep %float_0 %803 %746 + %805 = OpFSub %float %float_1 %804 + %806 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_34 + %807 = OpLoad %v4float %806 + %808 = OpFMul %v4float %807 %750 + %809 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_35 + %810 = OpLoad %v4float %809 + %811 = OpFMul %v4float %810 %755 + %812 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_36 + %813 = OpLoad %v4float %812 + %814 = OpFMul %v4float %813 %760 + %815 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_37 + %816 = OpLoad %v4float %815 + %817 = OpFMul %v4float %816 %765 + %818 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_38 + %819 = OpLoad %v4float %818 + %820 = OpFAdd %v4float %819 %770 + %821 = OpVectorShuffle %v3float %808 %808 0 1 2 + %822 = OpCompositeExtract %float %808 3 + %823 = OpCompositeConstruct 
%v3float %822 %822 %822 + %824 = OpFMul %v3float %821 %823 + %825 = OpExtInst %v3float %1 FMix %772 %745 %824 + %826 = OpExtInst %v3float %1 FMax %132 %825 + %827 = OpFMul %v3float %826 %307 + %828 = OpVectorShuffle %v3float %811 %811 0 1 2 + %829 = OpCompositeExtract %float %811 3 + %830 = OpCompositeConstruct %v3float %829 %829 %829 + %831 = OpFMul %v3float %828 %830 + %832 = OpExtInst %v3float %1 Pow %827 %831 + %833 = OpFMul %v3float %832 %194 + %834 = OpVectorShuffle %v3float %814 %814 0 1 2 + %835 = OpCompositeExtract %float %814 3 + %836 = OpCompositeConstruct %v3float %835 %835 %835 + %837 = OpFMul %v3float %834 %836 + %838 = OpFDiv %v3float %135 %837 + %839 = OpExtInst %v3float %1 Pow %833 %838 + %840 = OpVectorShuffle %v3float %817 %817 0 1 2 + %841 = OpCompositeExtract %float %817 3 + %842 = OpCompositeConstruct %v3float %841 %841 %841 + %843 = OpFMul %v3float %840 %842 + %844 = OpFMul %v3float %839 %843 + %845 = OpVectorShuffle %v3float %820 %820 0 1 2 + %846 = OpCompositeExtract %float %820 3 + %847 = OpCompositeConstruct %v3float %846 %846 %846 + %848 = OpFAdd %v3float %845 %847 + %849 = OpFAdd %v3float %844 %848 + %850 = OpAccessChain %_ptr_Uniform_float %_Globals %int_40 + %851 = OpLoad %float %850 + %852 = OpExtInst %float %1 SmoothStep %851 %float_1 %746 + %853 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_29 + %854 = OpLoad %v4float %853 + %855 = OpFMul %v4float %854 %750 + %856 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_30 + %857 = OpLoad %v4float %856 + %858 = OpFMul %v4float %857 %755 + %859 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_31 + %860 = OpLoad %v4float %859 + %861 = OpFMul %v4float %860 %760 + %862 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_32 + %863 = OpLoad %v4float %862 + %864 = OpFMul %v4float %863 %765 + %865 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_33 + %866 = OpLoad %v4float %865 + %867 = OpFAdd %v4float %866 %770 + %868 = OpVectorShuffle %v3float %855 %855 0 1 2 + %869 = 
OpCompositeExtract %float %855 3 + %870 = OpCompositeConstruct %v3float %869 %869 %869 + %871 = OpFMul %v3float %868 %870 + %872 = OpExtInst %v3float %1 FMix %772 %745 %871 + %873 = OpExtInst %v3float %1 FMax %132 %872 + %874 = OpFMul %v3float %873 %307 + %875 = OpVectorShuffle %v3float %858 %858 0 1 2 + %876 = OpCompositeExtract %float %858 3 + %877 = OpCompositeConstruct %v3float %876 %876 %876 + %878 = OpFMul %v3float %875 %877 + %879 = OpExtInst %v3float %1 Pow %874 %878 + %880 = OpFMul %v3float %879 %194 + %881 = OpVectorShuffle %v3float %861 %861 0 1 2 + %882 = OpCompositeExtract %float %861 3 + %883 = OpCompositeConstruct %v3float %882 %882 %882 + %884 = OpFMul %v3float %881 %883 + %885 = OpFDiv %v3float %135 %884 + %886 = OpExtInst %v3float %1 Pow %880 %885 + %887 = OpVectorShuffle %v3float %864 %864 0 1 2 + %888 = OpCompositeExtract %float %864 3 + %889 = OpCompositeConstruct %v3float %888 %888 %888 + %890 = OpFMul %v3float %887 %889 + %891 = OpFMul %v3float %886 %890 + %892 = OpVectorShuffle %v3float %867 %867 0 1 2 + %893 = OpCompositeExtract %float %867 3 + %894 = OpCompositeConstruct %v3float %893 %893 %893 + %895 = OpFAdd %v3float %892 %894 + %896 = OpFAdd %v3float %891 %895 + %897 = OpFSub %float %804 %852 + %898 = OpCompositeConstruct %v3float %805 %805 %805 + %899 = OpFMul %v3float %801 %898 + %900 = OpCompositeConstruct %v3float %897 %897 %897 + %901 = OpFMul %v3float %896 %900 + %902 = OpFAdd %v3float %899 %901 + %903 = OpCompositeConstruct %v3float %852 %852 %852 + %904 = OpFMul %v3float %849 %903 + %905 = OpFAdd %v3float %902 %904 + %906 = OpVectorTimesMatrix %v3float %905 %549 + %907 = OpMatrixTimesMatrix %mat3v3float %551 %465 + %908 = OpMatrixTimesMatrix %mat3v3float %907 %550 + %909 = OpMatrixTimesMatrix %mat3v3float %551 %469 + %910 = OpMatrixTimesMatrix %mat3v3float %909 %550 + %911 = OpVectorTimesMatrix %v3float %905 %908 + %912 = OpAccessChain %_ptr_Uniform_float %_Globals %int_43 + %913 = OpLoad %float %912 + %914 = 
OpCompositeConstruct %v3float %913 %913 %913 + %915 = OpExtInst %v3float %1 FMix %905 %911 %914 + %916 = OpVectorTimesMatrix %v3float %915 %551 + %917 = OpCompositeExtract %float %916 0 + %918 = OpCompositeExtract %float %916 1 + %919 = OpExtInst %float %1 FMin %917 %918 + %920 = OpCompositeExtract %float %916 2 + %921 = OpExtInst %float %1 FMin %919 %920 + %922 = OpExtInst %float %1 FMax %917 %918 + %923 = OpExtInst %float %1 FMax %922 %920 + %924 = OpExtInst %float %1 FMax %923 %float_1_00000001en10 + %925 = OpExtInst %float %1 FMax %921 %float_1_00000001en10 + %926 = OpFSub %float %924 %925 + %927 = OpExtInst %float %1 FMax %923 %float_0_00999999978 + %928 = OpFDiv %float %926 %927 + %929 = OpFSub %float %920 %918 + %930 = OpFMul %float %920 %929 + %931 = OpFSub %float %918 %917 + %932 = OpFMul %float %918 %931 + %933 = OpFAdd %float %930 %932 + %934 = OpFSub %float %917 %920 + %935 = OpFMul %float %917 %934 + %936 = OpFAdd %float %933 %935 + %937 = OpExtInst %float %1 Sqrt %936 + %938 = OpFAdd %float %920 %918 + %939 = OpFAdd %float %938 %917 + %940 = OpFMul %float %float_1_75 %937 + %941 = OpFAdd %float %939 %940 + %942 = OpFMul %float %941 %float_0_333333343 + %943 = OpFSub %float %928 %float_0_400000006 + %944 = OpFMul %float %943 %float_5 + %945 = OpFMul %float %943 %float_2_5 + %946 = OpExtInst %float %1 FAbs %945 + %947 = OpFSub %float %float_1 %946 + %948 = OpExtInst %float %1 FMax %947 %float_0 + %949 = OpExtInst %float %1 FSign %944 + %950 = OpConvertFToS %int %949 + %951 = OpConvertSToF %float %950 + %952 = OpFMul %float %948 %948 + %953 = OpFSub %float %float_1 %952 + %954 = OpFMul %float %951 %953 + %955 = OpFAdd %float %float_1 %954 + %956 = OpFMul %float %955 %float_0_0250000004 + %957 = OpFOrdLessThanEqual %bool %942 %float_0_0533333346 + OpSelectionMerge %958 None + OpBranchConditional %957 %959 %960 + %960 = OpLabel + %961 = OpFOrdGreaterThanEqual %bool %942 %float_0_159999996 + OpSelectionMerge %962 None + OpBranchConditional %961 %963 %964 + 
%964 = OpLabel + %965 = OpFDiv %float %float_0_239999995 %941 + %966 = OpFSub %float %965 %float_0_5 + %967 = OpFMul %float %956 %966 + OpBranch %962 + %963 = OpLabel + OpBranch %962 + %962 = OpLabel + %968 = OpPhi %float %967 %964 %float_0 %963 + OpBranch %958 + %959 = OpLabel + OpBranch %958 + %958 = OpLabel + %969 = OpPhi %float %968 %962 %956 %959 + %970 = OpFAdd %float %float_1 %969 + %971 = OpCompositeConstruct %v3float %970 %970 %970 + %972 = OpFMul %v3float %916 %971 + %973 = OpCompositeExtract %float %972 0 + %974 = OpCompositeExtract %float %972 1 + %975 = OpFOrdEqual %bool %973 %974 + %976 = OpCompositeExtract %float %972 2 + %977 = OpFOrdEqual %bool %974 %976 + %978 = OpLogicalAnd %bool %975 %977 + OpSelectionMerge %979 None + OpBranchConditional %978 %980 %981 + %981 = OpLabel + %982 = OpExtInst %float %1 Sqrt %float_3 + %983 = OpFSub %float %974 %976 + %984 = OpFMul %float %982 %983 + %985 = OpFMul %float %float_2 %973 + %986 = OpFSub %float %985 %974 + %987 = OpFSub %float %986 %976 + %988 = OpExtInst %float %1 Atan2 %984 %987 + %989 = OpFMul %float %float_57_2957764 %988 + OpBranch %979 + %980 = OpLabel + OpBranch %979 + %979 = OpLabel + %990 = OpPhi %float %989 %981 %float_0 %980 + %991 = OpFOrdLessThan %bool %990 %float_0 + OpSelectionMerge %992 None + OpBranchConditional %991 %993 %992 + %993 = OpLabel + %994 = OpFAdd %float %990 %float_360 + OpBranch %992 + %992 = OpLabel + %995 = OpPhi %float %990 %979 %994 %993 + %996 = OpExtInst %float %1 FClamp %995 %float_0 %float_360 + %997 = OpFOrdGreaterThan %bool %996 %float_180 + OpSelectionMerge %998 None + OpBranchConditional %997 %999 %998 + %999 = OpLabel + %1000 = OpFSub %float %996 %float_360 + OpBranch %998 + %998 = OpLabel + %1001 = OpPhi %float %996 %992 %1000 %999 + %1002 = OpFMul %float %1001 %float_0_0148148146 + %1003 = OpExtInst %float %1 FAbs %1002 + %1004 = OpFSub %float %float_1 %1003 + %1005 = OpExtInst %float %1 SmoothStep %float_0 %float_1 %1004 + %1006 = OpFMul %float %1005 %1005 + 
%1007 = OpFMul %float %1006 %928 + %1008 = OpFSub %float %float_0_0299999993 %973 + %1009 = OpFMul %float %1007 %1008 + %1010 = OpFMul %float %1009 %float_0_180000007 + %1011 = OpFAdd %float %973 %1010 + %1012 = OpCompositeInsert %v3float %1011 %972 0 + %1013 = OpVectorTimesMatrix %v3float %1012 %410 + %1014 = OpExtInst %v3float %1 FMax %132 %1013 + %1015 = OpDot %float %1014 %67 + %1016 = OpCompositeConstruct %v3float %1015 %1015 %1015 + %1017 = OpExtInst %v3float %1 FMix %1016 %1014 %228 + %1018 = OpAccessChain %_ptr_Uniform_float %_Globals %int_13 + %1019 = OpLoad %float %1018 + %1020 = OpFAdd %float %float_1 %1019 + %1021 = OpAccessChain %_ptr_Uniform_float %_Globals %int_11 + %1022 = OpLoad %float %1021 + %1023 = OpFSub %float %1020 %1022 + %1024 = OpAccessChain %_ptr_Uniform_float %_Globals %int_14 + %1025 = OpLoad %float %1024 + %1026 = OpFAdd %float %float_1 %1025 + %1027 = OpAccessChain %_ptr_Uniform_float %_Globals %int_12 + %1028 = OpLoad %float %1027 + %1029 = OpFSub %float %1026 %1028 + %1030 = OpFOrdGreaterThan %bool %1022 %float_0_800000012 + OpSelectionMerge %1031 None + OpBranchConditional %1030 %1032 %1033 + %1033 = OpLabel + %1034 = OpFAdd %float %float_0_180000007 %1019 + %1035 = OpFDiv %float %1034 %1023 + %1036 = OpExtInst %float %1 Log %float_0_180000007 + %1037 = OpExtInst %float %1 Log %float_10 + %1038 = OpFDiv %float %1036 %1037 + %1039 = OpFSub %float %float_2 %1035 + %1040 = OpFDiv %float %1035 %1039 + %1041 = OpExtInst %float %1 Log %1040 + %1042 = OpFMul %float %float_0_5 %1041 + %1043 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1044 = OpLoad %float %1043 + %1045 = OpFDiv %float %1023 %1044 + %1046 = OpFMul %float %1042 %1045 + %1047 = OpFSub %float %1038 %1046 + OpBranch %1031 + %1032 = OpLabel + %1048 = OpFSub %float %float_0_819999993 %1022 + %1049 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1050 = OpLoad %float %1049 + %1051 = OpFDiv %float %1048 %1050 + %1052 = OpExtInst %float %1 Log %float_0_180000007 
+ %1053 = OpExtInst %float %1 Log %float_10 + %1054 = OpFDiv %float %1052 %1053 + %1055 = OpFAdd %float %1051 %1054 + OpBranch %1031 + %1031 = OpLabel + %1056 = OpPhi %float %1047 %1033 %1055 %1032 + %1057 = OpFSub %float %float_1 %1022 + %1058 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1059 = OpLoad %float %1058 + %1060 = OpFDiv %float %1057 %1059 + %1061 = OpFSub %float %1060 %1056 + %1062 = OpFDiv %float %1028 %1059 + %1063 = OpFSub %float %1062 %1061 + %1064 = OpExtInst %v3float %1 Log %1017 + %1065 = OpExtInst %float %1 Log %float_10 + %1066 = OpCompositeConstruct %v3float %1065 %1065 %1065 + %1067 = OpFDiv %v3float %1064 %1066 + %1068 = OpCompositeConstruct %v3float %1059 %1059 %1059 + %1069 = OpCompositeConstruct %v3float %1061 %1061 %1061 + %1070 = OpFAdd %v3float %1067 %1069 + %1071 = OpFMul %v3float %1068 %1070 + %1072 = OpFNegate %float %1019 + %1073 = OpCompositeConstruct %v3float %1072 %1072 %1072 + %1074 = OpFMul %float %float_2 %1023 + %1075 = OpCompositeConstruct %v3float %1074 %1074 %1074 + %1076 = OpFMul %float %float_n2 %1059 + %1077 = OpFDiv %float %1076 %1023 + %1078 = OpCompositeConstruct %v3float %1077 %1077 %1077 + %1079 = OpCompositeConstruct %v3float %1056 %1056 %1056 + %1080 = OpFSub %v3float %1067 %1079 + %1081 = OpFMul %v3float %1078 %1080 + %1082 = OpExtInst %v3float %1 Exp %1081 + %1083 = OpFAdd %v3float %135 %1082 + %1084 = OpFDiv %v3float %1075 %1083 + %1085 = OpFAdd %v3float %1073 %1084 + %1086 = OpCompositeConstruct %v3float %1026 %1026 %1026 + %1087 = OpFMul %float %float_2 %1029 + %1088 = OpCompositeConstruct %v3float %1087 %1087 %1087 + %1089 = OpFMul %float %float_2 %1059 + %1090 = OpFDiv %float %1089 %1029 + %1091 = OpCompositeConstruct %v3float %1090 %1090 %1090 + %1092 = OpCompositeConstruct %v3float %1063 %1063 %1063 + %1093 = OpFSub %v3float %1067 %1092 + %1094 = OpFMul %v3float %1091 %1093 + %1095 = OpExtInst %v3float %1 Exp %1094 + %1096 = OpFAdd %v3float %135 %1095 + %1097 = OpFDiv %v3float %1088 %1096 + 
%1098 = OpFSub %v3float %1086 %1097 + %1099 = OpFOrdLessThan %v3bool %1067 %1079 + %1100 = OpSelect %v3float %1099 %1085 %1071 + %1101 = OpFOrdGreaterThan %v3bool %1067 %1092 + %1102 = OpSelect %v3float %1101 %1098 %1071 + %1103 = OpFSub %float %1063 %1056 + %1104 = OpCompositeConstruct %v3float %1103 %1103 %1103 + %1105 = OpFDiv %v3float %1080 %1104 + %1106 = OpExtInst %v3float %1 FClamp %1105 %132 %135 + %1107 = OpFOrdLessThan %bool %1063 %1056 + %1108 = OpFSub %v3float %135 %1106 + %1109 = OpCompositeConstruct %v3bool %1107 %1107 %1107 + %1110 = OpSelect %v3float %1109 %1108 %1106 + %1111 = OpFMul %v3float %239 %1110 + %1112 = OpFSub %v3float %238 %1111 + %1113 = OpFMul %v3float %1112 %1110 + %1114 = OpFMul %v3float %1113 %1110 + %1115 = OpExtInst %v3float %1 FMix %1100 %1102 %1114 + %1116 = OpDot %float %1115 %67 + %1117 = OpCompositeConstruct %v3float %1116 %1116 %1116 + %1118 = OpExtInst %v3float %1 FMix %1117 %1115 %241 + %1119 = OpExtInst %v3float %1 FMax %132 %1118 + %1120 = OpVectorTimesMatrix %v3float %1119 %910 + %1121 = OpExtInst %v3float %1 FMix %1119 %1120 %914 + %1122 = OpVectorTimesMatrix %v3float %1121 %549 + %1123 = OpExtInst %v3float %1 FMax %132 %1122 + %1124 = OpFOrdEqual %bool %720 %float_0 + OpSelectionMerge %1125 DontFlatten + OpBranchConditional %1124 %1126 %1125 + %1126 = OpLabel + %1127 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_2 + %1128 = OpLoad %v4float %1127 + %1129 = OpVectorShuffle %v3float %1128 %1128 0 1 2 + %1130 = OpDot %float %906 %1129 + %1131 = OpCompositeInsert %v3float %1130 %391 0 + %1132 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 + %1133 = OpLoad %v4float %1132 + %1134 = OpVectorShuffle %v3float %1133 %1133 0 1 2 + %1135 = OpDot %float %906 %1134 + %1136 = OpCompositeInsert %v3float %1135 %1131 1 + %1137 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_4 + %1138 = OpLoad %v4float %1137 + %1139 = OpVectorShuffle %v3float %1138 %1138 0 1 2 + %1140 = OpDot %float %906 %1139 + %1141 = 
OpCompositeInsert %v3float %1140 %1136 2 + %1142 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_8 + %1143 = OpLoad %v4float %1142 + %1144 = OpVectorShuffle %v3float %1143 %1143 0 1 2 + %1145 = OpLoad %v4float %718 + %1146 = OpVectorShuffle %v3float %1145 %1145 0 1 2 + %1147 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %1148 = OpLoad %v4float %1147 + %1149 = OpVectorShuffle %v3float %1148 %1148 0 1 2 + %1150 = OpDot %float %906 %1149 + %1151 = OpFAdd %float %1150 %float_1 + %1152 = OpFDiv %float %float_1 %1151 + %1153 = OpCompositeConstruct %v3float %1152 %1152 %1152 + %1154 = OpFMul %v3float %1146 %1153 + %1155 = OpFAdd %v3float %1144 %1154 + %1156 = OpFMul %v3float %1141 %1155 + %1157 = OpExtInst %v3float %1 FMax %132 %1156 + %1158 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_5 + %1159 = OpLoad %v4float %1158 + %1160 = OpVectorShuffle %v3float %1159 %1159 0 0 0 + %1161 = OpFSub %v3float %1160 %1157 + %1162 = OpExtInst %v3float %1 FMax %132 %1161 + %1163 = OpVectorShuffle %v3float %1159 %1159 2 2 2 + %1164 = OpExtInst %v3float %1 FMax %1157 %1163 + %1165 = OpExtInst %v3float %1 FClamp %1157 %1160 %1163 + %1166 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_6 + %1167 = OpLoad %v4float %1166 + %1168 = OpVectorShuffle %v3float %1167 %1167 0 0 0 + %1169 = OpFMul %v3float %1164 %1168 + %1170 = OpVectorShuffle %v3float %1167 %1167 1 1 1 + %1171 = OpFAdd %v3float %1169 %1170 + %1172 = OpVectorShuffle %v3float %1159 %1159 3 3 3 + %1173 = OpFAdd %v3float %1164 %1172 + %1174 = OpFDiv %v3float %135 %1173 + %1175 = OpFMul %v3float %1171 %1174 + %1176 = OpVectorShuffle %v3float %1138 %1138 3 3 3 + %1177 = OpFMul %v3float %1165 %1176 + %1178 = OpVectorShuffle %v3float %1128 %1128 3 3 3 + %1179 = OpFMul %v3float %1162 %1178 + %1180 = OpVectorShuffle %v3float %1159 %1159 1 1 1 + %1181 = OpFAdd %v3float %1162 %1180 + %1182 = OpFDiv %v3float %135 %1181 + %1183 = OpFMul %v3float %1179 %1182 + %1184 = OpVectorShuffle %v3float %1133 %1133 3 3 3 + %1185 = 
OpFAdd %v3float %1183 %1184 + %1186 = OpFAdd %v3float %1177 %1185 + %1187 = OpFAdd %v3float %1175 %1186 + %1188 = OpFSub %v3float %1187 %248 + OpBranch %1125 + %1125 = OpLabel + %1189 = OpPhi %v3float %1123 %1031 %1188 %1126 + %1190 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_0 + %1191 = OpLoad %float %1190 + %1192 = OpCompositeConstruct %v3float %1191 %1191 %1191 + %1193 = OpFMul %v3float %1189 %1189 + %1194 = OpFMul %v3float %1192 %1193 + %1195 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_1 + %1196 = OpLoad %float %1195 + %1197 = OpCompositeConstruct %v3float %1196 %1196 %1196 + %1198 = OpFMul %v3float %1197 %1189 + %1199 = OpFAdd %v3float %1194 %1198 + %1200 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %1201 = OpLoad %float %1200 + %1202 = OpCompositeConstruct %v3float %1201 %1201 %1201 + %1203 = OpFAdd %v3float %1199 %1202 + %1204 = OpAccessChain %_ptr_Uniform_v3float %_Globals %int_15 + %1205 = OpLoad %v3float %1204 + %1206 = OpFMul %v3float %1203 %1205 + %1207 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_16 + %1208 = OpLoad %v4float %1207 + %1209 = OpVectorShuffle %v3float %1208 %1208 0 1 2 + %1210 = OpAccessChain %_ptr_Uniform_float %_Globals %int_16 %int_3 + %1211 = OpLoad %float %1210 + %1212 = OpCompositeConstruct %v3float %1211 %1211 %1211 + %1213 = OpExtInst %v3float %1 FMix %1206 %1209 %1212 + %1214 = OpExtInst %v3float %1 FMax %132 %1213 + %1215 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %int_1 + %1216 = OpLoad %float %1215 + %1217 = OpCompositeConstruct %v3float %1216 %1216 %1216 + %1218 = OpExtInst %v3float %1 Pow %1214 %1217 + %1219 = OpIEqual %bool %579 %uint_0 + OpSelectionMerge %1220 DontFlatten + OpBranchConditional %1219 %1221 %1222 + %1222 = OpLabel + %1223 = OpIEqual %bool %579 %uint_1 + OpSelectionMerge %1224 None + OpBranchConditional %1223 %1225 %1226 + %1226 = OpLabel + %1227 = OpIEqual %bool %579 %uint_3 + %1228 = OpIEqual %bool %579 %uint_5 + %1229 = OpLogicalOr %bool %1227 
%1228 + OpSelectionMerge %1230 None + OpBranchConditional %1229 %1231 %1232 + %1232 = OpLabel + %1233 = OpIEqual %bool %579 %uint_4 + %1234 = OpIEqual %bool %579 %uint_6 + %1235 = OpLogicalOr %bool %1233 %1234 + OpSelectionMerge %1236 None + OpBranchConditional %1235 %1237 %1238 + %1238 = OpLabel + %1239 = OpIEqual %bool %579 %uint_7 + OpSelectionMerge %1240 None + OpBranchConditional %1239 %1241 %1242 + %1242 = OpLabel + %1243 = OpVectorTimesMatrix %v3float %1218 %547 + %1244 = OpVectorTimesMatrix %v3float %1243 %576 + %1245 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %int_2 + %1246 = OpLoad %float %1245 + %1247 = OpCompositeConstruct %v3float %1246 %1246 %1246 + %1248 = OpExtInst %v3float %1 Pow %1244 %1247 + OpBranch %1240 + %1241 = OpLabel + %1249 = OpVectorTimesMatrix %v3float %906 %547 + %1250 = OpVectorTimesMatrix %v3float %1249 %576 + %1251 = OpFMul %v3float %1250 %496 + %1252 = OpExtInst %v3float %1 Pow %1251 %263 + %1253 = OpFMul %v3float %184 %1252 + %1254 = OpFAdd %v3float %183 %1253 + %1255 = OpFMul %v3float %185 %1252 + %1256 = OpFAdd %v3float %135 %1255 + %1257 = OpFDiv %v3float %135 %1256 + %1258 = OpFMul %v3float %1254 %1257 + %1259 = OpExtInst %v3float %1 Pow %1258 %264 + OpBranch %1240 + %1240 = OpLabel + %1260 = OpPhi %v3float %1248 %1242 %1259 %1241 + OpBranch %1236 + %1237 = OpLabel + %1261 = OpMatrixTimesMatrix %mat3v3float %546 %399 + %1262 = OpFMul %v3float %906 %262 + %1263 = OpVectorTimesMatrix %v3float %1262 %1261 + %1264 = OpCompositeExtract %float %1263 0 + %1265 = OpCompositeExtract %float %1263 1 + %1266 = OpExtInst %float %1 FMin %1264 %1265 + %1267 = OpCompositeExtract %float %1263 2 + %1268 = OpExtInst %float %1 FMin %1266 %1267 + %1269 = OpExtInst %float %1 FMax %1264 %1265 + %1270 = OpExtInst %float %1 FMax %1269 %1267 + %1271 = OpExtInst %float %1 FMax %1270 %float_1_00000001en10 + %1272 = OpExtInst %float %1 FMax %1268 %float_1_00000001en10 + %1273 = OpFSub %float %1271 %1272 + %1274 = OpExtInst %float %1 FMax %1270 
%float_0_00999999978 + %1275 = OpFDiv %float %1273 %1274 + %1276 = OpFSub %float %1267 %1265 + %1277 = OpFMul %float %1267 %1276 + %1278 = OpFSub %float %1265 %1264 + %1279 = OpFMul %float %1265 %1278 + %1280 = OpFAdd %float %1277 %1279 + %1281 = OpFSub %float %1264 %1267 + %1282 = OpFMul %float %1264 %1281 + %1283 = OpFAdd %float %1280 %1282 + %1284 = OpExtInst %float %1 Sqrt %1283 + %1285 = OpFAdd %float %1267 %1265 + %1286 = OpFAdd %float %1285 %1264 + %1287 = OpFMul %float %float_1_75 %1284 + %1288 = OpFAdd %float %1286 %1287 + %1289 = OpFMul %float %1288 %float_0_333333343 + %1290 = OpFSub %float %1275 %float_0_400000006 + %1291 = OpFMul %float %1290 %float_5 + %1292 = OpFMul %float %1290 %float_2_5 + %1293 = OpExtInst %float %1 FAbs %1292 + %1294 = OpFSub %float %float_1 %1293 + %1295 = OpExtInst %float %1 FMax %1294 %float_0 + %1296 = OpExtInst %float %1 FSign %1291 + %1297 = OpConvertFToS %int %1296 + %1298 = OpConvertSToF %float %1297 + %1299 = OpFMul %float %1295 %1295 + %1300 = OpFSub %float %float_1 %1299 + %1301 = OpFMul %float %1298 %1300 + %1302 = OpFAdd %float %float_1 %1301 + %1303 = OpFMul %float %1302 %float_0_0250000004 + %1304 = OpFOrdLessThanEqual %bool %1289 %float_0_0533333346 + OpSelectionMerge %1305 None + OpBranchConditional %1304 %1306 %1307 + %1307 = OpLabel + %1308 = OpFOrdGreaterThanEqual %bool %1289 %float_0_159999996 + OpSelectionMerge %1309 None + OpBranchConditional %1308 %1310 %1311 + %1311 = OpLabel + %1312 = OpFDiv %float %float_0_239999995 %1288 + %1313 = OpFSub %float %1312 %float_0_5 + %1314 = OpFMul %float %1303 %1313 + OpBranch %1309 + %1310 = OpLabel + OpBranch %1309 + %1309 = OpLabel + %1315 = OpPhi %float %1314 %1311 %float_0 %1310 + OpBranch %1305 + %1306 = OpLabel + OpBranch %1305 + %1305 = OpLabel + %1316 = OpPhi %float %1315 %1309 %1303 %1306 + %1317 = OpFAdd %float %float_1 %1316 + %1318 = OpCompositeConstruct %v3float %1317 %1317 %1317 + %1319 = OpFMul %v3float %1263 %1318 + %1320 = OpCompositeExtract %float %1319 
0 + %1321 = OpCompositeExtract %float %1319 1 + %1322 = OpFOrdEqual %bool %1320 %1321 + %1323 = OpCompositeExtract %float %1319 2 + %1324 = OpFOrdEqual %bool %1321 %1323 + %1325 = OpLogicalAnd %bool %1322 %1324 + OpSelectionMerge %1326 None + OpBranchConditional %1325 %1327 %1328 + %1328 = OpLabel + %1329 = OpExtInst %float %1 Sqrt %float_3 + %1330 = OpFSub %float %1321 %1323 + %1331 = OpFMul %float %1329 %1330 + %1332 = OpFMul %float %float_2 %1320 + %1333 = OpFSub %float %1332 %1321 + %1334 = OpFSub %float %1333 %1323 + %1335 = OpExtInst %float %1 Atan2 %1331 %1334 + %1336 = OpFMul %float %float_57_2957764 %1335 + OpBranch %1326 + %1327 = OpLabel + OpBranch %1326 + %1326 = OpLabel + %1337 = OpPhi %float %1336 %1328 %float_0 %1327 + %1338 = OpFOrdLessThan %bool %1337 %float_0 + OpSelectionMerge %1339 None + OpBranchConditional %1338 %1340 %1339 + %1340 = OpLabel + %1341 = OpFAdd %float %1337 %float_360 + OpBranch %1339 + %1339 = OpLabel + %1342 = OpPhi %float %1337 %1326 %1341 %1340 + %1343 = OpExtInst %float %1 FClamp %1342 %float_0 %float_360 + %1344 = OpFOrdGreaterThan %bool %1343 %float_180 + OpSelectionMerge %1345 None + OpBranchConditional %1344 %1346 %1345 + %1346 = OpLabel + %1347 = OpFSub %float %1343 %float_360 + OpBranch %1345 + %1345 = OpLabel + %1348 = OpPhi %float %1343 %1339 %1347 %1346 + %1349 = OpFOrdGreaterThan %bool %1348 %float_n67_5 + %1350 = OpFOrdLessThan %bool %1348 %float_67_5 + %1351 = OpLogicalAnd %bool %1349 %1350 + OpSelectionMerge %1352 None + OpBranchConditional %1351 %1353 %1352 + %1353 = OpLabel + %1354 = OpFSub %float %1348 %float_n67_5 + %1355 = OpFMul %float %1354 %float_0_0296296291 + %1356 = OpConvertFToS %int %1355 + %1357 = OpConvertSToF %float %1356 + %1358 = OpFSub %float %1355 %1357 + %1359 = OpFMul %float %1358 %1358 + %1360 = OpFMul %float %1359 %1358 + %1361 = OpIEqual %bool %1356 %int_3 + OpSelectionMerge %1362 None + OpBranchConditional %1361 %1363 %1364 + %1364 = OpLabel + %1365 = OpIEqual %bool %1356 %int_2 + 
OpSelectionMerge %1366 None + OpBranchConditional %1365 %1367 %1368 + %1368 = OpLabel + %1369 = OpIEqual %bool %1356 %int_1 + OpSelectionMerge %1370 None + OpBranchConditional %1369 %1371 %1372 + %1372 = OpLabel + %1373 = OpIEqual %bool %1356 %int_0 + OpSelectionMerge %1374 None + OpBranchConditional %1373 %1375 %1376 + %1376 = OpLabel + OpBranch %1374 + %1375 = OpLabel + %1377 = OpFMul %float %1360 %float_0_166666672 + OpBranch %1374 + %1374 = OpLabel + %1378 = OpPhi %float %float_0 %1376 %1377 %1375 + OpBranch %1370 + %1371 = OpLabel + %1379 = OpFMul %float %1360 %float_n0_5 + %1380 = OpFMul %float %1359 %float_0_5 + %1381 = OpFAdd %float %1379 %1380 + %1382 = OpFMul %float %1358 %float_0_5 + %1383 = OpFAdd %float %1381 %1382 + %1384 = OpFAdd %float %1383 %float_0_166666672 + OpBranch %1370 + %1370 = OpLabel + %1385 = OpPhi %float %1378 %1374 %1384 %1371 + OpBranch %1366 + %1367 = OpLabel + %1386 = OpFMul %float %1360 %float_0_5 + %1387 = OpFMul %float %1359 %float_n1 + %1388 = OpFAdd %float %1386 %1387 + %1389 = OpFAdd %float %1388 %float_0_666666687 + OpBranch %1366 + %1366 = OpLabel + %1390 = OpPhi %float %1385 %1370 %1389 %1367 + OpBranch %1362 + %1363 = OpLabel + %1391 = OpFMul %float %1360 %float_n0_166666672 + %1392 = OpFMul %float %1359 %float_0_5 + %1393 = OpFAdd %float %1391 %1392 + %1394 = OpFMul %float %1358 %float_n0_5 + %1395 = OpFAdd %float %1393 %1394 + %1396 = OpFAdd %float %1395 %float_0_166666672 + OpBranch %1362 + %1362 = OpLabel + %1397 = OpPhi %float %1390 %1366 %1396 %1363 + OpBranch %1352 + %1352 = OpLabel + %1398 = OpPhi %float %float_0 %1345 %1397 %1362 + %1399 = OpFMul %float %1398 %float_1_5 + %1400 = OpFMul %float %1399 %1275 + %1401 = OpFSub %float %float_0_0299999993 %1320 + %1402 = OpFMul %float %1400 %1401 + %1403 = OpFMul %float %1402 %float_0_180000007 + %1404 = OpFAdd %float %1320 %1403 + %1405 = OpCompositeInsert %v3float %1404 %1319 0 + %1406 = OpExtInst %v3float %1 FClamp %1405 %132 %314 + %1407 = OpVectorTimesMatrix 
%v3float %1406 %410 + %1408 = OpExtInst %v3float %1 FClamp %1407 %132 %314 + %1409 = OpDot %float %1408 %67 + %1410 = OpCompositeConstruct %v3float %1409 %1409 %1409 + %1411 = OpExtInst %v3float %1 FMix %1410 %1408 %228 + %1412 = OpCompositeExtract %float %1411 0 + %1413 = OpExtInst %float %1 Exp2 %float_n15 + %1414 = OpFMul %float %float_0_179999992 %1413 + %1415 = OpExtInst %float %1 Exp2 %float_18 + %1416 = OpFMul %float %float_0_179999992 %1415 + OpStore %502 %475 + OpStore %501 %476 + %1417 = OpFOrdLessThanEqual %bool %1412 %float_0 + %1418 = OpExtInst %float %1 Exp2 %float_n14 + %1419 = OpSelect %float %1417 %1418 %1412 + %1420 = OpExtInst %float %1 Log %1419 + %1421 = OpFDiv %float %1420 %1065 + %1422 = OpExtInst %float %1 Log %1414 + %1423 = OpFDiv %float %1422 %1065 + %1424 = OpFOrdLessThanEqual %bool %1421 %1423 + OpSelectionMerge %1425 None + OpBranchConditional %1424 %1426 %1427 + %1427 = OpLabel + %1428 = OpFOrdGreaterThan %bool %1421 %1423 + %1429 = OpExtInst %float %1 Log %float_0_180000007 + %1430 = OpFDiv %float %1429 %1065 + %1431 = OpFOrdLessThan %bool %1421 %1430 + %1432 = OpLogicalAnd %bool %1428 %1431 + OpSelectionMerge %1433 None + OpBranchConditional %1432 %1434 %1435 + %1435 = OpLabel + %1436 = OpFOrdGreaterThanEqual %bool %1421 %1430 + %1437 = OpExtInst %float %1 Log %1416 + %1438 = OpFDiv %float %1437 %1065 + %1439 = OpFOrdLessThan %bool %1421 %1438 + %1440 = OpLogicalAnd %bool %1436 %1439 + OpSelectionMerge %1441 None + OpBranchConditional %1440 %1442 %1443 + %1443 = OpLabel + %1444 = OpExtInst %float %1 Log %float_10000 + %1445 = OpFDiv %float %1444 %1065 + OpBranch %1441 + %1442 = OpLabel + %1446 = OpFSub %float %1421 %1430 + %1447 = OpFMul %float %float_3 %1446 + %1448 = OpFSub %float %1438 %1430 + %1449 = OpFDiv %float %1447 %1448 + %1450 = OpConvertFToS %int %1449 + %1451 = OpConvertSToF %float %1450 + %1452 = OpFSub %float %1449 %1451 + %1453 = OpAccessChain %_ptr_Function_float %501 %1450 + %1454 = OpLoad %float %1453 + %1455 = 
OpIAdd %int %1450 %int_1 + %1456 = OpAccessChain %_ptr_Function_float %501 %1455 + %1457 = OpLoad %float %1456 + %1458 = OpIAdd %int %1450 %int_2 + %1459 = OpAccessChain %_ptr_Function_float %501 %1458 + %1460 = OpLoad %float %1459 + %1461 = OpCompositeConstruct %v3float %1454 %1457 %1460 + %1462 = OpFMul %float %1452 %1452 + %1463 = OpCompositeConstruct %v3float %1462 %1452 %float_1 + %1464 = OpMatrixTimesVector %v3float %442 %1461 + %1465 = OpDot %float %1463 %1464 + OpBranch %1441 + %1441 = OpLabel + %1466 = OpPhi %float %1445 %1443 %1465 %1442 + OpBranch %1433 + %1434 = OpLabel + %1467 = OpFSub %float %1421 %1423 + %1468 = OpFMul %float %float_3 %1467 + %1469 = OpFSub %float %1430 %1423 + %1470 = OpFDiv %float %1468 %1469 + %1471 = OpConvertFToS %int %1470 + %1472 = OpConvertSToF %float %1471 + %1473 = OpFSub %float %1470 %1472 + %1474 = OpAccessChain %_ptr_Function_float %502 %1471 + %1475 = OpLoad %float %1474 + %1476 = OpIAdd %int %1471 %int_1 + %1477 = OpAccessChain %_ptr_Function_float %502 %1476 + %1478 = OpLoad %float %1477 + %1479 = OpIAdd %int %1471 %int_2 + %1480 = OpAccessChain %_ptr_Function_float %502 %1479 + %1481 = OpLoad %float %1480 + %1482 = OpCompositeConstruct %v3float %1475 %1478 %1481 + %1483 = OpFMul %float %1473 %1473 + %1484 = OpCompositeConstruct %v3float %1483 %1473 %float_1 + %1485 = OpMatrixTimesVector %v3float %442 %1482 + %1486 = OpDot %float %1484 %1485 + OpBranch %1433 + %1433 = OpLabel + %1487 = OpPhi %float %1466 %1441 %1486 %1434 + OpBranch %1425 + %1426 = OpLabel + %1488 = OpExtInst %float %1 Log %float_9_99999975en05 + %1489 = OpFDiv %float %1488 %1065 + OpBranch %1425 + %1425 = OpLabel + %1490 = OpPhi %float %1487 %1433 %1489 %1426 + %1491 = OpExtInst %float %1 Pow %float_10 %1490 + %1492 = OpCompositeInsert %v3float %1491 %391 0 + %1493 = OpCompositeExtract %float %1411 1 + OpStore %504 %475 + OpStore %503 %476 + %1494 = OpFOrdLessThanEqual %bool %1493 %float_0 + %1495 = OpSelect %float %1494 %1418 %1493 + %1496 = 
OpExtInst %float %1 Log %1495 + %1497 = OpFDiv %float %1496 %1065 + %1498 = OpFOrdLessThanEqual %bool %1497 %1423 + OpSelectionMerge %1499 None + OpBranchConditional %1498 %1500 %1501 + %1501 = OpLabel + %1502 = OpFOrdGreaterThan %bool %1497 %1423 + %1503 = OpExtInst %float %1 Log %float_0_180000007 + %1504 = OpFDiv %float %1503 %1065 + %1505 = OpFOrdLessThan %bool %1497 %1504 + %1506 = OpLogicalAnd %bool %1502 %1505 + OpSelectionMerge %1507 None + OpBranchConditional %1506 %1508 %1509 + %1509 = OpLabel + %1510 = OpFOrdGreaterThanEqual %bool %1497 %1504 + %1511 = OpExtInst %float %1 Log %1416 + %1512 = OpFDiv %float %1511 %1065 + %1513 = OpFOrdLessThan %bool %1497 %1512 + %1514 = OpLogicalAnd %bool %1510 %1513 + OpSelectionMerge %1515 None + OpBranchConditional %1514 %1516 %1517 + %1517 = OpLabel + %1518 = OpExtInst %float %1 Log %float_10000 + %1519 = OpFDiv %float %1518 %1065 + OpBranch %1515 + %1516 = OpLabel + %1520 = OpFSub %float %1497 %1504 + %1521 = OpFMul %float %float_3 %1520 + %1522 = OpFSub %float %1512 %1504 + %1523 = OpFDiv %float %1521 %1522 + %1524 = OpConvertFToS %int %1523 + %1525 = OpConvertSToF %float %1524 + %1526 = OpFSub %float %1523 %1525 + %1527 = OpAccessChain %_ptr_Function_float %503 %1524 + %1528 = OpLoad %float %1527 + %1529 = OpIAdd %int %1524 %int_1 + %1530 = OpAccessChain %_ptr_Function_float %503 %1529 + %1531 = OpLoad %float %1530 + %1532 = OpIAdd %int %1524 %int_2 + %1533 = OpAccessChain %_ptr_Function_float %503 %1532 + %1534 = OpLoad %float %1533 + %1535 = OpCompositeConstruct %v3float %1528 %1531 %1534 + %1536 = OpFMul %float %1526 %1526 + %1537 = OpCompositeConstruct %v3float %1536 %1526 %float_1 + %1538 = OpMatrixTimesVector %v3float %442 %1535 + %1539 = OpDot %float %1537 %1538 + OpBranch %1515 + %1515 = OpLabel + %1540 = OpPhi %float %1519 %1517 %1539 %1516 + OpBranch %1507 + %1508 = OpLabel + %1541 = OpFSub %float %1497 %1423 + %1542 = OpFMul %float %float_3 %1541 + %1543 = OpFSub %float %1504 %1423 + %1544 = OpFDiv 
%float %1542 %1543 + %1545 = OpConvertFToS %int %1544 + %1546 = OpConvertSToF %float %1545 + %1547 = OpFSub %float %1544 %1546 + %1548 = OpAccessChain %_ptr_Function_float %504 %1545 + %1549 = OpLoad %float %1548 + %1550 = OpIAdd %int %1545 %int_1 + %1551 = OpAccessChain %_ptr_Function_float %504 %1550 + %1552 = OpLoad %float %1551 + %1553 = OpIAdd %int %1545 %int_2 + %1554 = OpAccessChain %_ptr_Function_float %504 %1553 + %1555 = OpLoad %float %1554 + %1556 = OpCompositeConstruct %v3float %1549 %1552 %1555 + %1557 = OpFMul %float %1547 %1547 + %1558 = OpCompositeConstruct %v3float %1557 %1547 %float_1 + %1559 = OpMatrixTimesVector %v3float %442 %1556 + %1560 = OpDot %float %1558 %1559 + OpBranch %1507 + %1507 = OpLabel + %1561 = OpPhi %float %1540 %1515 %1560 %1508 + OpBranch %1499 + %1500 = OpLabel + %1562 = OpExtInst %float %1 Log %float_9_99999975en05 + %1563 = OpFDiv %float %1562 %1065 + OpBranch %1499 + %1499 = OpLabel + %1564 = OpPhi %float %1561 %1507 %1563 %1500 + %1565 = OpExtInst %float %1 Pow %float_10 %1564 + %1566 = OpCompositeInsert %v3float %1565 %1492 1 + %1567 = OpCompositeExtract %float %1411 2 + OpStore %506 %475 + OpStore %505 %476 + %1568 = OpFOrdLessThanEqual %bool %1567 %float_0 + %1569 = OpSelect %float %1568 %1418 %1567 + %1570 = OpExtInst %float %1 Log %1569 + %1571 = OpFDiv %float %1570 %1065 + %1572 = OpFOrdLessThanEqual %bool %1571 %1423 + OpSelectionMerge %1573 None + OpBranchConditional %1572 %1574 %1575 + %1575 = OpLabel + %1576 = OpFOrdGreaterThan %bool %1571 %1423 + %1577 = OpExtInst %float %1 Log %float_0_180000007 + %1578 = OpFDiv %float %1577 %1065 + %1579 = OpFOrdLessThan %bool %1571 %1578 + %1580 = OpLogicalAnd %bool %1576 %1579 + OpSelectionMerge %1581 None + OpBranchConditional %1580 %1582 %1583 + %1583 = OpLabel + %1584 = OpFOrdGreaterThanEqual %bool %1571 %1578 + %1585 = OpExtInst %float %1 Log %1416 + %1586 = OpFDiv %float %1585 %1065 + %1587 = OpFOrdLessThan %bool %1571 %1586 + %1588 = OpLogicalAnd %bool %1584 %1587 + 
OpSelectionMerge %1589 None + OpBranchConditional %1588 %1590 %1591 + %1591 = OpLabel + %1592 = OpExtInst %float %1 Log %float_10000 + %1593 = OpFDiv %float %1592 %1065 + OpBranch %1589 + %1590 = OpLabel + %1594 = OpFSub %float %1571 %1578 + %1595 = OpFMul %float %float_3 %1594 + %1596 = OpFSub %float %1586 %1578 + %1597 = OpFDiv %float %1595 %1596 + %1598 = OpConvertFToS %int %1597 + %1599 = OpConvertSToF %float %1598 + %1600 = OpFSub %float %1597 %1599 + %1601 = OpAccessChain %_ptr_Function_float %505 %1598 + %1602 = OpLoad %float %1601 + %1603 = OpIAdd %int %1598 %int_1 + %1604 = OpAccessChain %_ptr_Function_float %505 %1603 + %1605 = OpLoad %float %1604 + %1606 = OpIAdd %int %1598 %int_2 + %1607 = OpAccessChain %_ptr_Function_float %505 %1606 + %1608 = OpLoad %float %1607 + %1609 = OpCompositeConstruct %v3float %1602 %1605 %1608 + %1610 = OpFMul %float %1600 %1600 + %1611 = OpCompositeConstruct %v3float %1610 %1600 %float_1 + %1612 = OpMatrixTimesVector %v3float %442 %1609 + %1613 = OpDot %float %1611 %1612 + OpBranch %1589 + %1589 = OpLabel + %1614 = OpPhi %float %1593 %1591 %1613 %1590 + OpBranch %1581 + %1582 = OpLabel + %1615 = OpFSub %float %1571 %1423 + %1616 = OpFMul %float %float_3 %1615 + %1617 = OpFSub %float %1578 %1423 + %1618 = OpFDiv %float %1616 %1617 + %1619 = OpConvertFToS %int %1618 + %1620 = OpConvertSToF %float %1619 + %1621 = OpFSub %float %1618 %1620 + %1622 = OpAccessChain %_ptr_Function_float %506 %1619 + %1623 = OpLoad %float %1622 + %1624 = OpIAdd %int %1619 %int_1 + %1625 = OpAccessChain %_ptr_Function_float %506 %1624 + %1626 = OpLoad %float %1625 + %1627 = OpIAdd %int %1619 %int_2 + %1628 = OpAccessChain %_ptr_Function_float %506 %1627 + %1629 = OpLoad %float %1628 + %1630 = OpCompositeConstruct %v3float %1623 %1626 %1629 + %1631 = OpFMul %float %1621 %1621 + %1632 = OpCompositeConstruct %v3float %1631 %1621 %float_1 + %1633 = OpMatrixTimesVector %v3float %442 %1630 + %1634 = OpDot %float %1632 %1633 + OpBranch %1581 + %1581 = 
OpLabel + %1635 = OpPhi %float %1614 %1589 %1634 %1582 + OpBranch %1573 + %1574 = OpLabel + %1636 = OpExtInst %float %1 Log %float_9_99999975en05 + %1637 = OpFDiv %float %1636 %1065 + OpBranch %1573 + %1573 = OpLabel + %1638 = OpPhi %float %1635 %1581 %1637 %1574 + %1639 = OpExtInst %float %1 Pow %float_10 %1638 + %1640 = OpCompositeInsert %v3float %1639 %1566 2 + %1641 = OpVectorTimesMatrix %v3float %1640 %414 + %1642 = OpVectorTimesMatrix %v3float %1641 %410 + %1643 = OpExtInst %float %1 Pow %float_2 %float_n12 + %1644 = OpFMul %float %float_0_179999992 %1643 + OpStore %514 %475 + OpStore %513 %476 + %1645 = OpFOrdLessThanEqual %bool %1644 %float_0 + %1646 = OpSelect %float %1645 %1418 %1644 + %1647 = OpExtInst %float %1 Log %1646 + %1648 = OpFDiv %float %1647 %1065 + %1649 = OpFOrdLessThanEqual %bool %1648 %1423 + OpSelectionMerge %1650 None + OpBranchConditional %1649 %1651 %1652 + %1652 = OpLabel + %1653 = OpFOrdGreaterThan %bool %1648 %1423 + %1654 = OpExtInst %float %1 Log %float_0_180000007 + %1655 = OpFDiv %float %1654 %1065 + %1656 = OpFOrdLessThan %bool %1648 %1655 + %1657 = OpLogicalAnd %bool %1653 %1656 + OpSelectionMerge %1658 None + OpBranchConditional %1657 %1659 %1660 + %1660 = OpLabel + %1661 = OpFOrdGreaterThanEqual %bool %1648 %1655 + %1662 = OpExtInst %float %1 Log %1416 + %1663 = OpFDiv %float %1662 %1065 + %1664 = OpFOrdLessThan %bool %1648 %1663 + %1665 = OpLogicalAnd %bool %1661 %1664 + OpSelectionMerge %1666 None + OpBranchConditional %1665 %1667 %1668 + %1668 = OpLabel + %1669 = OpExtInst %float %1 Log %float_10000 + %1670 = OpFDiv %float %1669 %1065 + OpBranch %1666 + %1667 = OpLabel + %1671 = OpFSub %float %1648 %1655 + %1672 = OpFMul %float %float_3 %1671 + %1673 = OpFSub %float %1663 %1655 + %1674 = OpFDiv %float %1672 %1673 + %1675 = OpConvertFToS %int %1674 + %1676 = OpConvertSToF %float %1675 + %1677 = OpFSub %float %1674 %1676 + %1678 = OpAccessChain %_ptr_Function_float %513 %1675 + %1679 = OpLoad %float %1678 + %1680 = OpIAdd 
%int %1675 %int_1 + %1681 = OpAccessChain %_ptr_Function_float %513 %1680 + %1682 = OpLoad %float %1681 + %1683 = OpIAdd %int %1675 %int_2 + %1684 = OpAccessChain %_ptr_Function_float %513 %1683 + %1685 = OpLoad %float %1684 + %1686 = OpCompositeConstruct %v3float %1679 %1682 %1685 + %1687 = OpFMul %float %1677 %1677 + %1688 = OpCompositeConstruct %v3float %1687 %1677 %float_1 + %1689 = OpMatrixTimesVector %v3float %442 %1686 + %1690 = OpDot %float %1688 %1689 + OpBranch %1666 + %1666 = OpLabel + %1691 = OpPhi %float %1670 %1668 %1690 %1667 + OpBranch %1658 + %1659 = OpLabel + %1692 = OpFSub %float %1648 %1423 + %1693 = OpFMul %float %float_3 %1692 + %1694 = OpFSub %float %1655 %1423 + %1695 = OpFDiv %float %1693 %1694 + %1696 = OpConvertFToS %int %1695 + %1697 = OpConvertSToF %float %1696 + %1698 = OpFSub %float %1695 %1697 + %1699 = OpAccessChain %_ptr_Function_float %514 %1696 + %1700 = OpLoad %float %1699 + %1701 = OpIAdd %int %1696 %int_1 + %1702 = OpAccessChain %_ptr_Function_float %514 %1701 + %1703 = OpLoad %float %1702 + %1704 = OpIAdd %int %1696 %int_2 + %1705 = OpAccessChain %_ptr_Function_float %514 %1704 + %1706 = OpLoad %float %1705 + %1707 = OpCompositeConstruct %v3float %1700 %1703 %1706 + %1708 = OpFMul %float %1698 %1698 + %1709 = OpCompositeConstruct %v3float %1708 %1698 %float_1 + %1710 = OpMatrixTimesVector %v3float %442 %1707 + %1711 = OpDot %float %1709 %1710 + OpBranch %1658 + %1658 = OpLabel + %1712 = OpPhi %float %1691 %1666 %1711 %1659 + OpBranch %1650 + %1651 = OpLabel + %1713 = OpExtInst %float %1 Log %float_9_99999975en05 + %1714 = OpFDiv %float %1713 %1065 + OpBranch %1650 + %1650 = OpLabel + %1715 = OpPhi %float %1712 %1658 %1714 %1651 + %1716 = OpExtInst %float %1 Pow %float_10 %1715 + OpStore %516 %475 + OpStore %515 %476 + %1717 = OpExtInst %float %1 Log %float_0_180000007 + %1718 = OpFDiv %float %1717 %1065 + %1719 = OpFOrdLessThanEqual %bool %1718 %1423 + OpSelectionMerge %1720 None + OpBranchConditional %1719 %1721 %1722 + 
%1722 = OpLabel + %1723 = OpFOrdGreaterThan %bool %1718 %1423 + %1724 = OpFOrdLessThan %bool %1718 %1718 + %1725 = OpLogicalAnd %bool %1723 %1724 + OpSelectionMerge %1726 None + OpBranchConditional %1725 %1727 %1728 + %1728 = OpLabel + %1729 = OpFOrdGreaterThanEqual %bool %1718 %1718 + %1730 = OpExtInst %float %1 Log %1416 + %1731 = OpFDiv %float %1730 %1065 + %1732 = OpFOrdLessThan %bool %1718 %1731 + %1733 = OpLogicalAnd %bool %1729 %1732 + OpSelectionMerge %1734 None + OpBranchConditional %1733 %1735 %1736 + %1736 = OpLabel + %1737 = OpExtInst %float %1 Log %float_10000 + %1738 = OpFDiv %float %1737 %1065 + OpBranch %1734 + %1735 = OpLabel + %1739 = OpFSub %float %1718 %1718 + %1740 = OpFMul %float %float_3 %1739 + %1741 = OpFSub %float %1731 %1718 + %1742 = OpFDiv %float %1740 %1741 + %1743 = OpConvertFToS %int %1742 + %1744 = OpConvertSToF %float %1743 + %1745 = OpFSub %float %1742 %1744 + %1746 = OpAccessChain %_ptr_Function_float %515 %1743 + %1747 = OpLoad %float %1746 + %1748 = OpIAdd %int %1743 %int_1 + %1749 = OpAccessChain %_ptr_Function_float %515 %1748 + %1750 = OpLoad %float %1749 + %1751 = OpIAdd %int %1743 %int_2 + %1752 = OpAccessChain %_ptr_Function_float %515 %1751 + %1753 = OpLoad %float %1752 + %1754 = OpCompositeConstruct %v3float %1747 %1750 %1753 + %1755 = OpFMul %float %1745 %1745 + %1756 = OpCompositeConstruct %v3float %1755 %1745 %float_1 + %1757 = OpMatrixTimesVector %v3float %442 %1754 + %1758 = OpDot %float %1756 %1757 + OpBranch %1734 + %1734 = OpLabel + %1759 = OpPhi %float %1738 %1736 %1758 %1735 + OpBranch %1726 + %1727 = OpLabel + %1760 = OpFSub %float %1718 %1423 + %1761 = OpFMul %float %float_3 %1760 + %1762 = OpAccessChain %_ptr_Function_float %516 %int_3 + %1763 = OpLoad %float %1762 + %1764 = OpAccessChain %_ptr_Function_float %516 %int_4 + %1765 = OpLoad %float %1764 + %1766 = OpAccessChain %_ptr_Function_float %516 %int_5 + %1767 = OpLoad %float %1766 + %1768 = OpCompositeConstruct %v3float %1763 %1765 %1767 + %1769 = 
OpMatrixTimesVector %v3float %442 %1768 + %1770 = OpCompositeExtract %float %1769 2 + OpBranch %1726 + %1726 = OpLabel + %1771 = OpPhi %float %1759 %1734 %1770 %1727 + OpBranch %1720 + %1721 = OpLabel + %1772 = OpExtInst %float %1 Log %float_9_99999975en05 + %1773 = OpFDiv %float %1772 %1065 + OpBranch %1720 + %1720 = OpLabel + %1774 = OpPhi %float %1771 %1726 %1773 %1721 + %1775 = OpExtInst %float %1 Pow %float_10 %1774 + %1776 = OpExtInst %float %1 Pow %float_2 %float_11 + %1777 = OpFMul %float %float_0_179999992 %1776 + OpStore %518 %475 + OpStore %517 %476 + %1778 = OpFOrdLessThanEqual %bool %1777 %float_0 + %1779 = OpSelect %float %1778 %1418 %1777 + %1780 = OpExtInst %float %1 Log %1779 + %1781 = OpFDiv %float %1780 %1065 + %1782 = OpFOrdLessThanEqual %bool %1781 %1423 + OpSelectionMerge %1783 None + OpBranchConditional %1782 %1784 %1785 + %1785 = OpLabel + %1786 = OpFOrdGreaterThan %bool %1781 %1423 + %1787 = OpFOrdLessThan %bool %1781 %1718 + %1788 = OpLogicalAnd %bool %1786 %1787 + OpSelectionMerge %1789 None + OpBranchConditional %1788 %1790 %1791 + %1791 = OpLabel + %1792 = OpFOrdGreaterThanEqual %bool %1781 %1718 + %1793 = OpExtInst %float %1 Log %1416 + %1794 = OpFDiv %float %1793 %1065 + %1795 = OpFOrdLessThan %bool %1781 %1794 + %1796 = OpLogicalAnd %bool %1792 %1795 + OpSelectionMerge %1797 None + OpBranchConditional %1796 %1798 %1799 + %1799 = OpLabel + %1800 = OpExtInst %float %1 Log %float_10000 + %1801 = OpFDiv %float %1800 %1065 + OpBranch %1797 + %1798 = OpLabel + %1802 = OpFSub %float %1781 %1718 + %1803 = OpFMul %float %float_3 %1802 + %1804 = OpFSub %float %1794 %1718 + %1805 = OpFDiv %float %1803 %1804 + %1806 = OpConvertFToS %int %1805 + %1807 = OpConvertSToF %float %1806 + %1808 = OpFSub %float %1805 %1807 + %1809 = OpAccessChain %_ptr_Function_float %517 %1806 + %1810 = OpLoad %float %1809 + %1811 = OpIAdd %int %1806 %int_1 + %1812 = OpAccessChain %_ptr_Function_float %517 %1811 + %1813 = OpLoad %float %1812 + %1814 = OpIAdd %int %1806 
%int_2 + %1815 = OpAccessChain %_ptr_Function_float %517 %1814 + %1816 = OpLoad %float %1815 + %1817 = OpCompositeConstruct %v3float %1810 %1813 %1816 + %1818 = OpFMul %float %1808 %1808 + %1819 = OpCompositeConstruct %v3float %1818 %1808 %float_1 + %1820 = OpMatrixTimesVector %v3float %442 %1817 + %1821 = OpDot %float %1819 %1820 + OpBranch %1797 + %1797 = OpLabel + %1822 = OpPhi %float %1801 %1799 %1821 %1798 + OpBranch %1789 + %1790 = OpLabel + %1823 = OpFSub %float %1781 %1423 + %1824 = OpFMul %float %float_3 %1823 + %1825 = OpFSub %float %1718 %1423 + %1826 = OpFDiv %float %1824 %1825 + %1827 = OpConvertFToS %int %1826 + %1828 = OpConvertSToF %float %1827 + %1829 = OpFSub %float %1826 %1828 + %1830 = OpAccessChain %_ptr_Function_float %518 %1827 + %1831 = OpLoad %float %1830 + %1832 = OpIAdd %int %1827 %int_1 + %1833 = OpAccessChain %_ptr_Function_float %518 %1832 + %1834 = OpLoad %float %1833 + %1835 = OpIAdd %int %1827 %int_2 + %1836 = OpAccessChain %_ptr_Function_float %518 %1835 + %1837 = OpLoad %float %1836 + %1838 = OpCompositeConstruct %v3float %1831 %1834 %1837 + %1839 = OpFMul %float %1829 %1829 + %1840 = OpCompositeConstruct %v3float %1839 %1829 %float_1 + %1841 = OpMatrixTimesVector %v3float %442 %1838 + %1842 = OpDot %float %1840 %1841 + OpBranch %1789 + %1789 = OpLabel + %1843 = OpPhi %float %1822 %1797 %1842 %1790 + OpBranch %1783 + %1784 = OpLabel + %1844 = OpExtInst %float %1 Log %float_9_99999975en05 + %1845 = OpFDiv %float %1844 %1065 + OpBranch %1783 + %1783 = OpLabel + %1846 = OpPhi %float %1843 %1789 %1845 %1784 + %1847 = OpExtInst %float %1 Pow %float_10 %1846 + %1848 = OpCompositeExtract %float %1642 0 + OpStore %512 %482 + OpStore %511 %483 + %1849 = OpFOrdLessThanEqual %bool %1848 %float_0 + %1850 = OpSelect %float %1849 %float_9_99999975en05 %1848 + %1851 = OpExtInst %float %1 Log %1850 + %1852 = OpFDiv %float %1851 %1065 + %1853 = OpExtInst %float %1 Log %1716 + %1854 = OpFDiv %float %1853 %1065 + %1855 = OpFOrdLessThanEqual %bool 
%1852 %1854 + OpSelectionMerge %1856 None + OpBranchConditional %1855 %1857 %1858 + %1858 = OpLabel + %1859 = OpFOrdGreaterThan %bool %1852 %1854 + %1860 = OpExtInst %float %1 Log %1775 + %1861 = OpFDiv %float %1860 %1065 + %1862 = OpFOrdLessThan %bool %1852 %1861 + %1863 = OpLogicalAnd %bool %1859 %1862 + OpSelectionMerge %1864 None + OpBranchConditional %1863 %1865 %1866 + %1866 = OpLabel + %1867 = OpFOrdGreaterThanEqual %bool %1852 %1861 + %1868 = OpExtInst %float %1 Log %1847 + %1869 = OpFDiv %float %1868 %1065 + %1870 = OpFOrdLessThan %bool %1852 %1869 + %1871 = OpLogicalAnd %bool %1867 %1870 + OpSelectionMerge %1872 None + OpBranchConditional %1871 %1873 %1874 + %1874 = OpLabel + %1875 = OpFMul %float %1852 %float_0_119999997 + %1876 = OpExtInst %float %1 Log %float_2000 + %1877 = OpFDiv %float %1876 %1065 + %1878 = OpFMul %float %float_0_119999997 %1868 + %1879 = OpFDiv %float %1878 %1065 + %1880 = OpFSub %float %1877 %1879 + %1881 = OpFAdd %float %1875 %1880 + OpBranch %1872 + %1873 = OpLabel + %1882 = OpFSub %float %1852 %1861 + %1883 = OpFMul %float %float_7 %1882 + %1884 = OpFSub %float %1869 %1861 + %1885 = OpFDiv %float %1883 %1884 + %1886 = OpConvertFToS %int %1885 + %1887 = OpConvertSToF %float %1886 + %1888 = OpFSub %float %1885 %1887 + %1889 = OpAccessChain %_ptr_Function_float %511 %1886 + %1890 = OpLoad %float %1889 + %1891 = OpIAdd %int %1886 %int_1 + %1892 = OpAccessChain %_ptr_Function_float %511 %1891 + %1893 = OpLoad %float %1892 + %1894 = OpIAdd %int %1886 %int_2 + %1895 = OpAccessChain %_ptr_Function_float %511 %1894 + %1896 = OpLoad %float %1895 + %1897 = OpCompositeConstruct %v3float %1890 %1893 %1896 + %1898 = OpFMul %float %1888 %1888 + %1899 = OpCompositeConstruct %v3float %1898 %1888 %float_1 + %1900 = OpMatrixTimesVector %v3float %442 %1897 + %1901 = OpDot %float %1899 %1900 + OpBranch %1872 + %1872 = OpLabel + %1902 = OpPhi %float %1881 %1874 %1901 %1873 + OpBranch %1864 + %1865 = OpLabel + %1903 = OpFSub %float %1852 %1854 + %1904 
= OpFMul %float %float_7 %1903 + %1905 = OpFSub %float %1861 %1854 + %1906 = OpFDiv %float %1904 %1905 + %1907 = OpConvertFToS %int %1906 + %1908 = OpConvertSToF %float %1907 + %1909 = OpFSub %float %1906 %1908 + %1910 = OpAccessChain %_ptr_Function_float %512 %1907 + %1911 = OpLoad %float %1910 + %1912 = OpIAdd %int %1907 %int_1 + %1913 = OpAccessChain %_ptr_Function_float %512 %1912 + %1914 = OpLoad %float %1913 + %1915 = OpIAdd %int %1907 %int_2 + %1916 = OpAccessChain %_ptr_Function_float %512 %1915 + %1917 = OpLoad %float %1916 + %1918 = OpCompositeConstruct %v3float %1911 %1914 %1917 + %1919 = OpFMul %float %1909 %1909 + %1920 = OpCompositeConstruct %v3float %1919 %1909 %float_1 + %1921 = OpMatrixTimesVector %v3float %442 %1918 + %1922 = OpDot %float %1920 %1921 + OpBranch %1864 + %1864 = OpLabel + %1923 = OpPhi %float %1902 %1872 %1922 %1865 + OpBranch %1856 + %1857 = OpLabel + %1924 = OpExtInst %float %1 Log %float_0_00499999989 + %1925 = OpFDiv %float %1924 %1065 + OpBranch %1856 + %1856 = OpLabel + %1926 = OpPhi %float %1923 %1864 %1925 %1857 + %1927 = OpExtInst %float %1 Pow %float_10 %1926 + %1928 = OpCompositeInsert %v3float %1927 %391 0 + %1929 = OpCompositeExtract %float %1642 1 + OpStore %510 %482 + OpStore %509 %483 + %1930 = OpFOrdLessThanEqual %bool %1929 %float_0 + %1931 = OpSelect %float %1930 %float_9_99999975en05 %1929 + %1932 = OpExtInst %float %1 Log %1931 + %1933 = OpFDiv %float %1932 %1065 + %1934 = OpFOrdLessThanEqual %bool %1933 %1854 + OpSelectionMerge %1935 None + OpBranchConditional %1934 %1936 %1937 + %1937 = OpLabel + %1938 = OpFOrdGreaterThan %bool %1933 %1854 + %1939 = OpExtInst %float %1 Log %1775 + %1940 = OpFDiv %float %1939 %1065 + %1941 = OpFOrdLessThan %bool %1933 %1940 + %1942 = OpLogicalAnd %bool %1938 %1941 + OpSelectionMerge %1943 None + OpBranchConditional %1942 %1944 %1945 + %1945 = OpLabel + %1946 = OpFOrdGreaterThanEqual %bool %1933 %1940 + %1947 = OpExtInst %float %1 Log %1847 + %1948 = OpFDiv %float %1947 %1065 + 
%1949 = OpFOrdLessThan %bool %1933 %1948 + %1950 = OpLogicalAnd %bool %1946 %1949 + OpSelectionMerge %1951 None + OpBranchConditional %1950 %1952 %1953 + %1953 = OpLabel + %1954 = OpFMul %float %1933 %float_0_119999997 + %1955 = OpExtInst %float %1 Log %float_2000 + %1956 = OpFDiv %float %1955 %1065 + %1957 = OpFMul %float %float_0_119999997 %1947 + %1958 = OpFDiv %float %1957 %1065 + %1959 = OpFSub %float %1956 %1958 + %1960 = OpFAdd %float %1954 %1959 + OpBranch %1951 + %1952 = OpLabel + %1961 = OpFSub %float %1933 %1940 + %1962 = OpFMul %float %float_7 %1961 + %1963 = OpFSub %float %1948 %1940 + %1964 = OpFDiv %float %1962 %1963 + %1965 = OpConvertFToS %int %1964 + %1966 = OpConvertSToF %float %1965 + %1967 = OpFSub %float %1964 %1966 + %1968 = OpAccessChain %_ptr_Function_float %509 %1965 + %1969 = OpLoad %float %1968 + %1970 = OpIAdd %int %1965 %int_1 + %1971 = OpAccessChain %_ptr_Function_float %509 %1970 + %1972 = OpLoad %float %1971 + %1973 = OpIAdd %int %1965 %int_2 + %1974 = OpAccessChain %_ptr_Function_float %509 %1973 + %1975 = OpLoad %float %1974 + %1976 = OpCompositeConstruct %v3float %1969 %1972 %1975 + %1977 = OpFMul %float %1967 %1967 + %1978 = OpCompositeConstruct %v3float %1977 %1967 %float_1 + %1979 = OpMatrixTimesVector %v3float %442 %1976 + %1980 = OpDot %float %1978 %1979 + OpBranch %1951 + %1951 = OpLabel + %1981 = OpPhi %float %1960 %1953 %1980 %1952 + OpBranch %1943 + %1944 = OpLabel + %1982 = OpFSub %float %1933 %1854 + %1983 = OpFMul %float %float_7 %1982 + %1984 = OpFSub %float %1940 %1854 + %1985 = OpFDiv %float %1983 %1984 + %1986 = OpConvertFToS %int %1985 + %1987 = OpConvertSToF %float %1986 + %1988 = OpFSub %float %1985 %1987 + %1989 = OpAccessChain %_ptr_Function_float %510 %1986 + %1990 = OpLoad %float %1989 + %1991 = OpIAdd %int %1986 %int_1 + %1992 = OpAccessChain %_ptr_Function_float %510 %1991 + %1993 = OpLoad %float %1992 + %1994 = OpIAdd %int %1986 %int_2 + %1995 = OpAccessChain %_ptr_Function_float %510 %1994 + %1996 = 
OpLoad %float %1995 + %1997 = OpCompositeConstruct %v3float %1990 %1993 %1996 + %1998 = OpFMul %float %1988 %1988 + %1999 = OpCompositeConstruct %v3float %1998 %1988 %float_1 + %2000 = OpMatrixTimesVector %v3float %442 %1997 + %2001 = OpDot %float %1999 %2000 + OpBranch %1943 + %1943 = OpLabel + %2002 = OpPhi %float %1981 %1951 %2001 %1944 + OpBranch %1935 + %1936 = OpLabel + %2003 = OpExtInst %float %1 Log %float_0_00499999989 + %2004 = OpFDiv %float %2003 %1065 + OpBranch %1935 + %1935 = OpLabel + %2005 = OpPhi %float %2002 %1943 %2004 %1936 + %2006 = OpExtInst %float %1 Pow %float_10 %2005 + %2007 = OpCompositeInsert %v3float %2006 %1928 1 + %2008 = OpCompositeExtract %float %1642 2 + OpStore %508 %482 + OpStore %507 %483 + %2009 = OpFOrdLessThanEqual %bool %2008 %float_0 + %2010 = OpSelect %float %2009 %float_9_99999975en05 %2008 + %2011 = OpExtInst %float %1 Log %2010 + %2012 = OpFDiv %float %2011 %1065 + %2013 = OpFOrdLessThanEqual %bool %2012 %1854 + OpSelectionMerge %2014 None + OpBranchConditional %2013 %2015 %2016 + %2016 = OpLabel + %2017 = OpFOrdGreaterThan %bool %2012 %1854 + %2018 = OpExtInst %float %1 Log %1775 + %2019 = OpFDiv %float %2018 %1065 + %2020 = OpFOrdLessThan %bool %2012 %2019 + %2021 = OpLogicalAnd %bool %2017 %2020 + OpSelectionMerge %2022 None + OpBranchConditional %2021 %2023 %2024 + %2024 = OpLabel + %2025 = OpFOrdGreaterThanEqual %bool %2012 %2019 + %2026 = OpExtInst %float %1 Log %1847 + %2027 = OpFDiv %float %2026 %1065 + %2028 = OpFOrdLessThan %bool %2012 %2027 + %2029 = OpLogicalAnd %bool %2025 %2028 + OpSelectionMerge %2030 None + OpBranchConditional %2029 %2031 %2032 + %2032 = OpLabel + %2033 = OpFMul %float %2012 %float_0_119999997 + %2034 = OpExtInst %float %1 Log %float_2000 + %2035 = OpFDiv %float %2034 %1065 + %2036 = OpFMul %float %float_0_119999997 %2026 + %2037 = OpFDiv %float %2036 %1065 + %2038 = OpFSub %float %2035 %2037 + %2039 = OpFAdd %float %2033 %2038 + OpBranch %2030 + %2031 = OpLabel + %2040 = OpFSub %float 
%2012 %2019 + %2041 = OpFMul %float %float_7 %2040 + %2042 = OpFSub %float %2027 %2019 + %2043 = OpFDiv %float %2041 %2042 + %2044 = OpConvertFToS %int %2043 + %2045 = OpConvertSToF %float %2044 + %2046 = OpFSub %float %2043 %2045 + %2047 = OpAccessChain %_ptr_Function_float %507 %2044 + %2048 = OpLoad %float %2047 + %2049 = OpIAdd %int %2044 %int_1 + %2050 = OpAccessChain %_ptr_Function_float %507 %2049 + %2051 = OpLoad %float %2050 + %2052 = OpIAdd %int %2044 %int_2 + %2053 = OpAccessChain %_ptr_Function_float %507 %2052 + %2054 = OpLoad %float %2053 + %2055 = OpCompositeConstruct %v3float %2048 %2051 %2054 + %2056 = OpFMul %float %2046 %2046 + %2057 = OpCompositeConstruct %v3float %2056 %2046 %float_1 + %2058 = OpMatrixTimesVector %v3float %442 %2055 + %2059 = OpDot %float %2057 %2058 + OpBranch %2030 + %2030 = OpLabel + %2060 = OpPhi %float %2039 %2032 %2059 %2031 + OpBranch %2022 + %2023 = OpLabel + %2061 = OpFSub %float %2012 %1854 + %2062 = OpFMul %float %float_7 %2061 + %2063 = OpFSub %float %2019 %1854 + %2064 = OpFDiv %float %2062 %2063 + %2065 = OpConvertFToS %int %2064 + %2066 = OpConvertSToF %float %2065 + %2067 = OpFSub %float %2064 %2066 + %2068 = OpAccessChain %_ptr_Function_float %508 %2065 + %2069 = OpLoad %float %2068 + %2070 = OpIAdd %int %2065 %int_1 + %2071 = OpAccessChain %_ptr_Function_float %508 %2070 + %2072 = OpLoad %float %2071 + %2073 = OpIAdd %int %2065 %int_2 + %2074 = OpAccessChain %_ptr_Function_float %508 %2073 + %2075 = OpLoad %float %2074 + %2076 = OpCompositeConstruct %v3float %2069 %2072 %2075 + %2077 = OpFMul %float %2067 %2067 + %2078 = OpCompositeConstruct %v3float %2077 %2067 %float_1 + %2079 = OpMatrixTimesVector %v3float %442 %2076 + %2080 = OpDot %float %2078 %2079 + OpBranch %2022 + %2022 = OpLabel + %2081 = OpPhi %float %2060 %2030 %2080 %2023 + OpBranch %2014 + %2015 = OpLabel + %2082 = OpExtInst %float %1 Log %float_0_00499999989 + %2083 = OpFDiv %float %2082 %1065 + OpBranch %2014 + %2014 = OpLabel + %2084 = OpPhi 
%float %2081 %2022 %2083 %2015 + %2085 = OpExtInst %float %1 Pow %float_10 %2084 + %2086 = OpCompositeInsert %v3float %2085 %2007 2 + %2087 = OpVectorTimesMatrix %v3float %2086 %576 + %2088 = OpFMul %v3float %2087 %496 + %2089 = OpExtInst %v3float %1 Pow %2088 %263 + %2090 = OpFMul %v3float %184 %2089 + %2091 = OpFAdd %v3float %183 %2090 + %2092 = OpFMul %v3float %185 %2089 + %2093 = OpFAdd %v3float %135 %2092 + %2094 = OpFDiv %v3float %135 %2093 + %2095 = OpFMul %v3float %2091 %2094 + %2096 = OpExtInst %v3float %1 Pow %2095 %264 + OpBranch %1236 + %1236 = OpLabel + %2097 = OpPhi %v3float %1260 %1240 %2096 %2014 + OpBranch %1230 + %1231 = OpLabel + %2098 = OpMatrixTimesMatrix %mat3v3float %546 %399 + %2099 = OpFMul %v3float %906 %262 + %2100 = OpVectorTimesMatrix %v3float %2099 %2098 + %2101 = OpCompositeExtract %float %2100 0 + %2102 = OpCompositeExtract %float %2100 1 + %2103 = OpExtInst %float %1 FMin %2101 %2102 + %2104 = OpCompositeExtract %float %2100 2 + %2105 = OpExtInst %float %1 FMin %2103 %2104 + %2106 = OpExtInst %float %1 FMax %2101 %2102 + %2107 = OpExtInst %float %1 FMax %2106 %2104 + %2108 = OpExtInst %float %1 FMax %2107 %float_1_00000001en10 + %2109 = OpExtInst %float %1 FMax %2105 %float_1_00000001en10 + %2110 = OpFSub %float %2108 %2109 + %2111 = OpExtInst %float %1 FMax %2107 %float_0_00999999978 + %2112 = OpFDiv %float %2110 %2111 + %2113 = OpFSub %float %2104 %2102 + %2114 = OpFMul %float %2104 %2113 + %2115 = OpFSub %float %2102 %2101 + %2116 = OpFMul %float %2102 %2115 + %2117 = OpFAdd %float %2114 %2116 + %2118 = OpFSub %float %2101 %2104 + %2119 = OpFMul %float %2101 %2118 + %2120 = OpFAdd %float %2117 %2119 + %2121 = OpExtInst %float %1 Sqrt %2120 + %2122 = OpFAdd %float %2104 %2102 + %2123 = OpFAdd %float %2122 %2101 + %2124 = OpFMul %float %float_1_75 %2121 + %2125 = OpFAdd %float %2123 %2124 + %2126 = OpFMul %float %2125 %float_0_333333343 + %2127 = OpFSub %float %2112 %float_0_400000006 + %2128 = OpFMul %float %2127 %float_5 + %2129 
= OpFMul %float %2127 %float_2_5 + %2130 = OpExtInst %float %1 FAbs %2129 + %2131 = OpFSub %float %float_1 %2130 + %2132 = OpExtInst %float %1 FMax %2131 %float_0 + %2133 = OpExtInst %float %1 FSign %2128 + %2134 = OpConvertFToS %int %2133 + %2135 = OpConvertSToF %float %2134 + %2136 = OpFMul %float %2132 %2132 + %2137 = OpFSub %float %float_1 %2136 + %2138 = OpFMul %float %2135 %2137 + %2139 = OpFAdd %float %float_1 %2138 + %2140 = OpFMul %float %2139 %float_0_0250000004 + %2141 = OpFOrdLessThanEqual %bool %2126 %float_0_0533333346 + OpSelectionMerge %2142 None + OpBranchConditional %2141 %2143 %2144 + %2144 = OpLabel + %2145 = OpFOrdGreaterThanEqual %bool %2126 %float_0_159999996 + OpSelectionMerge %2146 None + OpBranchConditional %2145 %2147 %2148 + %2148 = OpLabel + %2149 = OpFDiv %float %float_0_239999995 %2125 + %2150 = OpFSub %float %2149 %float_0_5 + %2151 = OpFMul %float %2140 %2150 + OpBranch %2146 + %2147 = OpLabel + OpBranch %2146 + %2146 = OpLabel + %2152 = OpPhi %float %2151 %2148 %float_0 %2147 + OpBranch %2142 + %2143 = OpLabel + OpBranch %2142 + %2142 = OpLabel + %2153 = OpPhi %float %2152 %2146 %2140 %2143 + %2154 = OpFAdd %float %float_1 %2153 + %2155 = OpCompositeConstruct %v3float %2154 %2154 %2154 + %2156 = OpFMul %v3float %2100 %2155 + %2157 = OpCompositeExtract %float %2156 0 + %2158 = OpCompositeExtract %float %2156 1 + %2159 = OpFOrdEqual %bool %2157 %2158 + %2160 = OpCompositeExtract %float %2156 2 + %2161 = OpFOrdEqual %bool %2158 %2160 + %2162 = OpLogicalAnd %bool %2159 %2161 + OpSelectionMerge %2163 None + OpBranchConditional %2162 %2164 %2165 + %2165 = OpLabel + %2166 = OpExtInst %float %1 Sqrt %float_3 + %2167 = OpFSub %float %2158 %2160 + %2168 = OpFMul %float %2166 %2167 + %2169 = OpFMul %float %float_2 %2157 + %2170 = OpFSub %float %2169 %2158 + %2171 = OpFSub %float %2170 %2160 + %2172 = OpExtInst %float %1 Atan2 %2168 %2171 + %2173 = OpFMul %float %float_57_2957764 %2172 + OpBranch %2163 + %2164 = OpLabel + OpBranch %2163 + 
%2163 = OpLabel + %2174 = OpPhi %float %2173 %2165 %float_0 %2164 + %2175 = OpFOrdLessThan %bool %2174 %float_0 + OpSelectionMerge %2176 None + OpBranchConditional %2175 %2177 %2176 + %2177 = OpLabel + %2178 = OpFAdd %float %2174 %float_360 + OpBranch %2176 + %2176 = OpLabel + %2179 = OpPhi %float %2174 %2163 %2178 %2177 + %2180 = OpExtInst %float %1 FClamp %2179 %float_0 %float_360 + %2181 = OpFOrdGreaterThan %bool %2180 %float_180 + OpSelectionMerge %2182 None + OpBranchConditional %2181 %2183 %2182 + %2183 = OpLabel + %2184 = OpFSub %float %2180 %float_360 + OpBranch %2182 + %2182 = OpLabel + %2185 = OpPhi %float %2180 %2176 %2184 %2183 + %2186 = OpFOrdGreaterThan %bool %2185 %float_n67_5 + %2187 = OpFOrdLessThan %bool %2185 %float_67_5 + %2188 = OpLogicalAnd %bool %2186 %2187 + OpSelectionMerge %2189 None + OpBranchConditional %2188 %2190 %2189 + %2190 = OpLabel + %2191 = OpFSub %float %2185 %float_n67_5 + %2192 = OpFMul %float %2191 %float_0_0296296291 + %2193 = OpConvertFToS %int %2192 + %2194 = OpConvertSToF %float %2193 + %2195 = OpFSub %float %2192 %2194 + %2196 = OpFMul %float %2195 %2195 + %2197 = OpFMul %float %2196 %2195 + %2198 = OpIEqual %bool %2193 %int_3 + OpSelectionMerge %2199 None + OpBranchConditional %2198 %2200 %2201 + %2201 = OpLabel + %2202 = OpIEqual %bool %2193 %int_2 + OpSelectionMerge %2203 None + OpBranchConditional %2202 %2204 %2205 + %2205 = OpLabel + %2206 = OpIEqual %bool %2193 %int_1 + OpSelectionMerge %2207 None + OpBranchConditional %2206 %2208 %2209 + %2209 = OpLabel + %2210 = OpIEqual %bool %2193 %int_0 + OpSelectionMerge %2211 None + OpBranchConditional %2210 %2212 %2213 + %2213 = OpLabel + OpBranch %2211 + %2212 = OpLabel + %2214 = OpFMul %float %2197 %float_0_166666672 + OpBranch %2211 + %2211 = OpLabel + %2215 = OpPhi %float %float_0 %2213 %2214 %2212 + OpBranch %2207 + %2208 = OpLabel + %2216 = OpFMul %float %2197 %float_n0_5 + %2217 = OpFMul %float %2196 %float_0_5 + %2218 = OpFAdd %float %2216 %2217 + %2219 = OpFMul 
%float %2195 %float_0_5 + %2220 = OpFAdd %float %2218 %2219 + %2221 = OpFAdd %float %2220 %float_0_166666672 + OpBranch %2207 + %2207 = OpLabel + %2222 = OpPhi %float %2215 %2211 %2221 %2208 + OpBranch %2203 + %2204 = OpLabel + %2223 = OpFMul %float %2197 %float_0_5 + %2224 = OpFMul %float %2196 %float_n1 + %2225 = OpFAdd %float %2223 %2224 + %2226 = OpFAdd %float %2225 %float_0_666666687 + OpBranch %2203 + %2203 = OpLabel + %2227 = OpPhi %float %2222 %2207 %2226 %2204 + OpBranch %2199 + %2200 = OpLabel + %2228 = OpFMul %float %2197 %float_n0_166666672 + %2229 = OpFMul %float %2196 %float_0_5 + %2230 = OpFAdd %float %2228 %2229 + %2231 = OpFMul %float %2195 %float_n0_5 + %2232 = OpFAdd %float %2230 %2231 + %2233 = OpFAdd %float %2232 %float_0_166666672 + OpBranch %2199 + %2199 = OpLabel + %2234 = OpPhi %float %2227 %2203 %2233 %2200 + OpBranch %2189 + %2189 = OpLabel + %2235 = OpPhi %float %float_0 %2182 %2234 %2199 + %2236 = OpFMul %float %2235 %float_1_5 + %2237 = OpFMul %float %2236 %2112 + %2238 = OpFSub %float %float_0_0299999993 %2157 + %2239 = OpFMul %float %2237 %2238 + %2240 = OpFMul %float %2239 %float_0_180000007 + %2241 = OpFAdd %float %2157 %2240 + %2242 = OpCompositeInsert %v3float %2241 %2156 0 + %2243 = OpExtInst %v3float %1 FClamp %2242 %132 %314 + %2244 = OpVectorTimesMatrix %v3float %2243 %410 + %2245 = OpExtInst %v3float %1 FClamp %2244 %132 %314 + %2246 = OpDot %float %2245 %67 + %2247 = OpCompositeConstruct %v3float %2246 %2246 %2246 + %2248 = OpExtInst %v3float %1 FMix %2247 %2245 %228 + %2249 = OpCompositeExtract %float %2248 0 + %2250 = OpExtInst %float %1 Exp2 %float_n15 + %2251 = OpFMul %float %float_0_179999992 %2250 + %2252 = OpExtInst %float %1 Exp2 %float_18 + %2253 = OpFMul %float %float_0_179999992 %2252 + OpStore %520 %475 + OpStore %519 %476 + %2254 = OpFOrdLessThanEqual %bool %2249 %float_0 + %2255 = OpExtInst %float %1 Exp2 %float_n14 + %2256 = OpSelect %float %2254 %2255 %2249 + %2257 = OpExtInst %float %1 Log %2256 + %2258 = 
OpFDiv %float %2257 %1065 + %2259 = OpExtInst %float %1 Log %2251 + %2260 = OpFDiv %float %2259 %1065 + %2261 = OpFOrdLessThanEqual %bool %2258 %2260 + OpSelectionMerge %2262 None + OpBranchConditional %2261 %2263 %2264 + %2264 = OpLabel + %2265 = OpFOrdGreaterThan %bool %2258 %2260 + %2266 = OpExtInst %float %1 Log %float_0_180000007 + %2267 = OpFDiv %float %2266 %1065 + %2268 = OpFOrdLessThan %bool %2258 %2267 + %2269 = OpLogicalAnd %bool %2265 %2268 + OpSelectionMerge %2270 None + OpBranchConditional %2269 %2271 %2272 + %2272 = OpLabel + %2273 = OpFOrdGreaterThanEqual %bool %2258 %2267 + %2274 = OpExtInst %float %1 Log %2253 + %2275 = OpFDiv %float %2274 %1065 + %2276 = OpFOrdLessThan %bool %2258 %2275 + %2277 = OpLogicalAnd %bool %2273 %2276 + OpSelectionMerge %2278 None + OpBranchConditional %2277 %2279 %2280 + %2280 = OpLabel + %2281 = OpExtInst %float %1 Log %float_10000 + %2282 = OpFDiv %float %2281 %1065 + OpBranch %2278 + %2279 = OpLabel + %2283 = OpFSub %float %2258 %2267 + %2284 = OpFMul %float %float_3 %2283 + %2285 = OpFSub %float %2275 %2267 + %2286 = OpFDiv %float %2284 %2285 + %2287 = OpConvertFToS %int %2286 + %2288 = OpConvertSToF %float %2287 + %2289 = OpFSub %float %2286 %2288 + %2290 = OpAccessChain %_ptr_Function_float %519 %2287 + %2291 = OpLoad %float %2290 + %2292 = OpIAdd %int %2287 %int_1 + %2293 = OpAccessChain %_ptr_Function_float %519 %2292 + %2294 = OpLoad %float %2293 + %2295 = OpIAdd %int %2287 %int_2 + %2296 = OpAccessChain %_ptr_Function_float %519 %2295 + %2297 = OpLoad %float %2296 + %2298 = OpCompositeConstruct %v3float %2291 %2294 %2297 + %2299 = OpFMul %float %2289 %2289 + %2300 = OpCompositeConstruct %v3float %2299 %2289 %float_1 + %2301 = OpMatrixTimesVector %v3float %442 %2298 + %2302 = OpDot %float %2300 %2301 + OpBranch %2278 + %2278 = OpLabel + %2303 = OpPhi %float %2282 %2280 %2302 %2279 + OpBranch %2270 + %2271 = OpLabel + %2304 = OpFSub %float %2258 %2260 + %2305 = OpFMul %float %float_3 %2304 + %2306 = OpFSub 
%float %2267 %2260 + %2307 = OpFDiv %float %2305 %2306 + %2308 = OpConvertFToS %int %2307 + %2309 = OpConvertSToF %float %2308 + %2310 = OpFSub %float %2307 %2309 + %2311 = OpAccessChain %_ptr_Function_float %520 %2308 + %2312 = OpLoad %float %2311 + %2313 = OpIAdd %int %2308 %int_1 + %2314 = OpAccessChain %_ptr_Function_float %520 %2313 + %2315 = OpLoad %float %2314 + %2316 = OpIAdd %int %2308 %int_2 + %2317 = OpAccessChain %_ptr_Function_float %520 %2316 + %2318 = OpLoad %float %2317 + %2319 = OpCompositeConstruct %v3float %2312 %2315 %2318 + %2320 = OpFMul %float %2310 %2310 + %2321 = OpCompositeConstruct %v3float %2320 %2310 %float_1 + %2322 = OpMatrixTimesVector %v3float %442 %2319 + %2323 = OpDot %float %2321 %2322 + OpBranch %2270 + %2270 = OpLabel + %2324 = OpPhi %float %2303 %2278 %2323 %2271 + OpBranch %2262 + %2263 = OpLabel + %2325 = OpExtInst %float %1 Log %float_9_99999975en05 + %2326 = OpFDiv %float %2325 %1065 + OpBranch %2262 + %2262 = OpLabel + %2327 = OpPhi %float %2324 %2270 %2326 %2263 + %2328 = OpExtInst %float %1 Pow %float_10 %2327 + %2329 = OpCompositeInsert %v3float %2328 %391 0 + %2330 = OpCompositeExtract %float %2248 1 + OpStore %522 %475 + OpStore %521 %476 + %2331 = OpFOrdLessThanEqual %bool %2330 %float_0 + %2332 = OpSelect %float %2331 %2255 %2330 + %2333 = OpExtInst %float %1 Log %2332 + %2334 = OpFDiv %float %2333 %1065 + %2335 = OpFOrdLessThanEqual %bool %2334 %2260 + OpSelectionMerge %2336 None + OpBranchConditional %2335 %2337 %2338 + %2338 = OpLabel + %2339 = OpFOrdGreaterThan %bool %2334 %2260 + %2340 = OpExtInst %float %1 Log %float_0_180000007 + %2341 = OpFDiv %float %2340 %1065 + %2342 = OpFOrdLessThan %bool %2334 %2341 + %2343 = OpLogicalAnd %bool %2339 %2342 + OpSelectionMerge %2344 None + OpBranchConditional %2343 %2345 %2346 + %2346 = OpLabel + %2347 = OpFOrdGreaterThanEqual %bool %2334 %2341 + %2348 = OpExtInst %float %1 Log %2253 + %2349 = OpFDiv %float %2348 %1065 + %2350 = OpFOrdLessThan %bool %2334 %2349 + %2351 = 
OpLogicalAnd %bool %2347 %2350 + OpSelectionMerge %2352 None + OpBranchConditional %2351 %2353 %2354 + %2354 = OpLabel + %2355 = OpExtInst %float %1 Log %float_10000 + %2356 = OpFDiv %float %2355 %1065 + OpBranch %2352 + %2353 = OpLabel + %2357 = OpFSub %float %2334 %2341 + %2358 = OpFMul %float %float_3 %2357 + %2359 = OpFSub %float %2349 %2341 + %2360 = OpFDiv %float %2358 %2359 + %2361 = OpConvertFToS %int %2360 + %2362 = OpConvertSToF %float %2361 + %2363 = OpFSub %float %2360 %2362 + %2364 = OpAccessChain %_ptr_Function_float %521 %2361 + %2365 = OpLoad %float %2364 + %2366 = OpIAdd %int %2361 %int_1 + %2367 = OpAccessChain %_ptr_Function_float %521 %2366 + %2368 = OpLoad %float %2367 + %2369 = OpIAdd %int %2361 %int_2 + %2370 = OpAccessChain %_ptr_Function_float %521 %2369 + %2371 = OpLoad %float %2370 + %2372 = OpCompositeConstruct %v3float %2365 %2368 %2371 + %2373 = OpFMul %float %2363 %2363 + %2374 = OpCompositeConstruct %v3float %2373 %2363 %float_1 + %2375 = OpMatrixTimesVector %v3float %442 %2372 + %2376 = OpDot %float %2374 %2375 + OpBranch %2352 + %2352 = OpLabel + %2377 = OpPhi %float %2356 %2354 %2376 %2353 + OpBranch %2344 + %2345 = OpLabel + %2378 = OpFSub %float %2334 %2260 + %2379 = OpFMul %float %float_3 %2378 + %2380 = OpFSub %float %2341 %2260 + %2381 = OpFDiv %float %2379 %2380 + %2382 = OpConvertFToS %int %2381 + %2383 = OpConvertSToF %float %2382 + %2384 = OpFSub %float %2381 %2383 + %2385 = OpAccessChain %_ptr_Function_float %522 %2382 + %2386 = OpLoad %float %2385 + %2387 = OpIAdd %int %2382 %int_1 + %2388 = OpAccessChain %_ptr_Function_float %522 %2387 + %2389 = OpLoad %float %2388 + %2390 = OpIAdd %int %2382 %int_2 + %2391 = OpAccessChain %_ptr_Function_float %522 %2390 + %2392 = OpLoad %float %2391 + %2393 = OpCompositeConstruct %v3float %2386 %2389 %2392 + %2394 = OpFMul %float %2384 %2384 + %2395 = OpCompositeConstruct %v3float %2394 %2384 %float_1 + %2396 = OpMatrixTimesVector %v3float %442 %2393 + %2397 = OpDot %float %2395 %2396 
+ OpBranch %2344 + %2344 = OpLabel + %2398 = OpPhi %float %2377 %2352 %2397 %2345 + OpBranch %2336 + %2337 = OpLabel + %2399 = OpExtInst %float %1 Log %float_9_99999975en05 + %2400 = OpFDiv %float %2399 %1065 + OpBranch %2336 + %2336 = OpLabel + %2401 = OpPhi %float %2398 %2344 %2400 %2337 + %2402 = OpExtInst %float %1 Pow %float_10 %2401 + %2403 = OpCompositeInsert %v3float %2402 %2329 1 + %2404 = OpCompositeExtract %float %2248 2 + OpStore %524 %475 + OpStore %523 %476 + %2405 = OpFOrdLessThanEqual %bool %2404 %float_0 + %2406 = OpSelect %float %2405 %2255 %2404 + %2407 = OpExtInst %float %1 Log %2406 + %2408 = OpFDiv %float %2407 %1065 + %2409 = OpFOrdLessThanEqual %bool %2408 %2260 + OpSelectionMerge %2410 None + OpBranchConditional %2409 %2411 %2412 + %2412 = OpLabel + %2413 = OpFOrdGreaterThan %bool %2408 %2260 + %2414 = OpExtInst %float %1 Log %float_0_180000007 + %2415 = OpFDiv %float %2414 %1065 + %2416 = OpFOrdLessThan %bool %2408 %2415 + %2417 = OpLogicalAnd %bool %2413 %2416 + OpSelectionMerge %2418 None + OpBranchConditional %2417 %2419 %2420 + %2420 = OpLabel + %2421 = OpFOrdGreaterThanEqual %bool %2408 %2415 + %2422 = OpExtInst %float %1 Log %2253 + %2423 = OpFDiv %float %2422 %1065 + %2424 = OpFOrdLessThan %bool %2408 %2423 + %2425 = OpLogicalAnd %bool %2421 %2424 + OpSelectionMerge %2426 None + OpBranchConditional %2425 %2427 %2428 + %2428 = OpLabel + %2429 = OpExtInst %float %1 Log %float_10000 + %2430 = OpFDiv %float %2429 %1065 + OpBranch %2426 + %2427 = OpLabel + %2431 = OpFSub %float %2408 %2415 + %2432 = OpFMul %float %float_3 %2431 + %2433 = OpFSub %float %2423 %2415 + %2434 = OpFDiv %float %2432 %2433 + %2435 = OpConvertFToS %int %2434 + %2436 = OpConvertSToF %float %2435 + %2437 = OpFSub %float %2434 %2436 + %2438 = OpAccessChain %_ptr_Function_float %523 %2435 + %2439 = OpLoad %float %2438 + %2440 = OpIAdd %int %2435 %int_1 + %2441 = OpAccessChain %_ptr_Function_float %523 %2440 + %2442 = OpLoad %float %2441 + %2443 = OpIAdd %int %2435 
%int_2 + %2444 = OpAccessChain %_ptr_Function_float %523 %2443 + %2445 = OpLoad %float %2444 + %2446 = OpCompositeConstruct %v3float %2439 %2442 %2445 + %2447 = OpFMul %float %2437 %2437 + %2448 = OpCompositeConstruct %v3float %2447 %2437 %float_1 + %2449 = OpMatrixTimesVector %v3float %442 %2446 + %2450 = OpDot %float %2448 %2449 + OpBranch %2426 + %2426 = OpLabel + %2451 = OpPhi %float %2430 %2428 %2450 %2427 + OpBranch %2418 + %2419 = OpLabel + %2452 = OpFSub %float %2408 %2260 + %2453 = OpFMul %float %float_3 %2452 + %2454 = OpFSub %float %2415 %2260 + %2455 = OpFDiv %float %2453 %2454 + %2456 = OpConvertFToS %int %2455 + %2457 = OpConvertSToF %float %2456 + %2458 = OpFSub %float %2455 %2457 + %2459 = OpAccessChain %_ptr_Function_float %524 %2456 + %2460 = OpLoad %float %2459 + %2461 = OpIAdd %int %2456 %int_1 + %2462 = OpAccessChain %_ptr_Function_float %524 %2461 + %2463 = OpLoad %float %2462 + %2464 = OpIAdd %int %2456 %int_2 + %2465 = OpAccessChain %_ptr_Function_float %524 %2464 + %2466 = OpLoad %float %2465 + %2467 = OpCompositeConstruct %v3float %2460 %2463 %2466 + %2468 = OpFMul %float %2458 %2458 + %2469 = OpCompositeConstruct %v3float %2468 %2458 %float_1 + %2470 = OpMatrixTimesVector %v3float %442 %2467 + %2471 = OpDot %float %2469 %2470 + OpBranch %2418 + %2418 = OpLabel + %2472 = OpPhi %float %2451 %2426 %2471 %2419 + OpBranch %2410 + %2411 = OpLabel + %2473 = OpExtInst %float %1 Log %float_9_99999975en05 + %2474 = OpFDiv %float %2473 %1065 + OpBranch %2410 + %2410 = OpLabel + %2475 = OpPhi %float %2472 %2418 %2474 %2411 + %2476 = OpExtInst %float %1 Pow %float_10 %2475 + %2477 = OpCompositeInsert %v3float %2476 %2403 2 + %2478 = OpVectorTimesMatrix %v3float %2477 %414 + %2479 = OpVectorTimesMatrix %v3float %2478 %410 + %2480 = OpExtInst %float %1 Pow %float_2 %float_n12 + %2481 = OpFMul %float %float_0_179999992 %2480 + OpStore %532 %475 + OpStore %531 %476 + %2482 = OpFOrdLessThanEqual %bool %2481 %float_0 + %2483 = OpSelect %float %2482 %2255 
%2481 + %2484 = OpExtInst %float %1 Log %2483 + %2485 = OpFDiv %float %2484 %1065 + %2486 = OpFOrdLessThanEqual %bool %2485 %2260 + OpSelectionMerge %2487 None + OpBranchConditional %2486 %2488 %2489 + %2489 = OpLabel + %2490 = OpFOrdGreaterThan %bool %2485 %2260 + %2491 = OpExtInst %float %1 Log %float_0_180000007 + %2492 = OpFDiv %float %2491 %1065 + %2493 = OpFOrdLessThan %bool %2485 %2492 + %2494 = OpLogicalAnd %bool %2490 %2493 + OpSelectionMerge %2495 None + OpBranchConditional %2494 %2496 %2497 + %2497 = OpLabel + %2498 = OpFOrdGreaterThanEqual %bool %2485 %2492 + %2499 = OpExtInst %float %1 Log %2253 + %2500 = OpFDiv %float %2499 %1065 + %2501 = OpFOrdLessThan %bool %2485 %2500 + %2502 = OpLogicalAnd %bool %2498 %2501 + OpSelectionMerge %2503 None + OpBranchConditional %2502 %2504 %2505 + %2505 = OpLabel + %2506 = OpExtInst %float %1 Log %float_10000 + %2507 = OpFDiv %float %2506 %1065 + OpBranch %2503 + %2504 = OpLabel + %2508 = OpFSub %float %2485 %2492 + %2509 = OpFMul %float %float_3 %2508 + %2510 = OpFSub %float %2500 %2492 + %2511 = OpFDiv %float %2509 %2510 + %2512 = OpConvertFToS %int %2511 + %2513 = OpConvertSToF %float %2512 + %2514 = OpFSub %float %2511 %2513 + %2515 = OpAccessChain %_ptr_Function_float %531 %2512 + %2516 = OpLoad %float %2515 + %2517 = OpIAdd %int %2512 %int_1 + %2518 = OpAccessChain %_ptr_Function_float %531 %2517 + %2519 = OpLoad %float %2518 + %2520 = OpIAdd %int %2512 %int_2 + %2521 = OpAccessChain %_ptr_Function_float %531 %2520 + %2522 = OpLoad %float %2521 + %2523 = OpCompositeConstruct %v3float %2516 %2519 %2522 + %2524 = OpFMul %float %2514 %2514 + %2525 = OpCompositeConstruct %v3float %2524 %2514 %float_1 + %2526 = OpMatrixTimesVector %v3float %442 %2523 + %2527 = OpDot %float %2525 %2526 + OpBranch %2503 + %2503 = OpLabel + %2528 = OpPhi %float %2507 %2505 %2527 %2504 + OpBranch %2495 + %2496 = OpLabel + %2529 = OpFSub %float %2485 %2260 + %2530 = OpFMul %float %float_3 %2529 + %2531 = OpFSub %float %2492 %2260 + 
%2532 = OpFDiv %float %2530 %2531 + %2533 = OpConvertFToS %int %2532 + %2534 = OpConvertSToF %float %2533 + %2535 = OpFSub %float %2532 %2534 + %2536 = OpAccessChain %_ptr_Function_float %532 %2533 + %2537 = OpLoad %float %2536 + %2538 = OpIAdd %int %2533 %int_1 + %2539 = OpAccessChain %_ptr_Function_float %532 %2538 + %2540 = OpLoad %float %2539 + %2541 = OpIAdd %int %2533 %int_2 + %2542 = OpAccessChain %_ptr_Function_float %532 %2541 + %2543 = OpLoad %float %2542 + %2544 = OpCompositeConstruct %v3float %2537 %2540 %2543 + %2545 = OpFMul %float %2535 %2535 + %2546 = OpCompositeConstruct %v3float %2545 %2535 %float_1 + %2547 = OpMatrixTimesVector %v3float %442 %2544 + %2548 = OpDot %float %2546 %2547 + OpBranch %2495 + %2495 = OpLabel + %2549 = OpPhi %float %2528 %2503 %2548 %2496 + OpBranch %2487 + %2488 = OpLabel + %2550 = OpExtInst %float %1 Log %float_9_99999975en05 + %2551 = OpFDiv %float %2550 %1065 + OpBranch %2487 + %2487 = OpLabel + %2552 = OpPhi %float %2549 %2495 %2551 %2488 + %2553 = OpExtInst %float %1 Pow %float_10 %2552 + OpStore %534 %475 + OpStore %533 %476 + %2554 = OpExtInst %float %1 Log %float_0_180000007 + %2555 = OpFDiv %float %2554 %1065 + %2556 = OpFOrdLessThanEqual %bool %2555 %2260 + OpSelectionMerge %2557 None + OpBranchConditional %2556 %2558 %2559 + %2559 = OpLabel + %2560 = OpFOrdGreaterThan %bool %2555 %2260 + %2561 = OpFOrdLessThan %bool %2555 %2555 + %2562 = OpLogicalAnd %bool %2560 %2561 + OpSelectionMerge %2563 None + OpBranchConditional %2562 %2564 %2565 + %2565 = OpLabel + %2566 = OpFOrdGreaterThanEqual %bool %2555 %2555 + %2567 = OpExtInst %float %1 Log %2253 + %2568 = OpFDiv %float %2567 %1065 + %2569 = OpFOrdLessThan %bool %2555 %2568 + %2570 = OpLogicalAnd %bool %2566 %2569 + OpSelectionMerge %2571 None + OpBranchConditional %2570 %2572 %2573 + %2573 = OpLabel + %2574 = OpExtInst %float %1 Log %float_10000 + %2575 = OpFDiv %float %2574 %1065 + OpBranch %2571 + %2572 = OpLabel + %2576 = OpFSub %float %2555 %2555 + %2577 = 
OpFMul %float %float_3 %2576 + %2578 = OpFSub %float %2568 %2555 + %2579 = OpFDiv %float %2577 %2578 + %2580 = OpConvertFToS %int %2579 + %2581 = OpConvertSToF %float %2580 + %2582 = OpFSub %float %2579 %2581 + %2583 = OpAccessChain %_ptr_Function_float %533 %2580 + %2584 = OpLoad %float %2583 + %2585 = OpIAdd %int %2580 %int_1 + %2586 = OpAccessChain %_ptr_Function_float %533 %2585 + %2587 = OpLoad %float %2586 + %2588 = OpIAdd %int %2580 %int_2 + %2589 = OpAccessChain %_ptr_Function_float %533 %2588 + %2590 = OpLoad %float %2589 + %2591 = OpCompositeConstruct %v3float %2584 %2587 %2590 + %2592 = OpFMul %float %2582 %2582 + %2593 = OpCompositeConstruct %v3float %2592 %2582 %float_1 + %2594 = OpMatrixTimesVector %v3float %442 %2591 + %2595 = OpDot %float %2593 %2594 + OpBranch %2571 + %2571 = OpLabel + %2596 = OpPhi %float %2575 %2573 %2595 %2572 + OpBranch %2563 + %2564 = OpLabel + %2597 = OpFSub %float %2555 %2260 + %2598 = OpFMul %float %float_3 %2597 + %2599 = OpAccessChain %_ptr_Function_float %534 %int_3 + %2600 = OpLoad %float %2599 + %2601 = OpAccessChain %_ptr_Function_float %534 %int_4 + %2602 = OpLoad %float %2601 + %2603 = OpAccessChain %_ptr_Function_float %534 %int_5 + %2604 = OpLoad %float %2603 + %2605 = OpCompositeConstruct %v3float %2600 %2602 %2604 + %2606 = OpMatrixTimesVector %v3float %442 %2605 + %2607 = OpCompositeExtract %float %2606 2 + OpBranch %2563 + %2563 = OpLabel + %2608 = OpPhi %float %2596 %2571 %2607 %2564 + OpBranch %2557 + %2558 = OpLabel + %2609 = OpExtInst %float %1 Log %float_9_99999975en05 + %2610 = OpFDiv %float %2609 %1065 + OpBranch %2557 + %2557 = OpLabel + %2611 = OpPhi %float %2608 %2563 %2610 %2558 + %2612 = OpExtInst %float %1 Pow %float_10 %2611 + %2613 = OpExtInst %float %1 Pow %float_2 %float_10 + %2614 = OpFMul %float %float_0_179999992 %2613 + OpStore %536 %475 + OpStore %535 %476 + %2615 = OpFOrdLessThanEqual %bool %2614 %float_0 + %2616 = OpSelect %float %2615 %2255 %2614 + %2617 = OpExtInst %float %1 Log %2616 
+ %2618 = OpFDiv %float %2617 %1065 + %2619 = OpFOrdLessThanEqual %bool %2618 %2260 + OpSelectionMerge %2620 None + OpBranchConditional %2619 %2621 %2622 + %2622 = OpLabel + %2623 = OpFOrdGreaterThan %bool %2618 %2260 + %2624 = OpFOrdLessThan %bool %2618 %2555 + %2625 = OpLogicalAnd %bool %2623 %2624 + OpSelectionMerge %2626 None + OpBranchConditional %2625 %2627 %2628 + %2628 = OpLabel + %2629 = OpFOrdGreaterThanEqual %bool %2618 %2555 + %2630 = OpExtInst %float %1 Log %2253 + %2631 = OpFDiv %float %2630 %1065 + %2632 = OpFOrdLessThan %bool %2618 %2631 + %2633 = OpLogicalAnd %bool %2629 %2632 + OpSelectionMerge %2634 None + OpBranchConditional %2633 %2635 %2636 + %2636 = OpLabel + %2637 = OpExtInst %float %1 Log %float_10000 + %2638 = OpFDiv %float %2637 %1065 + OpBranch %2634 + %2635 = OpLabel + %2639 = OpFSub %float %2618 %2555 + %2640 = OpFMul %float %float_3 %2639 + %2641 = OpFSub %float %2631 %2555 + %2642 = OpFDiv %float %2640 %2641 + %2643 = OpConvertFToS %int %2642 + %2644 = OpConvertSToF %float %2643 + %2645 = OpFSub %float %2642 %2644 + %2646 = OpAccessChain %_ptr_Function_float %535 %2643 + %2647 = OpLoad %float %2646 + %2648 = OpIAdd %int %2643 %int_1 + %2649 = OpAccessChain %_ptr_Function_float %535 %2648 + %2650 = OpLoad %float %2649 + %2651 = OpIAdd %int %2643 %int_2 + %2652 = OpAccessChain %_ptr_Function_float %535 %2651 + %2653 = OpLoad %float %2652 + %2654 = OpCompositeConstruct %v3float %2647 %2650 %2653 + %2655 = OpFMul %float %2645 %2645 + %2656 = OpCompositeConstruct %v3float %2655 %2645 %float_1 + %2657 = OpMatrixTimesVector %v3float %442 %2654 + %2658 = OpDot %float %2656 %2657 + OpBranch %2634 + %2634 = OpLabel + %2659 = OpPhi %float %2638 %2636 %2658 %2635 + OpBranch %2626 + %2627 = OpLabel + %2660 = OpFSub %float %2618 %2260 + %2661 = OpFMul %float %float_3 %2660 + %2662 = OpFSub %float %2555 %2260 + %2663 = OpFDiv %float %2661 %2662 + %2664 = OpConvertFToS %int %2663 + %2665 = OpConvertSToF %float %2664 + %2666 = OpFSub %float %2663 
%2665 + %2667 = OpAccessChain %_ptr_Function_float %536 %2664 + %2668 = OpLoad %float %2667 + %2669 = OpIAdd %int %2664 %int_1 + %2670 = OpAccessChain %_ptr_Function_float %536 %2669 + %2671 = OpLoad %float %2670 + %2672 = OpIAdd %int %2664 %int_2 + %2673 = OpAccessChain %_ptr_Function_float %536 %2672 + %2674 = OpLoad %float %2673 + %2675 = OpCompositeConstruct %v3float %2668 %2671 %2674 + %2676 = OpFMul %float %2666 %2666 + %2677 = OpCompositeConstruct %v3float %2676 %2666 %float_1 + %2678 = OpMatrixTimesVector %v3float %442 %2675 + %2679 = OpDot %float %2677 %2678 + OpBranch %2626 + %2626 = OpLabel + %2680 = OpPhi %float %2659 %2634 %2679 %2627 + OpBranch %2620 + %2621 = OpLabel + %2681 = OpExtInst %float %1 Log %float_9_99999975en05 + %2682 = OpFDiv %float %2681 %1065 + OpBranch %2620 + %2620 = OpLabel + %2683 = OpPhi %float %2680 %2626 %2682 %2621 + %2684 = OpExtInst %float %1 Pow %float_10 %2683 + %2685 = OpCompositeExtract %float %2479 0 + OpStore %530 %479 + OpStore %529 %480 + %2686 = OpFOrdLessThanEqual %bool %2685 %float_0 + %2687 = OpSelect %float %2686 %float_9_99999975en05 %2685 + %2688 = OpExtInst %float %1 Log %2687 + %2689 = OpFDiv %float %2688 %1065 + %2690 = OpExtInst %float %1 Log %2553 + %2691 = OpFDiv %float %2690 %1065 + %2692 = OpFOrdLessThanEqual %bool %2689 %2691 + OpSelectionMerge %2693 None + OpBranchConditional %2692 %2694 %2695 + %2695 = OpLabel + %2696 = OpFOrdGreaterThan %bool %2689 %2691 + %2697 = OpExtInst %float %1 Log %2612 + %2698 = OpFDiv %float %2697 %1065 + %2699 = OpFOrdLessThan %bool %2689 %2698 + %2700 = OpLogicalAnd %bool %2696 %2699 + OpSelectionMerge %2701 None + OpBranchConditional %2700 %2702 %2703 + %2703 = OpLabel + %2704 = OpFOrdGreaterThanEqual %bool %2689 %2698 + %2705 = OpExtInst %float %1 Log %2684 + %2706 = OpFDiv %float %2705 %1065 + %2707 = OpFOrdLessThan %bool %2689 %2706 + %2708 = OpLogicalAnd %bool %2704 %2707 + OpSelectionMerge %2709 None + OpBranchConditional %2708 %2710 %2711 + %2711 = OpLabel + %2712 
= OpFMul %float %2689 %float_0_0599999987 + %2713 = OpExtInst %float %1 Log %float_1000 + %2714 = OpFDiv %float %2713 %1065 + %2715 = OpFMul %float %float_0_0599999987 %2705 + %2716 = OpFDiv %float %2715 %1065 + %2717 = OpFSub %float %2714 %2716 + %2718 = OpFAdd %float %2712 %2717 + OpBranch %2709 + %2710 = OpLabel + %2719 = OpFSub %float %2689 %2698 + %2720 = OpFMul %float %float_7 %2719 + %2721 = OpFSub %float %2706 %2698 + %2722 = OpFDiv %float %2720 %2721 + %2723 = OpConvertFToS %int %2722 + %2724 = OpConvertSToF %float %2723 + %2725 = OpFSub %float %2722 %2724 + %2726 = OpAccessChain %_ptr_Function_float %529 %2723 + %2727 = OpLoad %float %2726 + %2728 = OpIAdd %int %2723 %int_1 + %2729 = OpAccessChain %_ptr_Function_float %529 %2728 + %2730 = OpLoad %float %2729 + %2731 = OpIAdd %int %2723 %int_2 + %2732 = OpAccessChain %_ptr_Function_float %529 %2731 + %2733 = OpLoad %float %2732 + %2734 = OpCompositeConstruct %v3float %2727 %2730 %2733 + %2735 = OpFMul %float %2725 %2725 + %2736 = OpCompositeConstruct %v3float %2735 %2725 %float_1 + %2737 = OpMatrixTimesVector %v3float %442 %2734 + %2738 = OpDot %float %2736 %2737 + OpBranch %2709 + %2709 = OpLabel + %2739 = OpPhi %float %2718 %2711 %2738 %2710 + OpBranch %2701 + %2702 = OpLabel + %2740 = OpFSub %float %2689 %2691 + %2741 = OpFMul %float %float_7 %2740 + %2742 = OpFSub %float %2698 %2691 + %2743 = OpFDiv %float %2741 %2742 + %2744 = OpConvertFToS %int %2743 + %2745 = OpConvertSToF %float %2744 + %2746 = OpFSub %float %2743 %2745 + %2747 = OpAccessChain %_ptr_Function_float %530 %2744 + %2748 = OpLoad %float %2747 + %2749 = OpIAdd %int %2744 %int_1 + %2750 = OpAccessChain %_ptr_Function_float %530 %2749 + %2751 = OpLoad %float %2750 + %2752 = OpIAdd %int %2744 %int_2 + %2753 = OpAccessChain %_ptr_Function_float %530 %2752 + %2754 = OpLoad %float %2753 + %2755 = OpCompositeConstruct %v3float %2748 %2751 %2754 + %2756 = OpFMul %float %2746 %2746 + %2757 = OpCompositeConstruct %v3float %2756 %2746 %float_1 + 
%2758 = OpMatrixTimesVector %v3float %442 %2755 + %2759 = OpDot %float %2757 %2758 + OpBranch %2701 + %2701 = OpLabel + %2760 = OpPhi %float %2739 %2709 %2759 %2702 + OpBranch %2693 + %2694 = OpLabel + %2761 = OpFMul %float %2689 %float_3 + %2762 = OpExtInst %float %1 Log %float_9_99999975en05 + %2763 = OpFDiv %float %2762 %1065 + %2764 = OpFMul %float %float_3 %2690 + %2765 = OpFDiv %float %2764 %1065 + %2766 = OpFSub %float %2763 %2765 + %2767 = OpFAdd %float %2761 %2766 + OpBranch %2693 + %2693 = OpLabel + %2768 = OpPhi %float %2760 %2701 %2767 %2694 + %2769 = OpExtInst %float %1 Pow %float_10 %2768 + %2770 = OpCompositeInsert %v3float %2769 %391 0 + %2771 = OpCompositeExtract %float %2479 1 + OpStore %528 %479 + OpStore %527 %480 + %2772 = OpFOrdLessThanEqual %bool %2771 %float_0 + %2773 = OpSelect %float %2772 %float_9_99999975en05 %2771 + %2774 = OpExtInst %float %1 Log %2773 + %2775 = OpFDiv %float %2774 %1065 + %2776 = OpFOrdLessThanEqual %bool %2775 %2691 + OpSelectionMerge %2777 None + OpBranchConditional %2776 %2778 %2779 + %2779 = OpLabel + %2780 = OpFOrdGreaterThan %bool %2775 %2691 + %2781 = OpExtInst %float %1 Log %2612 + %2782 = OpFDiv %float %2781 %1065 + %2783 = OpFOrdLessThan %bool %2775 %2782 + %2784 = OpLogicalAnd %bool %2780 %2783 + OpSelectionMerge %2785 None + OpBranchConditional %2784 %2786 %2787 + %2787 = OpLabel + %2788 = OpFOrdGreaterThanEqual %bool %2775 %2782 + %2789 = OpExtInst %float %1 Log %2684 + %2790 = OpFDiv %float %2789 %1065 + %2791 = OpFOrdLessThan %bool %2775 %2790 + %2792 = OpLogicalAnd %bool %2788 %2791 + OpSelectionMerge %2793 None + OpBranchConditional %2792 %2794 %2795 + %2795 = OpLabel + %2796 = OpFMul %float %2775 %float_0_0599999987 + %2797 = OpExtInst %float %1 Log %float_1000 + %2798 = OpFDiv %float %2797 %1065 + %2799 = OpFMul %float %float_0_0599999987 %2789 + %2800 = OpFDiv %float %2799 %1065 + %2801 = OpFSub %float %2798 %2800 + %2802 = OpFAdd %float %2796 %2801 + OpBranch %2793 + %2794 = OpLabel + %2803 = 
OpFSub %float %2775 %2782 + %2804 = OpFMul %float %float_7 %2803 + %2805 = OpFSub %float %2790 %2782 + %2806 = OpFDiv %float %2804 %2805 + %2807 = OpConvertFToS %int %2806 + %2808 = OpConvertSToF %float %2807 + %2809 = OpFSub %float %2806 %2808 + %2810 = OpAccessChain %_ptr_Function_float %527 %2807 + %2811 = OpLoad %float %2810 + %2812 = OpIAdd %int %2807 %int_1 + %2813 = OpAccessChain %_ptr_Function_float %527 %2812 + %2814 = OpLoad %float %2813 + %2815 = OpIAdd %int %2807 %int_2 + %2816 = OpAccessChain %_ptr_Function_float %527 %2815 + %2817 = OpLoad %float %2816 + %2818 = OpCompositeConstruct %v3float %2811 %2814 %2817 + %2819 = OpFMul %float %2809 %2809 + %2820 = OpCompositeConstruct %v3float %2819 %2809 %float_1 + %2821 = OpMatrixTimesVector %v3float %442 %2818 + %2822 = OpDot %float %2820 %2821 + OpBranch %2793 + %2793 = OpLabel + %2823 = OpPhi %float %2802 %2795 %2822 %2794 + OpBranch %2785 + %2786 = OpLabel + %2824 = OpFSub %float %2775 %2691 + %2825 = OpFMul %float %float_7 %2824 + %2826 = OpFSub %float %2782 %2691 + %2827 = OpFDiv %float %2825 %2826 + %2828 = OpConvertFToS %int %2827 + %2829 = OpConvertSToF %float %2828 + %2830 = OpFSub %float %2827 %2829 + %2831 = OpAccessChain %_ptr_Function_float %528 %2828 + %2832 = OpLoad %float %2831 + %2833 = OpIAdd %int %2828 %int_1 + %2834 = OpAccessChain %_ptr_Function_float %528 %2833 + %2835 = OpLoad %float %2834 + %2836 = OpIAdd %int %2828 %int_2 + %2837 = OpAccessChain %_ptr_Function_float %528 %2836 + %2838 = OpLoad %float %2837 + %2839 = OpCompositeConstruct %v3float %2832 %2835 %2838 + %2840 = OpFMul %float %2830 %2830 + %2841 = OpCompositeConstruct %v3float %2840 %2830 %float_1 + %2842 = OpMatrixTimesVector %v3float %442 %2839 + %2843 = OpDot %float %2841 %2842 + OpBranch %2785 + %2785 = OpLabel + %2844 = OpPhi %float %2823 %2793 %2843 %2786 + OpBranch %2777 + %2778 = OpLabel + %2845 = OpFMul %float %2775 %float_3 + %2846 = OpExtInst %float %1 Log %float_9_99999975en05 + %2847 = OpFDiv %float %2846 
%1065 + %2848 = OpFMul %float %float_3 %2690 + %2849 = OpFDiv %float %2848 %1065 + %2850 = OpFSub %float %2847 %2849 + %2851 = OpFAdd %float %2845 %2850 + OpBranch %2777 + %2777 = OpLabel + %2852 = OpPhi %float %2844 %2785 %2851 %2778 + %2853 = OpExtInst %float %1 Pow %float_10 %2852 + %2854 = OpCompositeInsert %v3float %2853 %2770 1 + %2855 = OpCompositeExtract %float %2479 2 + OpStore %526 %479 + OpStore %525 %480 + %2856 = OpFOrdLessThanEqual %bool %2855 %float_0 + %2857 = OpSelect %float %2856 %float_9_99999975en05 %2855 + %2858 = OpExtInst %float %1 Log %2857 + %2859 = OpFDiv %float %2858 %1065 + %2860 = OpFOrdLessThanEqual %bool %2859 %2691 + OpSelectionMerge %2861 None + OpBranchConditional %2860 %2862 %2863 + %2863 = OpLabel + %2864 = OpFOrdGreaterThan %bool %2859 %2691 + %2865 = OpExtInst %float %1 Log %2612 + %2866 = OpFDiv %float %2865 %1065 + %2867 = OpFOrdLessThan %bool %2859 %2866 + %2868 = OpLogicalAnd %bool %2864 %2867 + OpSelectionMerge %2869 None + OpBranchConditional %2868 %2870 %2871 + %2871 = OpLabel + %2872 = OpFOrdGreaterThanEqual %bool %2859 %2866 + %2873 = OpExtInst %float %1 Log %2684 + %2874 = OpFDiv %float %2873 %1065 + %2875 = OpFOrdLessThan %bool %2859 %2874 + %2876 = OpLogicalAnd %bool %2872 %2875 + OpSelectionMerge %2877 None + OpBranchConditional %2876 %2878 %2879 + %2879 = OpLabel + %2880 = OpFMul %float %2859 %float_0_0599999987 + %2881 = OpExtInst %float %1 Log %float_1000 + %2882 = OpFDiv %float %2881 %1065 + %2883 = OpFMul %float %float_0_0599999987 %2873 + %2884 = OpFDiv %float %2883 %1065 + %2885 = OpFSub %float %2882 %2884 + %2886 = OpFAdd %float %2880 %2885 + OpBranch %2877 + %2878 = OpLabel + %2887 = OpFSub %float %2859 %2866 + %2888 = OpFMul %float %float_7 %2887 + %2889 = OpFSub %float %2874 %2866 + %2890 = OpFDiv %float %2888 %2889 + %2891 = OpConvertFToS %int %2890 + %2892 = OpConvertSToF %float %2891 + %2893 = OpFSub %float %2890 %2892 + %2894 = OpAccessChain %_ptr_Function_float %525 %2891 + %2895 = OpLoad %float 
%2894 + %2896 = OpIAdd %int %2891 %int_1 + %2897 = OpAccessChain %_ptr_Function_float %525 %2896 + %2898 = OpLoad %float %2897 + %2899 = OpIAdd %int %2891 %int_2 + %2900 = OpAccessChain %_ptr_Function_float %525 %2899 + %2901 = OpLoad %float %2900 + %2902 = OpCompositeConstruct %v3float %2895 %2898 %2901 + %2903 = OpFMul %float %2893 %2893 + %2904 = OpCompositeConstruct %v3float %2903 %2893 %float_1 + %2905 = OpMatrixTimesVector %v3float %442 %2902 + %2906 = OpDot %float %2904 %2905 + OpBranch %2877 + %2877 = OpLabel + %2907 = OpPhi %float %2886 %2879 %2906 %2878 + OpBranch %2869 + %2870 = OpLabel + %2908 = OpFSub %float %2859 %2691 + %2909 = OpFMul %float %float_7 %2908 + %2910 = OpFSub %float %2866 %2691 + %2911 = OpFDiv %float %2909 %2910 + %2912 = OpConvertFToS %int %2911 + %2913 = OpConvertSToF %float %2912 + %2914 = OpFSub %float %2911 %2913 + %2915 = OpAccessChain %_ptr_Function_float %526 %2912 + %2916 = OpLoad %float %2915 + %2917 = OpIAdd %int %2912 %int_1 + %2918 = OpAccessChain %_ptr_Function_float %526 %2917 + %2919 = OpLoad %float %2918 + %2920 = OpIAdd %int %2912 %int_2 + %2921 = OpAccessChain %_ptr_Function_float %526 %2920 + %2922 = OpLoad %float %2921 + %2923 = OpCompositeConstruct %v3float %2916 %2919 %2922 + %2924 = OpFMul %float %2914 %2914 + %2925 = OpCompositeConstruct %v3float %2924 %2914 %float_1 + %2926 = OpMatrixTimesVector %v3float %442 %2923 + %2927 = OpDot %float %2925 %2926 + OpBranch %2869 + %2869 = OpLabel + %2928 = OpPhi %float %2907 %2877 %2927 %2870 + OpBranch %2861 + %2862 = OpLabel + %2929 = OpFMul %float %2859 %float_3 + %2930 = OpExtInst %float %1 Log %float_9_99999975en05 + %2931 = OpFDiv %float %2930 %1065 + %2932 = OpFMul %float %float_3 %2690 + %2933 = OpFDiv %float %2932 %1065 + %2934 = OpFSub %float %2931 %2933 + %2935 = OpFAdd %float %2929 %2934 + OpBranch %2861 + %2861 = OpLabel + %2936 = OpPhi %float %2928 %2869 %2935 %2862 + %2937 = OpExtInst %float %1 Pow %float_10 %2936 + %2938 = OpCompositeInsert %v3float %2937 
%2854 2 + %2939 = OpFSub %v3float %2938 %338 + %2940 = OpVectorTimesMatrix %v3float %2939 %576 + %2941 = OpFMul %v3float %2940 %496 + %2942 = OpExtInst %v3float %1 Pow %2941 %263 + %2943 = OpFMul %v3float %184 %2942 + %2944 = OpFAdd %v3float %183 %2943 + %2945 = OpFMul %v3float %185 %2942 + %2946 = OpFAdd %v3float %135 %2945 + %2947 = OpFDiv %v3float %135 %2946 + %2948 = OpFMul %v3float %2944 %2947 + %2949 = OpExtInst %v3float %1 Pow %2948 %264 + OpBranch %1230 + %1230 = OpLabel + %2950 = OpPhi %v3float %2097 %1236 %2949 %2861 + OpBranch %1224 + %1225 = OpLabel + %2951 = OpVectorTimesMatrix %v3float %1218 %547 + %2952 = OpVectorTimesMatrix %v3float %2951 %576 + %2953 = OpExtInst %v3float %1 FMax %250 %2952 + %2954 = OpFMul %v3float %2953 %252 + %2955 = OpExtInst %v3float %1 FMax %2953 %254 + %2956 = OpExtInst %v3float %1 Pow %2955 %256 + %2957 = OpFMul %v3float %2956 %258 + %2958 = OpFSub %v3float %2957 %260 + %2959 = OpExtInst %v3float %1 FMin %2954 %2958 + OpBranch %1224 + %1224 = OpLabel + %2960 = OpPhi %v3float %2950 %1230 %2959 %1225 + OpBranch %1220 + %1221 = OpLabel + %2961 = OpCompositeExtract %float %1218 0 + OpBranch %2962 + %2962 = OpLabel + OpLoopMerge %2963 %2964 None + OpBranch %2965 + %2965 = OpLabel + %2966 = OpFOrdLessThan %bool %2961 %float_0_00313066994 + OpSelectionMerge %2967 None + OpBranchConditional %2966 %2968 %2967 + %2968 = OpLabel + %2969 = OpFMul %float %2961 %float_12_9200001 + OpBranch %2963 + %2967 = OpLabel + %2970 = OpExtInst %float %1 Pow %2961 %float_0_416666657 + %2971 = OpFMul %float %2970 %float_1_05499995 + %2972 = OpFSub %float %2971 %float_0_0549999997 + OpBranch %2963 + %2964 = OpLabel + OpBranch %2962 + %2963 = OpLabel + %2973 = OpPhi %float %2969 %2968 %2972 %2967 + %2974 = OpCompositeExtract %float %1218 1 + OpBranch %2975 + %2975 = OpLabel + OpLoopMerge %2976 %2977 None + OpBranch %2978 + %2978 = OpLabel + %2979 = OpFOrdLessThan %bool %2974 %float_0_00313066994 + OpSelectionMerge %2980 None + OpBranchConditional %2979 
%2981 %2980 + %2981 = OpLabel + %2982 = OpFMul %float %2974 %float_12_9200001 + OpBranch %2976 + %2980 = OpLabel + %2983 = OpExtInst %float %1 Pow %2974 %float_0_416666657 + %2984 = OpFMul %float %2983 %float_1_05499995 + %2985 = OpFSub %float %2984 %float_0_0549999997 + OpBranch %2976 + %2977 = OpLabel + OpBranch %2975 + %2976 = OpLabel + %2986 = OpPhi %float %2982 %2981 %2985 %2980 + %2987 = OpCompositeExtract %float %1218 2 + OpBranch %2988 + %2988 = OpLabel + OpLoopMerge %2989 %2990 None + OpBranch %2991 + %2991 = OpLabel + %2992 = OpFOrdLessThan %bool %2987 %float_0_00313066994 + OpSelectionMerge %2993 None + OpBranchConditional %2992 %2994 %2993 + %2994 = OpLabel + %2995 = OpFMul %float %2987 %float_12_9200001 + OpBranch %2989 + %2993 = OpLabel + %2996 = OpExtInst %float %1 Pow %2987 %float_0_416666657 + %2997 = OpFMul %float %2996 %float_1_05499995 + %2998 = OpFSub %float %2997 %float_0_0549999997 + OpBranch %2989 + %2990 = OpLabel + OpBranch %2988 + %2989 = OpLabel + %2999 = OpPhi %float %2995 %2994 %2998 %2993 + %3000 = OpCompositeConstruct %v3float %2973 %2986 %2999 + OpBranch %1220 + %1220 = OpLabel + %3001 = OpPhi %v3float %2960 %1224 %3000 %2989 + %3002 = OpFMul %v3float %3001 %499 + %3003 = OpVectorShuffle %v4float %129 %3002 4 5 6 3 + %3004 = OpCompositeInsert %v4float %float_0 %3003 3 + OpStore %out_var_SV_Target0 %3004 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag b/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag new file mode 100644 index 00000000000..e0359bfdd3e --- /dev/null +++ b/shaders-ue4/asm/frag/padded-float-array-member-defef.asm.frag @@ -0,0 +1,3694 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 3107 +; Schema: 0 + OpCapability Shader + OpCapability Geometry + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPS "main" %in_var_TEXCOORD0 %gl_FragCoord 
%gl_Layer %out_var_SV_Target0 + OpExecutionMode %MainPS OriginUpperLeft + OpSource HLSL 600 + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "MappingPolynomial" + OpMemberName %type__Globals 1 "InverseGamma" + OpMemberName %type__Globals 2 "ColorMatrixR_ColorCurveCd1" + OpMemberName %type__Globals 3 "ColorMatrixG_ColorCurveCd3Cm3" + OpMemberName %type__Globals 4 "ColorMatrixB_ColorCurveCm2" + OpMemberName %type__Globals 5 "ColorCurve_Cm0Cd0_Cd2_Ch0Cm1_Ch3" + OpMemberName %type__Globals 6 "ColorCurve_Ch1_Ch2" + OpMemberName %type__Globals 7 "ColorShadow_Luma" + OpMemberName %type__Globals 8 "ColorShadow_Tint1" + OpMemberName %type__Globals 9 "ColorShadow_Tint2" + OpMemberName %type__Globals 10 "FilmSlope" + OpMemberName %type__Globals 11 "FilmToe" + OpMemberName %type__Globals 12 "FilmShoulder" + OpMemberName %type__Globals 13 "FilmBlackClip" + OpMemberName %type__Globals 14 "FilmWhiteClip" + OpMemberName %type__Globals 15 "LUTWeights" + OpMemberName %type__Globals 16 "ColorScale" + OpMemberName %type__Globals 17 "OverlayColor" + OpMemberName %type__Globals 18 "WhiteTemp" + OpMemberName %type__Globals 19 "WhiteTint" + OpMemberName %type__Globals 20 "ColorSaturation" + OpMemberName %type__Globals 21 "ColorContrast" + OpMemberName %type__Globals 22 "ColorGamma" + OpMemberName %type__Globals 23 "ColorGain" + OpMemberName %type__Globals 24 "ColorOffset" + OpMemberName %type__Globals 25 "ColorSaturationShadows" + OpMemberName %type__Globals 26 "ColorContrastShadows" + OpMemberName %type__Globals 27 "ColorGammaShadows" + OpMemberName %type__Globals 28 "ColorGainShadows" + OpMemberName %type__Globals 29 "ColorOffsetShadows" + OpMemberName %type__Globals 30 "ColorSaturationMidtones" + OpMemberName %type__Globals 31 "ColorContrastMidtones" + OpMemberName %type__Globals 32 "ColorGammaMidtones" + OpMemberName %type__Globals 33 "ColorGainMidtones" + OpMemberName %type__Globals 34 "ColorOffsetMidtones" + OpMemberName %type__Globals 35 
"ColorSaturationHighlights" + OpMemberName %type__Globals 36 "ColorContrastHighlights" + OpMemberName %type__Globals 37 "ColorGammaHighlights" + OpMemberName %type__Globals 38 "ColorGainHighlights" + OpMemberName %type__Globals 39 "ColorOffsetHighlights" + OpMemberName %type__Globals 40 "ColorCorrectionShadowsMax" + OpMemberName %type__Globals 41 "ColorCorrectionHighlightsMin" + OpMemberName %type__Globals 42 "OutputDevice" + OpMemberName %type__Globals 43 "OutputGamut" + OpMemberName %type__Globals 44 "BlueCorrection" + OpMemberName %type__Globals 45 "ExpandGamut" + OpName %_Globals "$Globals" + OpName %type_2d_image "type.2d.image" + OpName %Texture1 "Texture1" + OpName %type_sampler "type.sampler" + OpName %Texture1Sampler "Texture1Sampler" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPS "MainPS" + OpName %type_sampled_image "type.sampled.image" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 NoPerspective + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorate %gl_Layer BuiltIn Layer + OpDecorateString %gl_Layer UserSemantic "SV_RenderTargetArrayIndex" + OpDecorate %gl_Layer Flat + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 0 + OpDecorate %Texture1 DescriptorSet 0 + OpDecorate %Texture1 Binding 0 + OpDecorate %Texture1Sampler DescriptorSet 0 + OpDecorate %Texture1Sampler Binding 0 + OpDecorate %_arr_float_uint_5 ArrayStride 16 + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 48 + OpMemberDecorate %type__Globals 4 Offset 64 + OpMemberDecorate %type__Globals 5 Offset 80 + OpMemberDecorate 
%type__Globals 6 Offset 96 + OpMemberDecorate %type__Globals 7 Offset 112 + OpMemberDecorate %type__Globals 8 Offset 128 + OpMemberDecorate %type__Globals 9 Offset 144 + OpMemberDecorate %type__Globals 10 Offset 160 + OpMemberDecorate %type__Globals 11 Offset 164 + OpMemberDecorate %type__Globals 12 Offset 168 + OpMemberDecorate %type__Globals 13 Offset 172 + OpMemberDecorate %type__Globals 14 Offset 176 + OpMemberDecorate %type__Globals 15 Offset 192 + OpMemberDecorate %type__Globals 16 Offset 272 + OpMemberDecorate %type__Globals 17 Offset 288 + OpMemberDecorate %type__Globals 18 Offset 304 + OpMemberDecorate %type__Globals 19 Offset 308 + OpMemberDecorate %type__Globals 20 Offset 320 + OpMemberDecorate %type__Globals 21 Offset 336 + OpMemberDecorate %type__Globals 22 Offset 352 + OpMemberDecorate %type__Globals 23 Offset 368 + OpMemberDecorate %type__Globals 24 Offset 384 + OpMemberDecorate %type__Globals 25 Offset 400 + OpMemberDecorate %type__Globals 26 Offset 416 + OpMemberDecorate %type__Globals 27 Offset 432 + OpMemberDecorate %type__Globals 28 Offset 448 + OpMemberDecorate %type__Globals 29 Offset 464 + OpMemberDecorate %type__Globals 30 Offset 480 + OpMemberDecorate %type__Globals 31 Offset 496 + OpMemberDecorate %type__Globals 32 Offset 512 + OpMemberDecorate %type__Globals 33 Offset 528 + OpMemberDecorate %type__Globals 34 Offset 544 + OpMemberDecorate %type__Globals 35 Offset 560 + OpMemberDecorate %type__Globals 36 Offset 576 + OpMemberDecorate %type__Globals 37 Offset 592 + OpMemberDecorate %type__Globals 38 Offset 608 + OpMemberDecorate %type__Globals 39 Offset 624 + OpMemberDecorate %type__Globals 40 Offset 640 + OpMemberDecorate %type__Globals 41 Offset 644 + OpMemberDecorate %type__Globals 42 Offset 648 + OpMemberDecorate %type__Globals 43 Offset 652 + OpMemberDecorate %type__Globals 44 Offset 656 + OpMemberDecorate %type__Globals 45 Offset 660 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + 
%v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 +%float_0_952552378 = OpConstant %float 0.952552378 + %float_0 = OpConstant %float 0 + +; HACK: Needed to hack this constant since MSVC and GNU libc are off by 1 ULP when converting to string (it probably still works fine though in a roundtrip ...) +%float_9_36786018en05 = OpConstant %float 9.25 + +%float_0_343966454 = OpConstant %float 0.343966454 +%float_0_728166103 = OpConstant %float 0.728166103 +%float_n0_0721325427 = OpConstant %float -0.0721325427 +%float_1_00882518 = OpConstant %float 1.00882518 +%float_1_04981101 = OpConstant %float 1.04981101 +%float_n9_74845025en05 = OpConstant %float -9.74845025e-05 +%float_n0_495903015 = OpConstant %float -0.495903015 +%float_1_37331307 = OpConstant %float 1.37331307 +%float_0_0982400328 = OpConstant %float 0.0982400328 +%float_0_991252005 = OpConstant %float 0.991252005 +%float_0_662454188 = OpConstant %float 0.662454188 +%float_0_134004205 = OpConstant %float 0.134004205 +%float_0_156187683 = OpConstant %float 0.156187683 +%float_0_272228718 = OpConstant %float 0.272228718 +%float_0_674081743 = OpConstant %float 0.674081743 +%float_0_0536895171 = OpConstant %float 0.0536895171 +%float_n0_00557464967 = OpConstant %float -0.00557464967 +%float_0_0040607336 = OpConstant %float 0.0040607336 +%float_1_01033914 = OpConstant %float 1.01033914 +%float_1_6410234 = OpConstant %float 1.6410234 +%float_n0_324803293 = OpConstant %float -0.324803293 +%float_n0_236424699 = OpConstant %float -0.236424699 +%float_n0_663662851 = OpConstant %float -0.663662851 +%float_1_61533165 = OpConstant %float 1.61533165 +%float_0_0167563483 = OpConstant %float 0.0167563483 +%float_0_0117218941 = OpConstant %float 0.0117218941 +%float_n0_00828444213 = OpConstant %float -0.00828444213 +%float_0_988394856 = OpConstant %float 0.988394856 
+%float_1_45143926 = OpConstant %float 1.45143926 +%float_n0_236510754 = OpConstant %float -0.236510754 +%float_n0_214928567 = OpConstant %float -0.214928567 +%float_n0_0765537769 = OpConstant %float -0.0765537769 +%float_1_17622972 = OpConstant %float 1.17622972 +%float_n0_0996759236 = OpConstant %float -0.0996759236 +%float_0_00831614807 = OpConstant %float 0.00831614807 +%float_n0_00603244966 = OpConstant %float -0.00603244966 +%float_0_997716308 = OpConstant %float 0.997716308 +%float_0_695452213 = OpConstant %float 0.695452213 +%float_0_140678704 = OpConstant %float 0.140678704 +%float_0_163869068 = OpConstant %float 0.163869068 +%float_0_0447945632 = OpConstant %float 0.0447945632 +%float_0_859671116 = OpConstant %float 0.859671116 +%float_0_0955343172 = OpConstant %float 0.0955343172 +%float_n0_00552588282 = OpConstant %float -0.00552588282 +%float_0_00402521016 = OpConstant %float 0.00402521016 +%float_1_00150073 = OpConstant %float 1.00150073 + %73 = OpConstantComposite %v3float %float_0_272228718 %float_0_674081743 %float_0_0536895171 +%float_3_2409699 = OpConstant %float 3.2409699 +%float_n1_5373832 = OpConstant %float -1.5373832 +%float_n0_498610765 = OpConstant %float -0.498610765 +%float_n0_969243646 = OpConstant %float -0.969243646 +%float_1_8759675 = OpConstant %float 1.8759675 +%float_0_0415550582 = OpConstant %float 0.0415550582 +%float_0_0556300804 = OpConstant %float 0.0556300804 +%float_n0_203976959 = OpConstant %float -0.203976959 +%float_1_05697155 = OpConstant %float 1.05697155 +%float_0_412456393 = OpConstant %float 0.412456393 +%float_0_357576102 = OpConstant %float 0.357576102 +%float_0_180437505 = OpConstant %float 0.180437505 +%float_0_212672904 = OpConstant %float 0.212672904 +%float_0_715152204 = OpConstant %float 0.715152204 +%float_0_0721750036 = OpConstant %float 0.0721750036 +%float_0_0193339009 = OpConstant %float 0.0193339009 +%float_0_119191997 = OpConstant %float 0.119191997 +%float_0_950304091 = OpConstant %float 0.950304091 
+%float_1_71660841 = OpConstant %float 1.71660841 +%float_n0_355662107 = OpConstant %float -0.355662107 +%float_n0_253360093 = OpConstant %float -0.253360093 +%float_n0_666682899 = OpConstant %float -0.666682899 +%float_1_61647761 = OpConstant %float 1.61647761 +%float_0_0157685 = OpConstant %float 0.0157685 +%float_0_0176422 = OpConstant %float 0.0176422 +%float_n0_0427763015 = OpConstant %float -0.0427763015 +%float_0_942228675 = OpConstant %float 0.942228675 +%float_2_49339628 = OpConstant %float 2.49339628 +%float_n0_93134588 = OpConstant %float -0.93134588 +%float_n0_402694494 = OpConstant %float -0.402694494 +%float_n0_829486787 = OpConstant %float -0.829486787 +%float_1_76265967 = OpConstant %float 1.76265967 +%float_0_0236246008 = OpConstant %float 0.0236246008 +%float_0_0358507 = OpConstant %float 0.0358507 +%float_n0_0761827007 = OpConstant %float -0.0761827007 +%float_0_957014024 = OpConstant %float 0.957014024 +%float_1_01303005 = OpConstant %float 1.01303005 +%float_0_00610530982 = OpConstant %float 0.00610530982 +%float_n0_0149710001 = OpConstant %float -0.0149710001 +%float_0_00769822998 = OpConstant %float 0.00769822998 +%float_0_998165011 = OpConstant %float 0.998165011 +%float_n0_00503202993 = OpConstant %float -0.00503202993 +%float_n0_00284131011 = OpConstant %float -0.00284131011 +%float_0_00468515977 = OpConstant %float 0.00468515977 +%float_0_924507022 = OpConstant %float 0.924507022 +%float_0_987223983 = OpConstant %float 0.987223983 +%float_n0_00611326983 = OpConstant %float -0.00611326983 +%float_0_0159533005 = OpConstant %float 0.0159533005 +%float_n0_00759836007 = OpConstant %float -0.00759836007 +%float_1_00186002 = OpConstant %float 1.00186002 +%float_0_0053300201 = OpConstant %float 0.0053300201 +%float_0_00307257008 = OpConstant %float 0.00307257008 +%float_n0_00509594986 = OpConstant %float -0.00509594986 +%float_1_08168006 = OpConstant %float 1.08168006 + %float_0_5 = OpConstant %float 0.5 + %float_n1 = OpConstant %float -1 + 
%float_1 = OpConstant %float 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%float_0_015625 = OpConstant %float 0.015625 + %134 = OpConstantComposite %v2float %float_0_015625 %float_0_015625 + %135 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_43 = OpConstant %int 43 + %uint_3 = OpConstant %uint 3 + %138 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %int_9 = OpConstant %int 9 + %int_3 = OpConstant %int 3 + %141 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n4 = OpConstant %float -4 + %int_45 = OpConstant %int 45 +%float_0_544169128 = OpConstant %float 0.544169128 +%float_0_239592597 = OpConstant %float 0.239592597 +%float_0_166694298 = OpConstant %float 0.166694298 +%float_0_239465594 = OpConstant %float 0.239465594 +%float_0_702153027 = OpConstant %float 0.702153027 +%float_0_058381401 = OpConstant %float 0.058381401 +%float_n0_00234390004 = OpConstant %float -0.00234390004 +%float_0_0361833982 = OpConstant %float 0.0361833982 +%float_1_05521834 = OpConstant %float 1.05521834 +%float_0_940437257 = OpConstant %float 0.940437257 +%float_n0_0183068793 = OpConstant %float -0.0183068793 +%float_0_077869609 = OpConstant %float 0.077869609 +%float_0_00837869663 = OpConstant %float 0.00837869663 +%float_0_828660011 = OpConstant %float 0.828660011 +%float_0_162961304 = OpConstant %float 0.162961304 +%float_0_00054712611 = OpConstant %float 0.00054712611 +%float_n0_000883374596 = OpConstant %float -0.000883374596 +%float_1_00033629 = OpConstant %float 1.00033629 +%float_1_06317997 = OpConstant %float 1.06317997 +%float_0_0233955998 = OpConstant %float 0.0233955998 +%float_n0_0865726024 = OpConstant %float -0.0865726024 +%float_n0_0106336996 = OpConstant %float -0.0106336996 +%float_1_20632005 = OpConstant %float 1.20632005 +%float_n0_195690006 = OpConstant %float -0.195690006 +%float_n0_000590886979 = OpConstant %float -0.000590886979 +%float_0_00105247996 = OpConstant %float 0.00105247996 
+%float_0_999538004 = OpConstant %float 0.999538004 + %int_44 = OpConstant %int 44 +%float_0_9375 = OpConstant %float 0.9375 + %173 = OpConstantComposite %v3float %float_0_9375 %float_0_9375 %float_0_9375 +%float_0_03125 = OpConstant %float 0.03125 + %175 = OpConstantComposite %v3float %float_0_03125 %float_0_03125 %float_0_03125 + %int_15 = OpConstant %int 15 + %float_16 = OpConstant %float 16 + %int_16 = OpConstant %int 16 + %int_17 = OpConstant %int 17 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_5 = OpConstant %uint 5 + %uint_6 = OpConstant %uint 6 + %int_2 = OpConstant %int 2 +%mat3v3float = OpTypeMatrix %v3float 3 + %int_42 = OpConstant %int 42 +%float_0_159301758 = OpConstant %float 0.159301758 +%float_78_84375 = OpConstant %float 78.84375 +%float_0_8359375 = OpConstant %float 0.8359375 +%float_18_8515625 = OpConstant %float 18.8515625 +%float_18_6875 = OpConstant %float 18.6875 +%float_10000 = OpConstant %float 10000 +%float_0_0126833133 = OpConstant %float 0.0126833133 + %194 = OpConstantComposite %v3float %float_0_0126833133 %float_0_0126833133 %float_0_0126833133 + %195 = OpConstantComposite %v3float %float_0_8359375 %float_0_8359375 %float_0_8359375 + %196 = OpConstantComposite %v3float %float_18_8515625 %float_18_8515625 %float_18_8515625 + %197 = OpConstantComposite %v3float %float_18_6875 %float_18_6875 %float_18_6875 +%float_6_27739477 = OpConstant %float 6.27739477 + %199 = OpConstantComposite %v3float %float_6_27739477 %float_6_27739477 %float_6_27739477 + %200 = OpConstantComposite %v3float %float_10000 %float_10000 %float_10000 + %float_14 = OpConstant %float 14 +%float_0_180000007 = OpConstant %float 0.180000007 +%float_0_434017599 = OpConstant %float 0.434017599 + %204 = OpConstantComposite %v3float %float_0_434017599 %float_0_434017599 %float_0_434017599 + %205 = OpConstantComposite %v3float %float_14 %float_14 %float_14 + %206 = OpConstantComposite %v3float %float_0_180000007 %float_0_180000007 %float_0_180000007 + 
%int_18 = OpConstant %int 18 + %float_4000 = OpConstant %float 4000 +%float_0_312700003 = OpConstant %float 0.312700003 +%float_0_328999996 = OpConstant %float 0.328999996 + %int_19 = OpConstant %int 19 + %int_25 = OpConstant %int 25 + %int_20 = OpConstant %int 20 + %int_26 = OpConstant %int 26 + %int_21 = OpConstant %int 21 + %int_27 = OpConstant %int 27 + %int_22 = OpConstant %int 22 + %int_28 = OpConstant %int 28 + %int_23 = OpConstant %int 23 + %int_29 = OpConstant %int 29 + %int_24 = OpConstant %int 24 + %int_40 = OpConstant %int 40 + %int_35 = OpConstant %int 35 + %int_36 = OpConstant %int 36 + %int_37 = OpConstant %int 37 + %int_38 = OpConstant %int 38 + %int_39 = OpConstant %int 39 + %int_41 = OpConstant %int 41 + %int_30 = OpConstant %int 30 + %int_31 = OpConstant %int 31 + %int_32 = OpConstant %int 32 + %int_33 = OpConstant %int 33 + %int_34 = OpConstant %int 34 +%float_0_0500000007 = OpConstant %float 0.0500000007 + %float_1_75 = OpConstant %float 1.75 +%float_0_400000006 = OpConstant %float 0.400000006 +%float_0_819999993 = OpConstant %float 0.819999993 +%float_0_0299999993 = OpConstant %float 0.0299999993 + %float_2 = OpConstant %float 2 +%float_0_959999979 = OpConstant %float 0.959999979 + %241 = OpConstantComposite %v3float %float_0_959999979 %float_0_959999979 %float_0_959999979 + %int_13 = OpConstant %int 13 + %int_11 = OpConstant %int 11 + %int_14 = OpConstant %int 14 + %int_12 = OpConstant %int 12 +%float_0_800000012 = OpConstant %float 0.800000012 + %int_10 = OpConstant %int 10 + %float_10 = OpConstant %float 10 + %float_n2 = OpConstant %float -2 + %float_3 = OpConstant %float 3 + %251 = OpConstantComposite %v3float %float_3 %float_3 %float_3 + %252 = OpConstantComposite %v3float %float_2 %float_2 %float_2 +%float_0_930000007 = OpConstant %float 0.930000007 + %254 = OpConstantComposite %v3float %float_0_930000007 %float_0_930000007 %float_0_930000007 + %int_4 = OpConstant %int 4 + %int_8 = OpConstant %int 8 + %int_7 = OpConstant %int 7 + %int_5 
= OpConstant %int 5 + %int_6 = OpConstant %int 6 +%float_0_00200000009 = OpConstant %float 0.00200000009 + %261 = OpConstantComposite %v3float %float_0_00200000009 %float_0_00200000009 %float_0_00200000009 +%float_6_10351999en05 = OpConstant %float 6.10351999e-05 + %263 = OpConstantComposite %v3float %float_6_10351999en05 %float_6_10351999en05 %float_6_10351999en05 +%float_0_0404499993 = OpConstant %float 0.0404499993 + %265 = OpConstantComposite %v3float %float_0_0404499993 %float_0_0404499993 %float_0_0404499993 +%float_0_947867274 = OpConstant %float 0.947867274 + %267 = OpConstantComposite %v3float %float_0_947867274 %float_0_947867274 %float_0_947867274 +%float_0_0521326996 = OpConstant %float 0.0521326996 + %269 = OpConstantComposite %v3float %float_0_0521326996 %float_0_0521326996 %float_0_0521326996 +%float_2_4000001 = OpConstant %float 2.4000001 + %271 = OpConstantComposite %v3float %float_2_4000001 %float_2_4000001 %float_2_4000001 +%float_0_0773993805 = OpConstant %float 0.0773993805 + %273 = OpConstantComposite %v3float %float_0_0773993805 %float_0_0773993805 %float_0_0773993805 + %float_4_5 = OpConstant %float 4.5 + %275 = OpConstantComposite %v3float %float_4_5 %float_4_5 %float_4_5 +%float_0_0179999992 = OpConstant %float 0.0179999992 + %277 = OpConstantComposite %v3float %float_0_0179999992 %float_0_0179999992 %float_0_0179999992 +%float_0_449999988 = OpConstant %float 0.449999988 + %279 = OpConstantComposite %v3float %float_0_449999988 %float_0_449999988 %float_0_449999988 +%float_1_09899998 = OpConstant %float 1.09899998 + %281 = OpConstantComposite %v3float %float_1_09899998 %float_1_09899998 %float_1_09899998 +%float_0_0989999995 = OpConstant %float 0.0989999995 + %283 = OpConstantComposite %v3float %float_0_0989999995 %float_0_0989999995 %float_0_0989999995 + %float_1_5 = OpConstant %float 1.5 + %285 = OpConstantComposite %v3float %float_1_5 %float_1_5 %float_1_5 + %286 = OpConstantComposite %v3float %float_0_159301758 %float_0_159301758 
%float_0_159301758 + %287 = OpConstantComposite %v3float %float_78_84375 %float_78_84375 %float_78_84375 +%float_1_00055635 = OpConstant %float 1.00055635 + %float_7000 = OpConstant %float 7000 +%float_0_244063005 = OpConstant %float 0.244063005 +%float_99_1100006 = OpConstant %float 99.1100006 +%float_2967800 = OpConstant %float 2967800 +%float_0_237039998 = OpConstant %float 0.237039998 +%float_247_479996 = OpConstant %float 247.479996 +%float_1901800 = OpConstant %float 1901800 + %float_n3 = OpConstant %float -3 +%float_2_86999989 = OpConstant %float 2.86999989 +%float_0_275000006 = OpConstant %float 0.275000006 +%float_0_860117733 = OpConstant %float 0.860117733 +%float_0_000154118257 = OpConstant %float 0.000154118257 +%float_1_28641219en07 = OpConstant %float 1.28641219e-07 +%float_0_00084242021 = OpConstant %float 0.00084242021 +%float_7_08145137en07 = OpConstant %float 7.08145137e-07 +%float_0_317398727 = OpConstant %float 0.317398727 + +; HACK: Needed to hack this constant since MSVC and GNU libc are off by 1 ULP when converting to string (it probably still works fine though in a roundtrip ...) 
+%float_4_22806261en05 = OpConstant %float 4.25 + +%float_4_20481676en08 = OpConstant %float 4.20481676e-08 +%float_2_8974182en05 = OpConstant %float 2.8974182e-05 +%float_1_61456057en07 = OpConstant %float 1.61456057e-07 + %float_8 = OpConstant %float 8 + %float_4 = OpConstant %float 4 +%float_0_895099998 = OpConstant %float 0.895099998 +%float_0_266400009 = OpConstant %float 0.266400009 +%float_n0_161400005 = OpConstant %float -0.161400005 +%float_n0_750199974 = OpConstant %float -0.750199974 +%float_1_71350002 = OpConstant %float 1.71350002 +%float_0_0366999991 = OpConstant %float 0.0366999991 +%float_0_0388999991 = OpConstant %float 0.0388999991 +%float_n0_0684999973 = OpConstant %float -0.0684999973 +%float_1_02960002 = OpConstant %float 1.02960002 +%float_0_986992896 = OpConstant %float 0.986992896 +%float_n0_1470543 = OpConstant %float -0.1470543 +%float_0_159962699 = OpConstant %float 0.159962699 +%float_0_432305306 = OpConstant %float 0.432305306 +%float_0_518360317 = OpConstant %float 0.518360317 +%float_0_0492912009 = OpConstant %float 0.0492912009 +%float_n0_0085287001 = OpConstant %float -0.0085287001 +%float_0_040042799 = OpConstant %float 0.040042799 +%float_0_968486726 = OpConstant %float 0.968486726 +%float_5_55555534 = OpConstant %float 5.55555534 + %330 = OpConstantComposite %v3float %float_5_55555534 %float_5_55555534 %float_5_55555534 +%float_1_00000001en10 = OpConstant %float 1.00000001e-10 +%float_0_00999999978 = OpConstant %float 0.00999999978 +%float_0_666666687 = OpConstant %float 0.666666687 + %float_180 = OpConstant %float 180 + %float_360 = OpConstant %float 360 +%float_65535 = OpConstant %float 65535 + %337 = OpConstantComposite %v3float %float_65535 %float_65535 %float_65535 +%float_n4_97062206 = OpConstant %float -4.97062206 +%float_n3_02937818 = OpConstant %float -3.02937818 +%float_n2_12619996 = OpConstant %float -2.12619996 +%float_n1_51049995 = OpConstant %float -1.51049995 +%float_n1_05780005 = OpConstant %float -1.05780005 
+%float_n0_466800004 = OpConstant %float -0.466800004 +%float_0_119379997 = OpConstant %float 0.119379997 +%float_0_708813429 = OpConstant %float 0.708813429 +%float_1_29118657 = OpConstant %float 1.29118657 +%float_0_808913231 = OpConstant %float 0.808913231 +%float_1_19108677 = OpConstant %float 1.19108677 +%float_1_56830001 = OpConstant %float 1.56830001 +%float_1_9483 = OpConstant %float 1.9483 +%float_2_30830002 = OpConstant %float 2.30830002 +%float_2_63840008 = OpConstant %float 2.63840008 +%float_2_85949993 = OpConstant %float 2.85949993 +%float_2_98726082 = OpConstant %float 2.98726082 +%float_3_01273918 = OpConstant %float 3.01273918 +%float_0_179999992 = OpConstant %float 0.179999992 +%float_9_99999975en05 = OpConstant %float 9.99999975e-05 + %float_1000 = OpConstant %float 1000 +%float_0_0599999987 = OpConstant %float 0.0599999987 +%float_3_50738446en05 = OpConstant %float 3.50738446e-05 + %361 = OpConstantComposite %v3float %float_3_50738446en05 %float_3_50738446en05 %float_3_50738446en05 +%float_n2_30102992 = OpConstant %float -2.30102992 +%float_n1_93120003 = OpConstant %float -1.93120003 +%float_n1_52049994 = OpConstant %float -1.52049994 +%float_0_801995218 = OpConstant %float 0.801995218 +%float_1_19800484 = OpConstant %float 1.19800484 +%float_1_59430003 = OpConstant %float 1.59430003 +%float_1_99730003 = OpConstant %float 1.99730003 +%float_2_37829995 = OpConstant %float 2.37829995 +%float_2_76839995 = OpConstant %float 2.76839995 +%float_3_05150008 = OpConstant %float 3.05150008 +%float_3_27462935 = OpConstant %float 3.27462935 +%float_3_32743073 = OpConstant %float 3.32743073 +%float_0_00499999989 = OpConstant %float 0.00499999989 + %float_11 = OpConstant %float 11 + %float_2000 = OpConstant %float 2000 +%float_0_119999997 = OpConstant %float 0.119999997 +%float_0_00313066994 = OpConstant %float 0.00313066994 +%float_12_9200001 = OpConstant %float 12.9200001 +%float_0_416666657 = OpConstant %float 0.416666657 +%float_1_05499995 = OpConstant 
%float 1.05499995 +%float_0_0549999997 = OpConstant %float 0.0549999997 +%float_n0_166666672 = OpConstant %float -0.166666672 + %float_n0_5 = OpConstant %float -0.5 +%float_0_166666672 = OpConstant %float 0.166666672 +%float_n3_15737653 = OpConstant %float -3.15737653 +%float_n0_485249996 = OpConstant %float -0.485249996 +%float_1_84773242 = OpConstant %float 1.84773242 +%float_n0_718548238 = OpConstant %float -0.718548238 +%float_2_08103061 = OpConstant %float 2.08103061 +%float_3_6681242 = OpConstant %float 3.6681242 + %float_18 = OpConstant %float 18 + %float_7 = OpConstant %float 7 +%_arr_float_uint_5 = OpTypeArray %float %uint_5 +%type__Globals = OpTypeStruct %v4float %v3float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %float %float %float %float %float %_arr_float_uint_5 %v3float %v4float %float %float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %float %float %uint %uint %float %float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %402 = OpTypeFunction %void +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 
+%type_sampled_image = OpTypeSampledImage %type_2d_image + %uint_10 = OpConstant %uint 10 +%_arr_float_uint_10 = OpTypeArray %float %uint_10 +%_arr_float_uint_6 = OpTypeArray %float %uint_6 + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform + %Texture1 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Texture1Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_v2float Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %gl_Layer = OpVariable %_ptr_Input_uint Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output +%_ptr_Function__arr_float_uint_6 = OpTypePointer Function %_arr_float_uint_6 +%_ptr_Function__arr_float_uint_10 = OpTypePointer Function %_arr_float_uint_10 + %416 = OpConstantComposite %v3float %float_0_952552378 %float_0 %float_9_36786018en05 + %417 = OpConstantComposite %v3float %float_0_343966454 %float_0_728166103 %float_n0_0721325427 + %418 = OpConstantComposite %v3float %float_0 %float_0 %float_1_00882518 + %419 = OpConstantComposite %mat3v3float %416 %417 %418 + %420 = OpConstantComposite %v3float %float_1_04981101 %float_0 %float_n9_74845025en05 + %421 = OpConstantComposite %v3float %float_n0_495903015 %float_1_37331307 %float_0_0982400328 + %422 = OpConstantComposite %v3float %float_0 %float_0 %float_0_991252005 + %423 = OpConstantComposite %mat3v3float %420 %421 %422 + %424 = OpConstantComposite %v3float %float_0_662454188 %float_0_134004205 %float_0_156187683 + %425 = OpConstantComposite %v3float %float_n0_00557464967 %float_0_0040607336 %float_1_01033914 + %426 = OpConstantComposite %mat3v3float %424 %73 %425 + %427 = OpConstantComposite %v3float %float_1_6410234 %float_n0_324803293 %float_n0_236424699 + %428 = OpConstantComposite %v3float %float_n0_663662851 %float_1_61533165 %float_0_0167563483 + %429 = OpConstantComposite %v3float %float_0_0117218941 %float_n0_00828444213 %float_0_988394856 + %430 = OpConstantComposite 
%mat3v3float %427 %428 %429 + %431 = OpConstantComposite %v3float %float_1_45143926 %float_n0_236510754 %float_n0_214928567 + %432 = OpConstantComposite %v3float %float_n0_0765537769 %float_1_17622972 %float_n0_0996759236 + %433 = OpConstantComposite %v3float %float_0_00831614807 %float_n0_00603244966 %float_0_997716308 + %434 = OpConstantComposite %mat3v3float %431 %432 %433 + %435 = OpConstantComposite %v3float %float_0_695452213 %float_0_140678704 %float_0_163869068 + %436 = OpConstantComposite %v3float %float_0_0447945632 %float_0_859671116 %float_0_0955343172 + %437 = OpConstantComposite %v3float %float_n0_00552588282 %float_0_00402521016 %float_1_00150073 + %438 = OpConstantComposite %mat3v3float %435 %436 %437 + %439 = OpConstantComposite %v3float %float_3_2409699 %float_n1_5373832 %float_n0_498610765 + %440 = OpConstantComposite %v3float %float_n0_969243646 %float_1_8759675 %float_0_0415550582 + %441 = OpConstantComposite %v3float %float_0_0556300804 %float_n0_203976959 %float_1_05697155 + %442 = OpConstantComposite %mat3v3float %439 %440 %441 + %443 = OpConstantComposite %v3float %float_0_412456393 %float_0_357576102 %float_0_180437505 + %444 = OpConstantComposite %v3float %float_0_212672904 %float_0_715152204 %float_0_0721750036 + %445 = OpConstantComposite %v3float %float_0_0193339009 %float_0_119191997 %float_0_950304091 + %446 = OpConstantComposite %mat3v3float %443 %444 %445 + %447 = OpConstantComposite %v3float %float_1_71660841 %float_n0_355662107 %float_n0_253360093 + %448 = OpConstantComposite %v3float %float_n0_666682899 %float_1_61647761 %float_0_0157685 + %449 = OpConstantComposite %v3float %float_0_0176422 %float_n0_0427763015 %float_0_942228675 + %450 = OpConstantComposite %mat3v3float %447 %448 %449 + %451 = OpConstantComposite %v3float %float_2_49339628 %float_n0_93134588 %float_n0_402694494 + %452 = OpConstantComposite %v3float %float_n0_829486787 %float_1_76265967 %float_0_0236246008 + %453 = OpConstantComposite %v3float %float_0_0358507 
%float_n0_0761827007 %float_0_957014024 + %454 = OpConstantComposite %mat3v3float %451 %452 %453 + %455 = OpConstantComposite %v3float %float_1_01303005 %float_0_00610530982 %float_n0_0149710001 + %456 = OpConstantComposite %v3float %float_0_00769822998 %float_0_998165011 %float_n0_00503202993 + %457 = OpConstantComposite %v3float %float_n0_00284131011 %float_0_00468515977 %float_0_924507022 + %458 = OpConstantComposite %mat3v3float %455 %456 %457 + %459 = OpConstantComposite %v3float %float_0_987223983 %float_n0_00611326983 %float_0_0159533005 + %460 = OpConstantComposite %v3float %float_n0_00759836007 %float_1_00186002 %float_0_0053300201 + %461 = OpConstantComposite %v3float %float_0_00307257008 %float_n0_00509594986 %float_1_08168006 + %462 = OpConstantComposite %mat3v3float %459 %460 %461 + %463 = OpConstantComposite %v3float %float_0_5 %float_n1 %float_0_5 + %464 = OpConstantComposite %v3float %float_n1 %float_1 %float_0_5 + %465 = OpConstantComposite %v3float %float_0_5 %float_0 %float_0 + %466 = OpConstantComposite %mat3v3float %463 %464 %465 + %467 = OpConstantComposite %v3float %float_1 %float_0 %float_0 + %468 = OpConstantComposite %v3float %float_0 %float_1 %float_0 + %469 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %470 = OpConstantComposite %mat3v3float %467 %468 %469 +%float_n6_07624626 = OpConstant %float -6.07624626 + %472 = OpConstantComposite %v3float %float_n6_07624626 %float_n6_07624626 %float_n6_07624626 + %473 = OpConstantComposite %v3float %float_0_895099998 %float_0_266400009 %float_n0_161400005 + %474 = OpConstantComposite %v3float %float_n0_750199974 %float_1_71350002 %float_0_0366999991 + %475 = OpConstantComposite %v3float %float_0_0388999991 %float_n0_0684999973 %float_1_02960002 + %476 = OpConstantComposite %mat3v3float %473 %474 %475 + %477 = OpConstantComposite %v3float %float_0_986992896 %float_n0_1470543 %float_0_159962699 + %478 = OpConstantComposite %v3float %float_0_432305306 %float_0_518360317 
%float_0_0492912009 + %479 = OpConstantComposite %v3float %float_n0_0085287001 %float_0_040042799 %float_0_968486726 + %480 = OpConstantComposite %mat3v3float %477 %478 %479 + %481 = OpConstantComposite %v3float %float_0_544169128 %float_0_239592597 %float_0_166694298 + %482 = OpConstantComposite %v3float %float_0_239465594 %float_0_702153027 %float_0_058381401 + %483 = OpConstantComposite %v3float %float_n0_00234390004 %float_0_0361833982 %float_1_05521834 + %484 = OpConstantComposite %mat3v3float %481 %482 %483 + %485 = OpConstantComposite %v3float %float_0_940437257 %float_n0_0183068793 %float_0_077869609 + %486 = OpConstantComposite %v3float %float_0_00837869663 %float_0_828660011 %float_0_162961304 + %487 = OpConstantComposite %v3float %float_0_00054712611 %float_n0_000883374596 %float_1_00033629 + %488 = OpConstantComposite %mat3v3float %485 %486 %487 + %489 = OpConstantComposite %v3float %float_1_06317997 %float_0_0233955998 %float_n0_0865726024 + %490 = OpConstantComposite %v3float %float_n0_0106336996 %float_1_20632005 %float_n0_195690006 + %491 = OpConstantComposite %v3float %float_n0_000590886979 %float_0_00105247996 %float_0_999538004 + %492 = OpConstantComposite %mat3v3float %489 %490 %491 +%float_0_0533333346 = OpConstant %float 0.0533333346 +%float_0_159999996 = OpConstant %float 0.159999996 +%float_57_2957764 = OpConstant %float 57.2957764 +%float_0_0625 = OpConstant %float 0.0625 +%float_n67_5 = OpConstant %float -67.5 + %float_67_5 = OpConstant %float 67.5 + %499 = OpConstantComposite %_arr_float_uint_6 %float_n4 %float_n4 %float_n3_15737653 %float_n0_485249996 %float_1_84773242 %float_1_84773242 + %500 = OpConstantComposite %_arr_float_uint_6 %float_n0_718548238 %float_2_08103061 %float_3_6681242 %float_4 %float_4 %float_4 + %float_n15 = OpConstant %float -15 + %float_n14 = OpConstant %float -14 + %503 = OpConstantComposite %_arr_float_uint_10 %float_n4_97062206 %float_n3_02937818 %float_n2_12619996 %float_n1_51049995 %float_n1_05780005 
%float_n0_466800004 %float_0_119379997 %float_0_708813429 %float_1_29118657 %float_1_29118657 + %504 = OpConstantComposite %_arr_float_uint_10 %float_0_808913231 %float_1_19108677 %float_1_56830001 %float_1_9483 %float_2_30830002 %float_2_63840008 %float_2_85949993 %float_2_98726082 %float_3_01273918 %float_3_01273918 + %float_n12 = OpConstant %float -12 + %506 = OpConstantComposite %_arr_float_uint_10 %float_n2_30102992 %float_n2_30102992 %float_n1_93120003 %float_n1_52049994 %float_n1_05780005 %float_n0_466800004 %float_0_119379997 %float_0_708813429 %float_1_29118657 %float_1_29118657 + %507 = OpConstantComposite %_arr_float_uint_10 %float_0_801995218 %float_1_19800484 %float_1_59430003 %float_1_99730003 %float_2_37829995 %float_2_76839995 %float_3_05150008 %float_3_27462935 %float_3_32743073 %float_3_32743073 +%float_0_0322580636 = OpConstant %float 0.0322580636 +%float_1_03225803 = OpConstant %float 1.03225803 + %510 = OpConstantComposite %v2float %float_1_03225803 %float_1_03225803 +%float_4_60443853e_09 = OpConstant %float 4.60443853e+09 +%float_2_00528435e_09 = OpConstant %float 2.00528435e+09 +%float_0_333333343 = OpConstant %float 0.333333343 + %float_5 = OpConstant %float 5 + %float_2_5 = OpConstant %float 2.5 +%float_0_0250000004 = OpConstant %float 0.0250000004 +%float_0_239999995 = OpConstant %float 0.239999995 +%float_0_0148148146 = OpConstant %float 0.0148148146 + %519 = OpConstantComposite %v3float %float_9_99999975en05 %float_9_99999975en05 %float_9_99999975en05 +%float_0_0296296291 = OpConstant %float 0.0296296291 +%float_0_952381015 = OpConstant %float 0.952381015 + %522 = OpConstantComposite %v3float %float_0_952381015 %float_0_952381015 %float_0_952381015 + %523 = OpUndef %v3float +%float_0_358299971 = OpConstant %float 0.358299971 + %525 = OpUndef %v3float + %MainPS = OpFunction %void None %402 + %526 = OpLabel + %527 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %528 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %529 = 
OpVariable %_ptr_Function__arr_float_uint_6 Function + %530 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %531 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %532 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %533 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %534 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %535 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %536 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %537 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %538 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %539 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %540 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %541 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %542 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %543 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %544 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %545 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %546 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %547 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %548 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %549 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %550 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %551 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %552 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %553 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %554 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %555 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %556 = OpVariable %_ptr_Function__arr_float_uint_10 Function + %557 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %558 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %559 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %560 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %561 = 
OpVariable %_ptr_Function__arr_float_uint_6 Function + %562 = OpVariable %_ptr_Function__arr_float_uint_6 Function + %563 = OpLoad %v2float %in_var_TEXCOORD0 + %564 = OpLoad %uint %gl_Layer + %565 = OpFSub %v2float %563 %134 + %566 = OpFMul %v2float %565 %510 + %567 = OpCompositeExtract %float %566 0 + %568 = OpCompositeExtract %float %566 1 + %569 = OpConvertUToF %float %564 + %570 = OpFMul %float %569 %float_0_0322580636 + %571 = OpCompositeConstruct %v4float %567 %568 %570 %float_0 + %572 = OpMatrixTimesMatrix %mat3v3float %446 %458 + %573 = OpMatrixTimesMatrix %mat3v3float %572 %430 + %574 = OpMatrixTimesMatrix %mat3v3float %426 %462 + %575 = OpMatrixTimesMatrix %mat3v3float %574 %442 + %576 = OpMatrixTimesMatrix %mat3v3float %419 %430 + %577 = OpMatrixTimesMatrix %mat3v3float %426 %423 + %578 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_43 + %579 = OpLoad %uint %578 + OpBranch %580 + %580 = OpLabel + OpLoopMerge %581 %582 None + OpBranch %583 + %583 = OpLabel + %584 = OpMatrixTimesMatrix %mat3v3float %574 %454 + %585 = OpMatrixTimesMatrix %mat3v3float %574 %450 + %586 = OpIEqual %bool %579 %uint_1 + OpSelectionMerge %587 None + OpBranchConditional %586 %588 %589 + %589 = OpLabel + %590 = OpIEqual %bool %579 %uint_2 + OpSelectionMerge %591 None + OpBranchConditional %590 %592 %593 + %593 = OpLabel + %594 = OpIEqual %bool %579 %uint_3 + OpSelectionMerge %595 None + OpBranchConditional %594 %596 %597 + %597 = OpLabel + %598 = OpIEqual %bool %579 %uint_4 + OpSelectionMerge %599 None + OpBranchConditional %598 %600 %601 + %601 = OpLabel + OpBranch %581 + %600 = OpLabel + OpBranch %581 + %599 = OpLabel + OpUnreachable + %596 = OpLabel + OpBranch %581 + %595 = OpLabel + OpUnreachable + %592 = OpLabel + OpBranch %581 + %591 = OpLabel + OpUnreachable + %588 = OpLabel + OpBranch %581 + %587 = OpLabel + OpUnreachable + %582 = OpLabel + OpBranch %580 + %581 = OpLabel + %602 = OpPhi %mat3v3float %575 %601 %470 %600 %438 %596 %585 %592 %584 %588 + %603 = 
OpVectorShuffle %v3float %571 %571 0 1 2 + %604 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_42 + %605 = OpLoad %uint %604 + %606 = OpUGreaterThanEqual %bool %605 %uint_3 + OpSelectionMerge %607 None + OpBranchConditional %606 %608 %609 + %609 = OpLabel + %610 = OpFSub %v3float %603 %204 + %611 = OpFMul %v3float %610 %205 + %612 = OpExtInst %v3float %1 Exp2 %611 + %613 = OpFMul %v3float %612 %206 + %614 = OpExtInst %v3float %1 Exp2 %472 + %615 = OpFMul %v3float %614 %206 + %616 = OpFSub %v3float %613 %615 + OpBranch %607 + %608 = OpLabel + %617 = OpExtInst %v3float %1 Pow %603 %194 + %618 = OpFSub %v3float %617 %195 + %619 = OpExtInst %v3float %1 FMax %138 %618 + %620 = OpFMul %v3float %197 %617 + %621 = OpFSub %v3float %196 %620 + %622 = OpFDiv %v3float %619 %621 + %623 = OpExtInst %v3float %1 Pow %622 %199 + %624 = OpFMul %v3float %623 %200 + OpBranch %607 + %607 = OpLabel + %625 = OpPhi %v3float %616 %609 %624 %608 + %626 = OpAccessChain %_ptr_Uniform_float %_Globals %int_18 + %627 = OpLoad %float %626 + %628 = OpFMul %float %627 %float_1_00055635 + %629 = OpFOrdLessThanEqual %bool %628 %float_7000 + %630 = OpFDiv %float %float_4_60443853e_09 %627 + %631 = OpFSub %float %float_2967800 %630 + %632 = OpFDiv %float %631 %628 + %633 = OpFAdd %float %float_99_1100006 %632 + %634 = OpFDiv %float %633 %628 + %635 = OpFAdd %float %float_0_244063005 %634 + %636 = OpFDiv %float %float_2_00528435e_09 %627 + %637 = OpFSub %float %float_1901800 %636 + %638 = OpFDiv %float %637 %628 + %639 = OpFAdd %float %float_247_479996 %638 + %640 = OpFDiv %float %639 %628 + %641 = OpFAdd %float %float_0_237039998 %640 + %642 = OpSelect %float %629 %635 %641 + %643 = OpFMul %float %float_n3 %642 + %644 = OpFMul %float %643 %642 + %645 = OpFMul %float %float_2_86999989 %642 + %646 = OpFAdd %float %644 %645 + %647 = OpFSub %float %646 %float_0_275000006 + %648 = OpCompositeConstruct %v2float %642 %647 + %649 = OpFMul %float %float_0_000154118257 %627 + %650 = OpFAdd %float 
%float_0_860117733 %649 + %651 = OpFMul %float %float_1_28641219en07 %627 + %652 = OpFMul %float %651 %627 + %653 = OpFAdd %float %650 %652 + %654 = OpFMul %float %float_0_00084242021 %627 + %655 = OpFAdd %float %float_1 %654 + %656 = OpFMul %float %float_7_08145137en07 %627 + %657 = OpFMul %float %656 %627 + %658 = OpFAdd %float %655 %657 + %659 = OpFDiv %float %653 %658 + %660 = OpFMul %float %float_4_22806261en05 %627 + %661 = OpFAdd %float %float_0_317398727 %660 + %662 = OpFMul %float %float_4_20481676en08 %627 + %663 = OpFMul %float %662 %627 + %664 = OpFAdd %float %661 %663 + %665 = OpFMul %float %float_2_8974182en05 %627 + %666 = OpFSub %float %float_1 %665 + %667 = OpFMul %float %float_1_61456057en07 %627 + %668 = OpFMul %float %667 %627 + %669 = OpFAdd %float %666 %668 + %670 = OpFDiv %float %664 %669 + %671 = OpFMul %float %float_3 %659 + %672 = OpFMul %float %float_2 %659 + %673 = OpFMul %float %float_8 %670 + %674 = OpFSub %float %672 %673 + %675 = OpFAdd %float %674 %float_4 + %676 = OpFDiv %float %671 %675 + %677 = OpFMul %float %float_2 %670 + %678 = OpFDiv %float %677 %675 + %679 = OpCompositeConstruct %v2float %676 %678 + %680 = OpFOrdLessThan %bool %627 %float_4000 + %681 = OpCompositeConstruct %v2bool %680 %680 + %682 = OpSelect %v2float %681 %679 %648 + %683 = OpAccessChain %_ptr_Uniform_float %_Globals %int_19 + %684 = OpLoad %float %683 + %685 = OpCompositeConstruct %v2float %659 %670 + %686 = OpExtInst %v2float %1 Normalize %685 + %687 = OpCompositeExtract %float %686 1 + %688 = OpFNegate %float %687 + %689 = OpFMul %float %688 %684 + %690 = OpFMul %float %689 %float_0_0500000007 + %691 = OpFAdd %float %659 %690 + %692 = OpCompositeExtract %float %686 0 + %693 = OpFMul %float %692 %684 + %694 = OpFMul %float %693 %float_0_0500000007 + %695 = OpFAdd %float %670 %694 + %696 = OpFMul %float %float_3 %691 + %697 = OpFMul %float %float_2 %691 + %698 = OpFMul %float %float_8 %695 + %699 = OpFSub %float %697 %698 + %700 = OpFAdd %float %699 
%float_4 + %701 = OpFDiv %float %696 %700 + %702 = OpFMul %float %float_2 %695 + %703 = OpFDiv %float %702 %700 + %704 = OpCompositeConstruct %v2float %701 %703 + %705 = OpFSub %v2float %704 %679 + %706 = OpFAdd %v2float %682 %705 + %707 = OpCompositeExtract %float %706 0 + %708 = OpCompositeExtract %float %706 1 + %709 = OpExtInst %float %1 FMax %708 %float_1_00000001en10 + %710 = OpFDiv %float %707 %709 + %711 = OpCompositeInsert %v3float %710 %523 0 + %712 = OpCompositeInsert %v3float %float_1 %711 1 + %713 = OpFSub %float %float_1 %707 + %714 = OpFSub %float %713 %708 + %715 = OpFDiv %float %714 %709 + %716 = OpCompositeInsert %v3float %715 %712 2 + %717 = OpExtInst %float %1 FMax %float_0_328999996 %float_1_00000001en10 + %718 = OpFDiv %float %float_0_312700003 %717 + %719 = OpCompositeInsert %v3float %718 %523 0 + %720 = OpCompositeInsert %v3float %float_1 %719 1 + %721 = OpFDiv %float %float_0_358299971 %717 + %722 = OpCompositeInsert %v3float %721 %720 2 + %723 = OpVectorTimesMatrix %v3float %716 %476 + %724 = OpVectorTimesMatrix %v3float %722 %476 + %725 = OpCompositeExtract %float %724 0 + %726 = OpCompositeExtract %float %723 0 + %727 = OpFDiv %float %725 %726 + %728 = OpCompositeConstruct %v3float %727 %float_0 %float_0 + %729 = OpCompositeExtract %float %724 1 + %730 = OpCompositeExtract %float %723 1 + %731 = OpFDiv %float %729 %730 + %732 = OpCompositeConstruct %v3float %float_0 %731 %float_0 + %733 = OpCompositeExtract %float %724 2 + %734 = OpCompositeExtract %float %723 2 + %735 = OpFDiv %float %733 %734 + %736 = OpCompositeConstruct %v3float %float_0 %float_0 %735 + %737 = OpCompositeConstruct %mat3v3float %728 %732 %736 + %738 = OpMatrixTimesMatrix %mat3v3float %476 %737 + %739 = OpMatrixTimesMatrix %mat3v3float %738 %480 + %740 = OpMatrixTimesMatrix %mat3v3float %446 %739 + %741 = OpMatrixTimesMatrix %mat3v3float %740 %442 + %742 = OpVectorTimesMatrix %v3float %625 %741 + %743 = OpVectorTimesMatrix %v3float %742 %573 + %744 = OpAccessChain 
%_ptr_Uniform_v4float %_Globals %int_9 + %745 = OpAccessChain %_ptr_Uniform_float %_Globals %int_9 %int_3 + %746 = OpLoad %float %745 + %747 = OpFOrdNotEqual %bool %746 %float_0 + OpSelectionMerge %748 None + OpBranchConditional %747 %749 %748 + %749 = OpLabel + %750 = OpDot %float %743 %73 + %751 = OpCompositeConstruct %v3float %750 %750 %750 + %752 = OpFDiv %v3float %743 %751 + %753 = OpFSub %v3float %752 %141 + %754 = OpDot %float %753 %753 + %755 = OpFMul %float %float_n4 %754 + %756 = OpExtInst %float %1 Exp2 %755 + %757 = OpFSub %float %float_1 %756 + %758 = OpAccessChain %_ptr_Uniform_float %_Globals %int_45 + %759 = OpLoad %float %758 + %760 = OpFMul %float %float_n4 %759 + %761 = OpFMul %float %760 %750 + %762 = OpFMul %float %761 %750 + %763 = OpExtInst %float %1 Exp2 %762 + %764 = OpFSub %float %float_1 %763 + %765 = OpFMul %float %757 %764 + %766 = OpMatrixTimesMatrix %mat3v3float %484 %430 + %767 = OpMatrixTimesMatrix %mat3v3float %575 %766 + %768 = OpVectorTimesMatrix %v3float %743 %767 + %769 = OpCompositeConstruct %v3float %765 %765 %765 + %770 = OpExtInst %v3float %1 FMix %743 %768 %769 + OpBranch %748 + %748 = OpLabel + %771 = OpPhi %v3float %743 %607 %770 %749 + %772 = OpDot %float %771 %73 + %773 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_25 + %774 = OpLoad %v4float %773 + %775 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_20 + %776 = OpLoad %v4float %775 + %777 = OpFMul %v4float %774 %776 + %778 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_26 + %779 = OpLoad %v4float %778 + %780 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_21 + %781 = OpLoad %v4float %780 + %782 = OpFMul %v4float %779 %781 + %783 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_27 + %784 = OpLoad %v4float %783 + %785 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_22 + %786 = OpLoad %v4float %785 + %787 = OpFMul %v4float %784 %786 + %788 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_28 + %789 = OpLoad %v4float %788 + %790 = 
OpAccessChain %_ptr_Uniform_v4float %_Globals %int_23 + %791 = OpLoad %v4float %790 + %792 = OpFMul %v4float %789 %791 + %793 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_29 + %794 = OpLoad %v4float %793 + %795 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_24 + %796 = OpLoad %v4float %795 + %797 = OpFAdd %v4float %794 %796 + %798 = OpCompositeConstruct %v3float %772 %772 %772 + %799 = OpVectorShuffle %v3float %777 %777 0 1 2 + %800 = OpCompositeExtract %float %777 3 + %801 = OpCompositeConstruct %v3float %800 %800 %800 + %802 = OpFMul %v3float %799 %801 + %803 = OpExtInst %v3float %1 FMix %798 %771 %802 + %804 = OpExtInst %v3float %1 FMax %138 %803 + %805 = OpFMul %v3float %804 %330 + %806 = OpVectorShuffle %v3float %782 %782 0 1 2 + %807 = OpCompositeExtract %float %782 3 + %808 = OpCompositeConstruct %v3float %807 %807 %807 + %809 = OpFMul %v3float %806 %808 + %810 = OpExtInst %v3float %1 Pow %805 %809 + %811 = OpFMul %v3float %810 %206 + %812 = OpVectorShuffle %v3float %787 %787 0 1 2 + %813 = OpCompositeExtract %float %787 3 + %814 = OpCompositeConstruct %v3float %813 %813 %813 + %815 = OpFMul %v3float %812 %814 + %816 = OpFDiv %v3float %141 %815 + %817 = OpExtInst %v3float %1 Pow %811 %816 + %818 = OpVectorShuffle %v3float %792 %792 0 1 2 + %819 = OpCompositeExtract %float %792 3 + %820 = OpCompositeConstruct %v3float %819 %819 %819 + %821 = OpFMul %v3float %818 %820 + %822 = OpFMul %v3float %817 %821 + %823 = OpVectorShuffle %v3float %797 %797 0 1 2 + %824 = OpCompositeExtract %float %797 3 + %825 = OpCompositeConstruct %v3float %824 %824 %824 + %826 = OpFAdd %v3float %823 %825 + %827 = OpFAdd %v3float %822 %826 + %828 = OpAccessChain %_ptr_Uniform_float %_Globals %int_40 + %829 = OpLoad %float %828 + %830 = OpExtInst %float %1 SmoothStep %float_0 %829 %772 + %831 = OpFSub %float %float_1 %830 + %832 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_35 + %833 = OpLoad %v4float %832 + %834 = OpFMul %v4float %833 %776 + %835 = OpAccessChain 
%_ptr_Uniform_v4float %_Globals %int_36 + %836 = OpLoad %v4float %835 + %837 = OpFMul %v4float %836 %781 + %838 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_37 + %839 = OpLoad %v4float %838 + %840 = OpFMul %v4float %839 %786 + %841 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_38 + %842 = OpLoad %v4float %841 + %843 = OpFMul %v4float %842 %791 + %844 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_39 + %845 = OpLoad %v4float %844 + %846 = OpFAdd %v4float %845 %796 + %847 = OpVectorShuffle %v3float %834 %834 0 1 2 + %848 = OpCompositeExtract %float %834 3 + %849 = OpCompositeConstruct %v3float %848 %848 %848 + %850 = OpFMul %v3float %847 %849 + %851 = OpExtInst %v3float %1 FMix %798 %771 %850 + %852 = OpExtInst %v3float %1 FMax %138 %851 + %853 = OpFMul %v3float %852 %330 + %854 = OpVectorShuffle %v3float %837 %837 0 1 2 + %855 = OpCompositeExtract %float %837 3 + %856 = OpCompositeConstruct %v3float %855 %855 %855 + %857 = OpFMul %v3float %854 %856 + %858 = OpExtInst %v3float %1 Pow %853 %857 + %859 = OpFMul %v3float %858 %206 + %860 = OpVectorShuffle %v3float %840 %840 0 1 2 + %861 = OpCompositeExtract %float %840 3 + %862 = OpCompositeConstruct %v3float %861 %861 %861 + %863 = OpFMul %v3float %860 %862 + %864 = OpFDiv %v3float %141 %863 + %865 = OpExtInst %v3float %1 Pow %859 %864 + %866 = OpVectorShuffle %v3float %843 %843 0 1 2 + %867 = OpCompositeExtract %float %843 3 + %868 = OpCompositeConstruct %v3float %867 %867 %867 + %869 = OpFMul %v3float %866 %868 + %870 = OpFMul %v3float %865 %869 + %871 = OpVectorShuffle %v3float %846 %846 0 1 2 + %872 = OpCompositeExtract %float %846 3 + %873 = OpCompositeConstruct %v3float %872 %872 %872 + %874 = OpFAdd %v3float %871 %873 + %875 = OpFAdd %v3float %870 %874 + %876 = OpAccessChain %_ptr_Uniform_float %_Globals %int_41 + %877 = OpLoad %float %876 + %878 = OpExtInst %float %1 SmoothStep %877 %float_1 %772 + %879 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_30 + %880 = OpLoad %v4float %879 + 
%881 = OpFMul %v4float %880 %776 + %882 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_31 + %883 = OpLoad %v4float %882 + %884 = OpFMul %v4float %883 %781 + %885 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_32 + %886 = OpLoad %v4float %885 + %887 = OpFMul %v4float %886 %786 + %888 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_33 + %889 = OpLoad %v4float %888 + %890 = OpFMul %v4float %889 %791 + %891 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_34 + %892 = OpLoad %v4float %891 + %893 = OpFAdd %v4float %892 %796 + %894 = OpVectorShuffle %v3float %881 %881 0 1 2 + %895 = OpCompositeExtract %float %881 3 + %896 = OpCompositeConstruct %v3float %895 %895 %895 + %897 = OpFMul %v3float %894 %896 + %898 = OpExtInst %v3float %1 FMix %798 %771 %897 + %899 = OpExtInst %v3float %1 FMax %138 %898 + %900 = OpFMul %v3float %899 %330 + %901 = OpVectorShuffle %v3float %884 %884 0 1 2 + %902 = OpCompositeExtract %float %884 3 + %903 = OpCompositeConstruct %v3float %902 %902 %902 + %904 = OpFMul %v3float %901 %903 + %905 = OpExtInst %v3float %1 Pow %900 %904 + %906 = OpFMul %v3float %905 %206 + %907 = OpVectorShuffle %v3float %887 %887 0 1 2 + %908 = OpCompositeExtract %float %887 3 + %909 = OpCompositeConstruct %v3float %908 %908 %908 + %910 = OpFMul %v3float %907 %909 + %911 = OpFDiv %v3float %141 %910 + %912 = OpExtInst %v3float %1 Pow %906 %911 + %913 = OpVectorShuffle %v3float %890 %890 0 1 2 + %914 = OpCompositeExtract %float %890 3 + %915 = OpCompositeConstruct %v3float %914 %914 %914 + %916 = OpFMul %v3float %913 %915 + %917 = OpFMul %v3float %912 %916 + %918 = OpVectorShuffle %v3float %893 %893 0 1 2 + %919 = OpCompositeExtract %float %893 3 + %920 = OpCompositeConstruct %v3float %919 %919 %919 + %921 = OpFAdd %v3float %918 %920 + %922 = OpFAdd %v3float %917 %921 + %923 = OpFSub %float %830 %878 + %924 = OpCompositeConstruct %v3float %831 %831 %831 + %925 = OpFMul %v3float %827 %924 + %926 = OpCompositeConstruct %v3float %923 %923 %923 + %927 = 
OpFMul %v3float %922 %926 + %928 = OpFAdd %v3float %925 %927 + %929 = OpCompositeConstruct %v3float %878 %878 %878 + %930 = OpFMul %v3float %875 %929 + %931 = OpFAdd %v3float %928 %930 + %932 = OpVectorTimesMatrix %v3float %931 %575 + %933 = OpMatrixTimesMatrix %mat3v3float %577 %488 + %934 = OpMatrixTimesMatrix %mat3v3float %933 %576 + %935 = OpMatrixTimesMatrix %mat3v3float %577 %492 + %936 = OpMatrixTimesMatrix %mat3v3float %935 %576 + %937 = OpVectorTimesMatrix %v3float %931 %934 + %938 = OpAccessChain %_ptr_Uniform_float %_Globals %int_44 + %939 = OpLoad %float %938 + %940 = OpCompositeConstruct %v3float %939 %939 %939 + %941 = OpExtInst %v3float %1 FMix %931 %937 %940 + %942 = OpVectorTimesMatrix %v3float %941 %577 + %943 = OpCompositeExtract %float %942 0 + %944 = OpCompositeExtract %float %942 1 + %945 = OpExtInst %float %1 FMin %943 %944 + %946 = OpCompositeExtract %float %942 2 + %947 = OpExtInst %float %1 FMin %945 %946 + %948 = OpExtInst %float %1 FMax %943 %944 + %949 = OpExtInst %float %1 FMax %948 %946 + %950 = OpExtInst %float %1 FMax %949 %float_1_00000001en10 + %951 = OpExtInst %float %1 FMax %947 %float_1_00000001en10 + %952 = OpFSub %float %950 %951 + %953 = OpExtInst %float %1 FMax %949 %float_0_00999999978 + %954 = OpFDiv %float %952 %953 + %955 = OpFSub %float %946 %944 + %956 = OpFMul %float %946 %955 + %957 = OpFSub %float %944 %943 + %958 = OpFMul %float %944 %957 + %959 = OpFAdd %float %956 %958 + %960 = OpFSub %float %943 %946 + %961 = OpFMul %float %943 %960 + %962 = OpFAdd %float %959 %961 + %963 = OpExtInst %float %1 Sqrt %962 + %964 = OpFAdd %float %946 %944 + %965 = OpFAdd %float %964 %943 + %966 = OpFMul %float %float_1_75 %963 + %967 = OpFAdd %float %965 %966 + %968 = OpFMul %float %967 %float_0_333333343 + %969 = OpFSub %float %954 %float_0_400000006 + %970 = OpFMul %float %969 %float_5 + %971 = OpFMul %float %969 %float_2_5 + %972 = OpExtInst %float %1 FAbs %971 + %973 = OpFSub %float %float_1 %972 + %974 = OpExtInst %float %1 
FMax %973 %float_0 + %975 = OpExtInst %float %1 FSign %970 + %976 = OpConvertFToS %int %975 + %977 = OpConvertSToF %float %976 + %978 = OpFMul %float %974 %974 + %979 = OpFSub %float %float_1 %978 + %980 = OpFMul %float %977 %979 + %981 = OpFAdd %float %float_1 %980 + %982 = OpFMul %float %981 %float_0_0250000004 + %983 = OpFOrdLessThanEqual %bool %968 %float_0_0533333346 + OpSelectionMerge %984 None + OpBranchConditional %983 %985 %986 + %986 = OpLabel + %987 = OpFOrdGreaterThanEqual %bool %968 %float_0_159999996 + OpSelectionMerge %988 None + OpBranchConditional %987 %989 %990 + %990 = OpLabel + %991 = OpFDiv %float %float_0_239999995 %967 + %992 = OpFSub %float %991 %float_0_5 + %993 = OpFMul %float %982 %992 + OpBranch %988 + %989 = OpLabel + OpBranch %988 + %988 = OpLabel + %994 = OpPhi %float %993 %990 %float_0 %989 + OpBranch %984 + %985 = OpLabel + OpBranch %984 + %984 = OpLabel + %995 = OpPhi %float %994 %988 %982 %985 + %996 = OpFAdd %float %float_1 %995 + %997 = OpCompositeConstruct %v3float %996 %996 %996 + %998 = OpFMul %v3float %942 %997 + %999 = OpCompositeExtract %float %998 0 + %1000 = OpCompositeExtract %float %998 1 + %1001 = OpFOrdEqual %bool %999 %1000 + %1002 = OpCompositeExtract %float %998 2 + %1003 = OpFOrdEqual %bool %1000 %1002 + %1004 = OpLogicalAnd %bool %1001 %1003 + OpSelectionMerge %1005 None + OpBranchConditional %1004 %1006 %1007 + %1007 = OpLabel + %1008 = OpExtInst %float %1 Sqrt %float_3 + %1009 = OpFSub %float %1000 %1002 + %1010 = OpFMul %float %1008 %1009 + %1011 = OpFMul %float %float_2 %999 + %1012 = OpFSub %float %1011 %1000 + %1013 = OpFSub %float %1012 %1002 + %1014 = OpExtInst %float %1 Atan2 %1010 %1013 + %1015 = OpFMul %float %float_57_2957764 %1014 + OpBranch %1005 + %1006 = OpLabel + OpBranch %1005 + %1005 = OpLabel + %1016 = OpPhi %float %1015 %1007 %float_0 %1006 + %1017 = OpFOrdLessThan %bool %1016 %float_0 + OpSelectionMerge %1018 None + OpBranchConditional %1017 %1019 %1018 + %1019 = OpLabel + %1020 = OpFAdd 
%float %1016 %float_360 + OpBranch %1018 + %1018 = OpLabel + %1021 = OpPhi %float %1016 %1005 %1020 %1019 + %1022 = OpExtInst %float %1 FClamp %1021 %float_0 %float_360 + %1023 = OpFOrdGreaterThan %bool %1022 %float_180 + OpSelectionMerge %1024 None + OpBranchConditional %1023 %1025 %1024 + %1025 = OpLabel + %1026 = OpFSub %float %1022 %float_360 + OpBranch %1024 + %1024 = OpLabel + %1027 = OpPhi %float %1022 %1018 %1026 %1025 + %1028 = OpFMul %float %1027 %float_0_0148148146 + %1029 = OpExtInst %float %1 FAbs %1028 + %1030 = OpFSub %float %float_1 %1029 + %1031 = OpExtInst %float %1 SmoothStep %float_0 %float_1 %1030 + %1032 = OpFMul %float %1031 %1031 + %1033 = OpFMul %float %1032 %954 + %1034 = OpFSub %float %float_0_0299999993 %999 + %1035 = OpFMul %float %1033 %1034 + %1036 = OpFMul %float %1035 %float_0_180000007 + %1037 = OpFAdd %float %999 %1036 + %1038 = OpCompositeInsert %v3float %1037 %998 0 + %1039 = OpVectorTimesMatrix %v3float %1038 %434 + %1040 = OpExtInst %v3float %1 FMax %138 %1039 + %1041 = OpDot %float %1040 %73 + %1042 = OpCompositeConstruct %v3float %1041 %1041 %1041 + %1043 = OpExtInst %v3float %1 FMix %1042 %1040 %241 + %1044 = OpAccessChain %_ptr_Uniform_float %_Globals %int_13 + %1045 = OpLoad %float %1044 + %1046 = OpFAdd %float %float_1 %1045 + %1047 = OpAccessChain %_ptr_Uniform_float %_Globals %int_11 + %1048 = OpLoad %float %1047 + %1049 = OpFSub %float %1046 %1048 + %1050 = OpAccessChain %_ptr_Uniform_float %_Globals %int_14 + %1051 = OpLoad %float %1050 + %1052 = OpFAdd %float %float_1 %1051 + %1053 = OpAccessChain %_ptr_Uniform_float %_Globals %int_12 + %1054 = OpLoad %float %1053 + %1055 = OpFSub %float %1052 %1054 + %1056 = OpFOrdGreaterThan %bool %1048 %float_0_800000012 + OpSelectionMerge %1057 None + OpBranchConditional %1056 %1058 %1059 + %1059 = OpLabel + %1060 = OpFAdd %float %float_0_180000007 %1045 + %1061 = OpFDiv %float %1060 %1049 + %1062 = OpExtInst %float %1 Log %float_0_180000007 + %1063 = OpExtInst %float %1 Log 
%float_10 + %1064 = OpFDiv %float %1062 %1063 + %1065 = OpFSub %float %float_2 %1061 + %1066 = OpFDiv %float %1061 %1065 + %1067 = OpExtInst %float %1 Log %1066 + %1068 = OpFMul %float %float_0_5 %1067 + %1069 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1070 = OpLoad %float %1069 + %1071 = OpFDiv %float %1049 %1070 + %1072 = OpFMul %float %1068 %1071 + %1073 = OpFSub %float %1064 %1072 + OpBranch %1057 + %1058 = OpLabel + %1074 = OpFSub %float %float_0_819999993 %1048 + %1075 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1076 = OpLoad %float %1075 + %1077 = OpFDiv %float %1074 %1076 + %1078 = OpExtInst %float %1 Log %float_0_180000007 + %1079 = OpExtInst %float %1 Log %float_10 + %1080 = OpFDiv %float %1078 %1079 + %1081 = OpFAdd %float %1077 %1080 + OpBranch %1057 + %1057 = OpLabel + %1082 = OpPhi %float %1073 %1059 %1081 %1058 + %1083 = OpFSub %float %float_1 %1048 + %1084 = OpAccessChain %_ptr_Uniform_float %_Globals %int_10 + %1085 = OpLoad %float %1084 + %1086 = OpFDiv %float %1083 %1085 + %1087 = OpFSub %float %1086 %1082 + %1088 = OpFDiv %float %1054 %1085 + %1089 = OpFSub %float %1088 %1087 + %1090 = OpExtInst %v3float %1 Log %1043 + %1091 = OpExtInst %float %1 Log %float_10 + %1092 = OpCompositeConstruct %v3float %1091 %1091 %1091 + %1093 = OpFDiv %v3float %1090 %1092 + %1094 = OpCompositeConstruct %v3float %1085 %1085 %1085 + %1095 = OpCompositeConstruct %v3float %1087 %1087 %1087 + %1096 = OpFAdd %v3float %1093 %1095 + %1097 = OpFMul %v3float %1094 %1096 + %1098 = OpFNegate %float %1045 + %1099 = OpCompositeConstruct %v3float %1098 %1098 %1098 + %1100 = OpFMul %float %float_2 %1049 + %1101 = OpCompositeConstruct %v3float %1100 %1100 %1100 + %1102 = OpFMul %float %float_n2 %1085 + %1103 = OpFDiv %float %1102 %1049 + %1104 = OpCompositeConstruct %v3float %1103 %1103 %1103 + %1105 = OpCompositeConstruct %v3float %1082 %1082 %1082 + %1106 = OpFSub %v3float %1093 %1105 + %1107 = OpFMul %v3float %1104 %1106 + %1108 = OpExtInst %v3float 
%1 Exp %1107 + %1109 = OpFAdd %v3float %141 %1108 + %1110 = OpFDiv %v3float %1101 %1109 + %1111 = OpFAdd %v3float %1099 %1110 + %1112 = OpCompositeConstruct %v3float %1052 %1052 %1052 + %1113 = OpFMul %float %float_2 %1055 + %1114 = OpCompositeConstruct %v3float %1113 %1113 %1113 + %1115 = OpFMul %float %float_2 %1085 + %1116 = OpFDiv %float %1115 %1055 + %1117 = OpCompositeConstruct %v3float %1116 %1116 %1116 + %1118 = OpCompositeConstruct %v3float %1089 %1089 %1089 + %1119 = OpFSub %v3float %1093 %1118 + %1120 = OpFMul %v3float %1117 %1119 + %1121 = OpExtInst %v3float %1 Exp %1120 + %1122 = OpFAdd %v3float %141 %1121 + %1123 = OpFDiv %v3float %1114 %1122 + %1124 = OpFSub %v3float %1112 %1123 + %1125 = OpFOrdLessThan %v3bool %1093 %1105 + %1126 = OpSelect %v3float %1125 %1111 %1097 + %1127 = OpFOrdGreaterThan %v3bool %1093 %1118 + %1128 = OpSelect %v3float %1127 %1124 %1097 + %1129 = OpFSub %float %1089 %1082 + %1130 = OpCompositeConstruct %v3float %1129 %1129 %1129 + %1131 = OpFDiv %v3float %1106 %1130 + %1132 = OpExtInst %v3float %1 FClamp %1131 %138 %141 + %1133 = OpFOrdLessThan %bool %1089 %1082 + %1134 = OpFSub %v3float %141 %1132 + %1135 = OpCompositeConstruct %v3bool %1133 %1133 %1133 + %1136 = OpSelect %v3float %1135 %1134 %1132 + %1137 = OpFMul %v3float %252 %1136 + %1138 = OpFSub %v3float %251 %1137 + %1139 = OpFMul %v3float %1138 %1136 + %1140 = OpFMul %v3float %1139 %1136 + %1141 = OpExtInst %v3float %1 FMix %1126 %1128 %1140 + %1142 = OpDot %float %1141 %73 + %1143 = OpCompositeConstruct %v3float %1142 %1142 %1142 + %1144 = OpExtInst %v3float %1 FMix %1143 %1141 %254 + %1145 = OpExtInst %v3float %1 FMax %138 %1144 + %1146 = OpVectorTimesMatrix %v3float %1145 %936 + %1147 = OpExtInst %v3float %1 FMix %1145 %1146 %940 + %1148 = OpVectorTimesMatrix %v3float %1147 %575 + %1149 = OpExtInst %v3float %1 FMax %138 %1148 + %1150 = OpFOrdEqual %bool %746 %float_0 + OpSelectionMerge %1151 DontFlatten + OpBranchConditional %1150 %1152 %1151 + %1152 = OpLabel + 
%1153 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_2 + %1154 = OpLoad %v4float %1153 + %1155 = OpVectorShuffle %v3float %1154 %1154 0 1 2 + %1156 = OpDot %float %932 %1155 + %1157 = OpCompositeInsert %v3float %1156 %525 0 + %1158 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 + %1159 = OpLoad %v4float %1158 + %1160 = OpVectorShuffle %v3float %1159 %1159 0 1 2 + %1161 = OpDot %float %932 %1160 + %1162 = OpCompositeInsert %v3float %1161 %1157 1 + %1163 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_4 + %1164 = OpLoad %v4float %1163 + %1165 = OpVectorShuffle %v3float %1164 %1164 0 1 2 + %1166 = OpDot %float %932 %1165 + %1167 = OpCompositeInsert %v3float %1166 %1162 2 + %1168 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_8 + %1169 = OpLoad %v4float %1168 + %1170 = OpVectorShuffle %v3float %1169 %1169 0 1 2 + %1171 = OpLoad %v4float %744 + %1172 = OpVectorShuffle %v3float %1171 %1171 0 1 2 + %1173 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %1174 = OpLoad %v4float %1173 + %1175 = OpVectorShuffle %v3float %1174 %1174 0 1 2 + %1176 = OpDot %float %932 %1175 + %1177 = OpFAdd %float %1176 %float_1 + %1178 = OpFDiv %float %float_1 %1177 + %1179 = OpCompositeConstruct %v3float %1178 %1178 %1178 + %1180 = OpFMul %v3float %1172 %1179 + %1181 = OpFAdd %v3float %1170 %1180 + %1182 = OpFMul %v3float %1167 %1181 + %1183 = OpExtInst %v3float %1 FMax %138 %1182 + %1184 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_5 + %1185 = OpLoad %v4float %1184 + %1186 = OpVectorShuffle %v3float %1185 %1185 0 0 0 + %1187 = OpFSub %v3float %1186 %1183 + %1188 = OpExtInst %v3float %1 FMax %138 %1187 + %1189 = OpVectorShuffle %v3float %1185 %1185 2 2 2 + %1190 = OpExtInst %v3float %1 FMax %1183 %1189 + %1191 = OpExtInst %v3float %1 FClamp %1183 %1186 %1189 + %1192 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_6 + %1193 = OpLoad %v4float %1192 + %1194 = OpVectorShuffle %v3float %1193 %1193 0 0 0 + %1195 = OpFMul %v3float %1190 %1194 + %1196 = 
OpVectorShuffle %v3float %1193 %1193 1 1 1 + %1197 = OpFAdd %v3float %1195 %1196 + %1198 = OpVectorShuffle %v3float %1185 %1185 3 3 3 + %1199 = OpFAdd %v3float %1190 %1198 + %1200 = OpFDiv %v3float %141 %1199 + %1201 = OpFMul %v3float %1197 %1200 + %1202 = OpVectorShuffle %v3float %1164 %1164 3 3 3 + %1203 = OpFMul %v3float %1191 %1202 + %1204 = OpVectorShuffle %v3float %1154 %1154 3 3 3 + %1205 = OpFMul %v3float %1188 %1204 + %1206 = OpVectorShuffle %v3float %1185 %1185 1 1 1 + %1207 = OpFAdd %v3float %1188 %1206 + %1208 = OpFDiv %v3float %141 %1207 + %1209 = OpFMul %v3float %1205 %1208 + %1210 = OpVectorShuffle %v3float %1159 %1159 3 3 3 + %1211 = OpFAdd %v3float %1209 %1210 + %1212 = OpFAdd %v3float %1203 %1211 + %1213 = OpFAdd %v3float %1201 %1212 + %1214 = OpFSub %v3float %1213 %261 + OpBranch %1151 + %1151 = OpLabel + %1215 = OpPhi %v3float %1149 %1057 %1214 %1152 + %1216 = OpExtInst %v3float %1 FClamp %1215 %138 %141 + %1217 = OpCompositeExtract %float %1216 0 + OpBranch %1218 + %1218 = OpLabel + OpLoopMerge %1219 %1220 None + OpBranch %1221 + %1221 = OpLabel + %1222 = OpFOrdLessThan %bool %1217 %float_0_00313066994 + OpSelectionMerge %1223 None + OpBranchConditional %1222 %1224 %1223 + %1224 = OpLabel + %1225 = OpFMul %float %1217 %float_12_9200001 + OpBranch %1219 + %1223 = OpLabel + %1226 = OpExtInst %float %1 Pow %1217 %float_0_416666657 + %1227 = OpFMul %float %1226 %float_1_05499995 + %1228 = OpFSub %float %1227 %float_0_0549999997 + OpBranch %1219 + %1220 = OpLabel + OpBranch %1218 + %1219 = OpLabel + %1229 = OpPhi %float %1225 %1224 %1228 %1223 + %1230 = OpCompositeExtract %float %1216 1 + OpBranch %1231 + %1231 = OpLabel + OpLoopMerge %1232 %1233 None + OpBranch %1234 + %1234 = OpLabel + %1235 = OpFOrdLessThan %bool %1230 %float_0_00313066994 + OpSelectionMerge %1236 None + OpBranchConditional %1235 %1237 %1236 + %1237 = OpLabel + %1238 = OpFMul %float %1230 %float_12_9200001 + OpBranch %1232 + %1236 = OpLabel + %1239 = OpExtInst %float %1 Pow %1230 
%float_0_416666657 + %1240 = OpFMul %float %1239 %float_1_05499995 + %1241 = OpFSub %float %1240 %float_0_0549999997 + OpBranch %1232 + %1233 = OpLabel + OpBranch %1231 + %1232 = OpLabel + %1242 = OpPhi %float %1238 %1237 %1241 %1236 + %1243 = OpCompositeExtract %float %1216 2 + OpBranch %1244 + %1244 = OpLabel + OpLoopMerge %1245 %1246 None + OpBranch %1247 + %1247 = OpLabel + %1248 = OpFOrdLessThan %bool %1243 %float_0_00313066994 + OpSelectionMerge %1249 None + OpBranchConditional %1248 %1250 %1249 + %1250 = OpLabel + %1251 = OpFMul %float %1243 %float_12_9200001 + OpBranch %1245 + %1249 = OpLabel + %1252 = OpExtInst %float %1 Pow %1243 %float_0_416666657 + %1253 = OpFMul %float %1252 %float_1_05499995 + %1254 = OpFSub %float %1253 %float_0_0549999997 + OpBranch %1245 + %1246 = OpLabel + OpBranch %1244 + %1245 = OpLabel + %1255 = OpPhi %float %1251 %1250 %1254 %1249 + %1256 = OpCompositeConstruct %v3float %1229 %1242 %1255 + %1257 = OpFMul %v3float %1256 %173 + %1258 = OpFAdd %v3float %1257 %175 + %1259 = OpAccessChain %_ptr_Uniform_float %_Globals %int_15 %int_0 + %1260 = OpLoad %float %1259 + %1261 = OpCompositeConstruct %v3float %1260 %1260 %1260 + %1262 = OpFMul %v3float %1261 %1256 + %1263 = OpAccessChain %_ptr_Uniform_float %_Globals %int_15 %int_1 + %1264 = OpLoad %float %1263 + %1265 = OpCompositeConstruct %v3float %1264 %1264 %1264 + %1266 = OpLoad %type_2d_image %Texture1 + %1267 = OpLoad %type_sampler %Texture1Sampler + %1268 = OpCompositeExtract %float %1258 2 + %1269 = OpFMul %float %1268 %float_16 + %1270 = OpFSub %float %1269 %float_0_5 + %1271 = OpExtInst %float %1 Floor %1270 + %1272 = OpFSub %float %1270 %1271 + %1273 = OpCompositeExtract %float %1258 0 + %1274 = OpFAdd %float %1273 %1271 + %1275 = OpFMul %float %1274 %float_0_0625 + %1276 = OpCompositeExtract %float %1258 1 + %1277 = OpCompositeConstruct %v2float %1275 %1276 + %1278 = OpSampledImage %type_sampled_image %1266 %1267 + %1279 = OpImageSampleImplicitLod %v4float %1278 %1277 None + 
%1280 = OpFAdd %float %1275 %float_0_0625 + %1281 = OpCompositeConstruct %v2float %1280 %1276 + %1282 = OpSampledImage %type_sampled_image %1266 %1267 + %1283 = OpImageSampleImplicitLod %v4float %1282 %1281 None + %1284 = OpCompositeConstruct %v4float %1272 %1272 %1272 %1272 + %1285 = OpExtInst %v4float %1 FMix %1279 %1283 %1284 + %1286 = OpVectorShuffle %v3float %1285 %1285 0 1 2 + %1287 = OpFMul %v3float %1265 %1286 + %1288 = OpFAdd %v3float %1262 %1287 + %1289 = OpExtInst %v3float %1 FMax %263 %1288 + %1290 = OpFOrdGreaterThan %v3bool %1289 %265 + %1291 = OpFMul %v3float %1289 %267 + %1292 = OpFAdd %v3float %1291 %269 + %1293 = OpExtInst %v3float %1 Pow %1292 %271 + %1294 = OpFMul %v3float %1289 %273 + %1295 = OpSelect %v3float %1290 %1293 %1294 + %1296 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_0 + %1297 = OpLoad %float %1296 + %1298 = OpCompositeConstruct %v3float %1297 %1297 %1297 + %1299 = OpFMul %v3float %1295 %1295 + %1300 = OpFMul %v3float %1298 %1299 + %1301 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_1 + %1302 = OpLoad %float %1301 + %1303 = OpCompositeConstruct %v3float %1302 %1302 %1302 + %1304 = OpFMul %v3float %1303 %1295 + %1305 = OpFAdd %v3float %1300 %1304 + %1306 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %1307 = OpLoad %float %1306 + %1308 = OpCompositeConstruct %v3float %1307 %1307 %1307 + %1309 = OpFAdd %v3float %1305 %1308 + %1310 = OpAccessChain %_ptr_Uniform_v3float %_Globals %int_16 + %1311 = OpLoad %v3float %1310 + %1312 = OpFMul %v3float %1309 %1311 + %1313 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_17 + %1314 = OpLoad %v4float %1313 + %1315 = OpVectorShuffle %v3float %1314 %1314 0 1 2 + %1316 = OpAccessChain %_ptr_Uniform_float %_Globals %int_17 %int_3 + %1317 = OpLoad %float %1316 + %1318 = OpCompositeConstruct %v3float %1317 %1317 %1317 + %1319 = OpExtInst %v3float %1 FMix %1312 %1315 %1318 + %1320 = OpExtInst %v3float %1 FMax %138 %1319 + %1321 = OpAccessChain 
%_ptr_Uniform_float %_Globals %int_1 %int_1 + %1322 = OpLoad %float %1321 + %1323 = OpCompositeConstruct %v3float %1322 %1322 %1322 + %1324 = OpExtInst %v3float %1 Pow %1320 %1323 + %1325 = OpIEqual %bool %605 %uint_0 + OpSelectionMerge %1326 DontFlatten + OpBranchConditional %1325 %1327 %1328 + %1328 = OpLabel + %1329 = OpIEqual %bool %605 %uint_1 + OpSelectionMerge %1330 None + OpBranchConditional %1329 %1331 %1332 + %1332 = OpLabel + %1333 = OpIEqual %bool %605 %uint_3 + %1334 = OpIEqual %bool %605 %uint_5 + %1335 = OpLogicalOr %bool %1333 %1334 + OpSelectionMerge %1336 None + OpBranchConditional %1335 %1337 %1338 + %1338 = OpLabel + %1339 = OpIEqual %bool %605 %uint_4 + %1340 = OpIEqual %bool %605 %uint_6 + %1341 = OpLogicalOr %bool %1339 %1340 + OpSelectionMerge %1342 None + OpBranchConditional %1341 %1343 %1344 + %1344 = OpLabel + %1345 = OpIEqual %bool %605 %uint_7 + OpSelectionMerge %1346 None + OpBranchConditional %1345 %1347 %1348 + %1348 = OpLabel + %1349 = OpVectorTimesMatrix %v3float %1324 %573 + %1350 = OpVectorTimesMatrix %v3float %1349 %602 + %1351 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %int_2 + %1352 = OpLoad %float %1351 + %1353 = OpCompositeConstruct %v3float %1352 %1352 %1352 + %1354 = OpExtInst %v3float %1 Pow %1350 %1353 + OpBranch %1346 + %1347 = OpLabel + %1355 = OpVectorTimesMatrix %v3float %932 %573 + %1356 = OpVectorTimesMatrix %v3float %1355 %602 + %1357 = OpFMul %v3float %1356 %519 + %1358 = OpExtInst %v3float %1 Pow %1357 %286 + %1359 = OpFMul %v3float %196 %1358 + %1360 = OpFAdd %v3float %195 %1359 + %1361 = OpFMul %v3float %197 %1358 + %1362 = OpFAdd %v3float %141 %1361 + %1363 = OpFDiv %v3float %141 %1362 + %1364 = OpFMul %v3float %1360 %1363 + %1365 = OpExtInst %v3float %1 Pow %1364 %287 + OpBranch %1346 + %1346 = OpLabel + %1366 = OpPhi %v3float %1354 %1348 %1365 %1347 + OpBranch %1342 + %1343 = OpLabel + %1367 = OpMatrixTimesMatrix %mat3v3float %572 %423 + %1368 = OpFMul %v3float %932 %285 + %1369 = 
OpVectorTimesMatrix %v3float %1368 %1367 + %1370 = OpCompositeExtract %float %1369 0 + %1371 = OpCompositeExtract %float %1369 1 + %1372 = OpExtInst %float %1 FMin %1370 %1371 + %1373 = OpCompositeExtract %float %1369 2 + %1374 = OpExtInst %float %1 FMin %1372 %1373 + %1375 = OpExtInst %float %1 FMax %1370 %1371 + %1376 = OpExtInst %float %1 FMax %1375 %1373 + %1377 = OpExtInst %float %1 FMax %1376 %float_1_00000001en10 + %1378 = OpExtInst %float %1 FMax %1374 %float_1_00000001en10 + %1379 = OpFSub %float %1377 %1378 + %1380 = OpExtInst %float %1 FMax %1376 %float_0_00999999978 + %1381 = OpFDiv %float %1379 %1380 + %1382 = OpFSub %float %1373 %1371 + %1383 = OpFMul %float %1373 %1382 + %1384 = OpFSub %float %1371 %1370 + %1385 = OpFMul %float %1371 %1384 + %1386 = OpFAdd %float %1383 %1385 + %1387 = OpFSub %float %1370 %1373 + %1388 = OpFMul %float %1370 %1387 + %1389 = OpFAdd %float %1386 %1388 + %1390 = OpExtInst %float %1 Sqrt %1389 + %1391 = OpFAdd %float %1373 %1371 + %1392 = OpFAdd %float %1391 %1370 + %1393 = OpFMul %float %float_1_75 %1390 + %1394 = OpFAdd %float %1392 %1393 + %1395 = OpFMul %float %1394 %float_0_333333343 + %1396 = OpFSub %float %1381 %float_0_400000006 + %1397 = OpFMul %float %1396 %float_5 + %1398 = OpFMul %float %1396 %float_2_5 + %1399 = OpExtInst %float %1 FAbs %1398 + %1400 = OpFSub %float %float_1 %1399 + %1401 = OpExtInst %float %1 FMax %1400 %float_0 + %1402 = OpExtInst %float %1 FSign %1397 + %1403 = OpConvertFToS %int %1402 + %1404 = OpConvertSToF %float %1403 + %1405 = OpFMul %float %1401 %1401 + %1406 = OpFSub %float %float_1 %1405 + %1407 = OpFMul %float %1404 %1406 + %1408 = OpFAdd %float %float_1 %1407 + %1409 = OpFMul %float %1408 %float_0_0250000004 + %1410 = OpFOrdLessThanEqual %bool %1395 %float_0_0533333346 + OpSelectionMerge %1411 None + OpBranchConditional %1410 %1412 %1413 + %1413 = OpLabel + %1414 = OpFOrdGreaterThanEqual %bool %1395 %float_0_159999996 + OpSelectionMerge %1415 None + OpBranchConditional %1414 %1416 
%1417 + %1417 = OpLabel + %1418 = OpFDiv %float %float_0_239999995 %1394 + %1419 = OpFSub %float %1418 %float_0_5 + %1420 = OpFMul %float %1409 %1419 + OpBranch %1415 + %1416 = OpLabel + OpBranch %1415 + %1415 = OpLabel + %1421 = OpPhi %float %1420 %1417 %float_0 %1416 + OpBranch %1411 + %1412 = OpLabel + OpBranch %1411 + %1411 = OpLabel + %1422 = OpPhi %float %1421 %1415 %1409 %1412 + %1423 = OpFAdd %float %float_1 %1422 + %1424 = OpCompositeConstruct %v3float %1423 %1423 %1423 + %1425 = OpFMul %v3float %1369 %1424 + %1426 = OpCompositeExtract %float %1425 0 + %1427 = OpCompositeExtract %float %1425 1 + %1428 = OpFOrdEqual %bool %1426 %1427 + %1429 = OpCompositeExtract %float %1425 2 + %1430 = OpFOrdEqual %bool %1427 %1429 + %1431 = OpLogicalAnd %bool %1428 %1430 + OpSelectionMerge %1432 None + OpBranchConditional %1431 %1433 %1434 + %1434 = OpLabel + %1435 = OpExtInst %float %1 Sqrt %float_3 + %1436 = OpFSub %float %1427 %1429 + %1437 = OpFMul %float %1435 %1436 + %1438 = OpFMul %float %float_2 %1426 + %1439 = OpFSub %float %1438 %1427 + %1440 = OpFSub %float %1439 %1429 + %1441 = OpExtInst %float %1 Atan2 %1437 %1440 + %1442 = OpFMul %float %float_57_2957764 %1441 + OpBranch %1432 + %1433 = OpLabel + OpBranch %1432 + %1432 = OpLabel + %1443 = OpPhi %float %1442 %1434 %float_0 %1433 + %1444 = OpFOrdLessThan %bool %1443 %float_0 + OpSelectionMerge %1445 None + OpBranchConditional %1444 %1446 %1445 + %1446 = OpLabel + %1447 = OpFAdd %float %1443 %float_360 + OpBranch %1445 + %1445 = OpLabel + %1448 = OpPhi %float %1443 %1432 %1447 %1446 + %1449 = OpExtInst %float %1 FClamp %1448 %float_0 %float_360 + %1450 = OpFOrdGreaterThan %bool %1449 %float_180 + OpSelectionMerge %1451 None + OpBranchConditional %1450 %1452 %1451 + %1452 = OpLabel + %1453 = OpFSub %float %1449 %float_360 + OpBranch %1451 + %1451 = OpLabel + %1454 = OpPhi %float %1449 %1445 %1453 %1452 + %1455 = OpFOrdGreaterThan %bool %1454 %float_n67_5 + %1456 = OpFOrdLessThan %bool %1454 %float_67_5 + %1457 = 
OpLogicalAnd %bool %1455 %1456 + OpSelectionMerge %1458 None + OpBranchConditional %1457 %1459 %1458 + %1459 = OpLabel + %1460 = OpFSub %float %1454 %float_n67_5 + %1461 = OpFMul %float %1460 %float_0_0296296291 + %1462 = OpConvertFToS %int %1461 + %1463 = OpConvertSToF %float %1462 + %1464 = OpFSub %float %1461 %1463 + %1465 = OpFMul %float %1464 %1464 + %1466 = OpFMul %float %1465 %1464 + %1467 = OpIEqual %bool %1462 %int_3 + OpSelectionMerge %1468 None + OpBranchConditional %1467 %1469 %1470 + %1470 = OpLabel + %1471 = OpIEqual %bool %1462 %int_2 + OpSelectionMerge %1472 None + OpBranchConditional %1471 %1473 %1474 + %1474 = OpLabel + %1475 = OpIEqual %bool %1462 %int_1 + OpSelectionMerge %1476 None + OpBranchConditional %1475 %1477 %1478 + %1478 = OpLabel + %1479 = OpIEqual %bool %1462 %int_0 + OpSelectionMerge %1480 None + OpBranchConditional %1479 %1481 %1482 + %1482 = OpLabel + OpBranch %1480 + %1481 = OpLabel + %1483 = OpFMul %float %1466 %float_0_166666672 + OpBranch %1480 + %1480 = OpLabel + %1484 = OpPhi %float %float_0 %1482 %1483 %1481 + OpBranch %1476 + %1477 = OpLabel + %1485 = OpFMul %float %1466 %float_n0_5 + %1486 = OpFMul %float %1465 %float_0_5 + %1487 = OpFAdd %float %1485 %1486 + %1488 = OpFMul %float %1464 %float_0_5 + %1489 = OpFAdd %float %1487 %1488 + %1490 = OpFAdd %float %1489 %float_0_166666672 + OpBranch %1476 + %1476 = OpLabel + %1491 = OpPhi %float %1484 %1480 %1490 %1477 + OpBranch %1472 + %1473 = OpLabel + %1492 = OpFMul %float %1466 %float_0_5 + %1493 = OpFMul %float %1465 %float_n1 + %1494 = OpFAdd %float %1492 %1493 + %1495 = OpFAdd %float %1494 %float_0_666666687 + OpBranch %1472 + %1472 = OpLabel + %1496 = OpPhi %float %1491 %1476 %1495 %1473 + OpBranch %1468 + %1469 = OpLabel + %1497 = OpFMul %float %1466 %float_n0_166666672 + %1498 = OpFMul %float %1465 %float_0_5 + %1499 = OpFAdd %float %1497 %1498 + %1500 = OpFMul %float %1464 %float_n0_5 + %1501 = OpFAdd %float %1499 %1500 + %1502 = OpFAdd %float %1501 %float_0_166666672 
+ OpBranch %1468 + %1468 = OpLabel + %1503 = OpPhi %float %1496 %1472 %1502 %1469 + OpBranch %1458 + %1458 = OpLabel + %1504 = OpPhi %float %float_0 %1451 %1503 %1468 + %1505 = OpFMul %float %1504 %float_1_5 + %1506 = OpFMul %float %1505 %1381 + %1507 = OpFSub %float %float_0_0299999993 %1426 + %1508 = OpFMul %float %1506 %1507 + %1509 = OpFMul %float %1508 %float_0_180000007 + %1510 = OpFAdd %float %1426 %1509 + %1511 = OpCompositeInsert %v3float %1510 %1425 0 + %1512 = OpExtInst %v3float %1 FClamp %1511 %138 %337 + %1513 = OpVectorTimesMatrix %v3float %1512 %434 + %1514 = OpExtInst %v3float %1 FClamp %1513 %138 %337 + %1515 = OpDot %float %1514 %73 + %1516 = OpCompositeConstruct %v3float %1515 %1515 %1515 + %1517 = OpExtInst %v3float %1 FMix %1516 %1514 %241 + %1518 = OpCompositeExtract %float %1517 0 + %1519 = OpExtInst %float %1 Exp2 %float_n15 + %1520 = OpFMul %float %float_0_179999992 %1519 + %1521 = OpExtInst %float %1 Exp2 %float_18 + %1522 = OpFMul %float %float_0_179999992 %1521 + OpStore %528 %499 + OpStore %527 %500 + %1523 = OpFOrdLessThanEqual %bool %1518 %float_0 + %1524 = OpExtInst %float %1 Exp2 %float_n14 + %1525 = OpSelect %float %1523 %1524 %1518 + %1526 = OpExtInst %float %1 Log %1525 + %1527 = OpFDiv %float %1526 %1091 + %1528 = OpExtInst %float %1 Log %1520 + %1529 = OpFDiv %float %1528 %1091 + %1530 = OpFOrdLessThanEqual %bool %1527 %1529 + OpSelectionMerge %1531 None + OpBranchConditional %1530 %1532 %1533 + %1533 = OpLabel + %1534 = OpFOrdGreaterThan %bool %1527 %1529 + %1535 = OpExtInst %float %1 Log %float_0_180000007 + %1536 = OpFDiv %float %1535 %1091 + %1537 = OpFOrdLessThan %bool %1527 %1536 + %1538 = OpLogicalAnd %bool %1534 %1537 + OpSelectionMerge %1539 None + OpBranchConditional %1538 %1540 %1541 + %1541 = OpLabel + %1542 = OpFOrdGreaterThanEqual %bool %1527 %1536 + %1543 = OpExtInst %float %1 Log %1522 + %1544 = OpFDiv %float %1543 %1091 + %1545 = OpFOrdLessThan %bool %1527 %1544 + %1546 = OpLogicalAnd %bool %1542 %1545 + 
OpSelectionMerge %1547 None + OpBranchConditional %1546 %1548 %1549 + %1549 = OpLabel + %1550 = OpExtInst %float %1 Log %float_10000 + %1551 = OpFDiv %float %1550 %1091 + OpBranch %1547 + %1548 = OpLabel + %1552 = OpFSub %float %1527 %1536 + %1553 = OpFMul %float %float_3 %1552 + %1554 = OpFSub %float %1544 %1536 + %1555 = OpFDiv %float %1553 %1554 + %1556 = OpConvertFToS %int %1555 + %1557 = OpConvertSToF %float %1556 + %1558 = OpFSub %float %1555 %1557 + %1559 = OpAccessChain %_ptr_Function_float %527 %1556 + %1560 = OpLoad %float %1559 + %1561 = OpIAdd %int %1556 %int_1 + %1562 = OpAccessChain %_ptr_Function_float %527 %1561 + %1563 = OpLoad %float %1562 + %1564 = OpIAdd %int %1556 %int_2 + %1565 = OpAccessChain %_ptr_Function_float %527 %1564 + %1566 = OpLoad %float %1565 + %1567 = OpCompositeConstruct %v3float %1560 %1563 %1566 + %1568 = OpFMul %float %1558 %1558 + %1569 = OpCompositeConstruct %v3float %1568 %1558 %float_1 + %1570 = OpMatrixTimesVector %v3float %466 %1567 + %1571 = OpDot %float %1569 %1570 + OpBranch %1547 + %1547 = OpLabel + %1572 = OpPhi %float %1551 %1549 %1571 %1548 + OpBranch %1539 + %1540 = OpLabel + %1573 = OpFSub %float %1527 %1529 + %1574 = OpFMul %float %float_3 %1573 + %1575 = OpFSub %float %1536 %1529 + %1576 = OpFDiv %float %1574 %1575 + %1577 = OpConvertFToS %int %1576 + %1578 = OpConvertSToF %float %1577 + %1579 = OpFSub %float %1576 %1578 + %1580 = OpAccessChain %_ptr_Function_float %528 %1577 + %1581 = OpLoad %float %1580 + %1582 = OpIAdd %int %1577 %int_1 + %1583 = OpAccessChain %_ptr_Function_float %528 %1582 + %1584 = OpLoad %float %1583 + %1585 = OpIAdd %int %1577 %int_2 + %1586 = OpAccessChain %_ptr_Function_float %528 %1585 + %1587 = OpLoad %float %1586 + %1588 = OpCompositeConstruct %v3float %1581 %1584 %1587 + %1589 = OpFMul %float %1579 %1579 + %1590 = OpCompositeConstruct %v3float %1589 %1579 %float_1 + %1591 = OpMatrixTimesVector %v3float %466 %1588 + %1592 = OpDot %float %1590 %1591 + OpBranch %1539 + %1539 = 
OpLabel + %1593 = OpPhi %float %1572 %1547 %1592 %1540 + OpBranch %1531 + %1532 = OpLabel + %1594 = OpExtInst %float %1 Log %float_9_99999975en05 + %1595 = OpFDiv %float %1594 %1091 + OpBranch %1531 + %1531 = OpLabel + %1596 = OpPhi %float %1593 %1539 %1595 %1532 + %1597 = OpExtInst %float %1 Pow %float_10 %1596 + %1598 = OpCompositeInsert %v3float %1597 %523 0 + %1599 = OpCompositeExtract %float %1517 1 + OpStore %530 %499 + OpStore %529 %500 + %1600 = OpFOrdLessThanEqual %bool %1599 %float_0 + %1601 = OpSelect %float %1600 %1524 %1599 + %1602 = OpExtInst %float %1 Log %1601 + %1603 = OpFDiv %float %1602 %1091 + %1604 = OpFOrdLessThanEqual %bool %1603 %1529 + OpSelectionMerge %1605 None + OpBranchConditional %1604 %1606 %1607 + %1607 = OpLabel + %1608 = OpFOrdGreaterThan %bool %1603 %1529 + %1609 = OpExtInst %float %1 Log %float_0_180000007 + %1610 = OpFDiv %float %1609 %1091 + %1611 = OpFOrdLessThan %bool %1603 %1610 + %1612 = OpLogicalAnd %bool %1608 %1611 + OpSelectionMerge %1613 None + OpBranchConditional %1612 %1614 %1615 + %1615 = OpLabel + %1616 = OpFOrdGreaterThanEqual %bool %1603 %1610 + %1617 = OpExtInst %float %1 Log %1522 + %1618 = OpFDiv %float %1617 %1091 + %1619 = OpFOrdLessThan %bool %1603 %1618 + %1620 = OpLogicalAnd %bool %1616 %1619 + OpSelectionMerge %1621 None + OpBranchConditional %1620 %1622 %1623 + %1623 = OpLabel + %1624 = OpExtInst %float %1 Log %float_10000 + %1625 = OpFDiv %float %1624 %1091 + OpBranch %1621 + %1622 = OpLabel + %1626 = OpFSub %float %1603 %1610 + %1627 = OpFMul %float %float_3 %1626 + %1628 = OpFSub %float %1618 %1610 + %1629 = OpFDiv %float %1627 %1628 + %1630 = OpConvertFToS %int %1629 + %1631 = OpConvertSToF %float %1630 + %1632 = OpFSub %float %1629 %1631 + %1633 = OpAccessChain %_ptr_Function_float %529 %1630 + %1634 = OpLoad %float %1633 + %1635 = OpIAdd %int %1630 %int_1 + %1636 = OpAccessChain %_ptr_Function_float %529 %1635 + %1637 = OpLoad %float %1636 + %1638 = OpIAdd %int %1630 %int_2 + %1639 = OpAccessChain 
%_ptr_Function_float %529 %1638 + %1640 = OpLoad %float %1639 + %1641 = OpCompositeConstruct %v3float %1634 %1637 %1640 + %1642 = OpFMul %float %1632 %1632 + %1643 = OpCompositeConstruct %v3float %1642 %1632 %float_1 + %1644 = OpMatrixTimesVector %v3float %466 %1641 + %1645 = OpDot %float %1643 %1644 + OpBranch %1621 + %1621 = OpLabel + %1646 = OpPhi %float %1625 %1623 %1645 %1622 + OpBranch %1613 + %1614 = OpLabel + %1647 = OpFSub %float %1603 %1529 + %1648 = OpFMul %float %float_3 %1647 + %1649 = OpFSub %float %1610 %1529 + %1650 = OpFDiv %float %1648 %1649 + %1651 = OpConvertFToS %int %1650 + %1652 = OpConvertSToF %float %1651 + %1653 = OpFSub %float %1650 %1652 + %1654 = OpAccessChain %_ptr_Function_float %530 %1651 + %1655 = OpLoad %float %1654 + %1656 = OpIAdd %int %1651 %int_1 + %1657 = OpAccessChain %_ptr_Function_float %530 %1656 + %1658 = OpLoad %float %1657 + %1659 = OpIAdd %int %1651 %int_2 + %1660 = OpAccessChain %_ptr_Function_float %530 %1659 + %1661 = OpLoad %float %1660 + %1662 = OpCompositeConstruct %v3float %1655 %1658 %1661 + %1663 = OpFMul %float %1653 %1653 + %1664 = OpCompositeConstruct %v3float %1663 %1653 %float_1 + %1665 = OpMatrixTimesVector %v3float %466 %1662 + %1666 = OpDot %float %1664 %1665 + OpBranch %1613 + %1613 = OpLabel + %1667 = OpPhi %float %1646 %1621 %1666 %1614 + OpBranch %1605 + %1606 = OpLabel + %1668 = OpExtInst %float %1 Log %float_9_99999975en05 + %1669 = OpFDiv %float %1668 %1091 + OpBranch %1605 + %1605 = OpLabel + %1670 = OpPhi %float %1667 %1613 %1669 %1606 + %1671 = OpExtInst %float %1 Pow %float_10 %1670 + %1672 = OpCompositeInsert %v3float %1671 %1598 1 + %1673 = OpCompositeExtract %float %1517 2 + OpStore %532 %499 + OpStore %531 %500 + %1674 = OpFOrdLessThanEqual %bool %1673 %float_0 + %1675 = OpSelect %float %1674 %1524 %1673 + %1676 = OpExtInst %float %1 Log %1675 + %1677 = OpFDiv %float %1676 %1091 + %1678 = OpFOrdLessThanEqual %bool %1677 %1529 + OpSelectionMerge %1679 None + OpBranchConditional %1678 
%1680 %1681 + %1681 = OpLabel + %1682 = OpFOrdGreaterThan %bool %1677 %1529 + %1683 = OpExtInst %float %1 Log %float_0_180000007 + %1684 = OpFDiv %float %1683 %1091 + %1685 = OpFOrdLessThan %bool %1677 %1684 + %1686 = OpLogicalAnd %bool %1682 %1685 + OpSelectionMerge %1687 None + OpBranchConditional %1686 %1688 %1689 + %1689 = OpLabel + %1690 = OpFOrdGreaterThanEqual %bool %1677 %1684 + %1691 = OpExtInst %float %1 Log %1522 + %1692 = OpFDiv %float %1691 %1091 + %1693 = OpFOrdLessThan %bool %1677 %1692 + %1694 = OpLogicalAnd %bool %1690 %1693 + OpSelectionMerge %1695 None + OpBranchConditional %1694 %1696 %1697 + %1697 = OpLabel + %1698 = OpExtInst %float %1 Log %float_10000 + %1699 = OpFDiv %float %1698 %1091 + OpBranch %1695 + %1696 = OpLabel + %1700 = OpFSub %float %1677 %1684 + %1701 = OpFMul %float %float_3 %1700 + %1702 = OpFSub %float %1692 %1684 + %1703 = OpFDiv %float %1701 %1702 + %1704 = OpConvertFToS %int %1703 + %1705 = OpConvertSToF %float %1704 + %1706 = OpFSub %float %1703 %1705 + %1707 = OpAccessChain %_ptr_Function_float %531 %1704 + %1708 = OpLoad %float %1707 + %1709 = OpIAdd %int %1704 %int_1 + %1710 = OpAccessChain %_ptr_Function_float %531 %1709 + %1711 = OpLoad %float %1710 + %1712 = OpIAdd %int %1704 %int_2 + %1713 = OpAccessChain %_ptr_Function_float %531 %1712 + %1714 = OpLoad %float %1713 + %1715 = OpCompositeConstruct %v3float %1708 %1711 %1714 + %1716 = OpFMul %float %1706 %1706 + %1717 = OpCompositeConstruct %v3float %1716 %1706 %float_1 + %1718 = OpMatrixTimesVector %v3float %466 %1715 + %1719 = OpDot %float %1717 %1718 + OpBranch %1695 + %1695 = OpLabel + %1720 = OpPhi %float %1699 %1697 %1719 %1696 + OpBranch %1687 + %1688 = OpLabel + %1721 = OpFSub %float %1677 %1529 + %1722 = OpFMul %float %float_3 %1721 + %1723 = OpFSub %float %1684 %1529 + %1724 = OpFDiv %float %1722 %1723 + %1725 = OpConvertFToS %int %1724 + %1726 = OpConvertSToF %float %1725 + %1727 = OpFSub %float %1724 %1726 + %1728 = OpAccessChain %_ptr_Function_float %532 
%1725 + %1729 = OpLoad %float %1728 + %1730 = OpIAdd %int %1725 %int_1 + %1731 = OpAccessChain %_ptr_Function_float %532 %1730 + %1732 = OpLoad %float %1731 + %1733 = OpIAdd %int %1725 %int_2 + %1734 = OpAccessChain %_ptr_Function_float %532 %1733 + %1735 = OpLoad %float %1734 + %1736 = OpCompositeConstruct %v3float %1729 %1732 %1735 + %1737 = OpFMul %float %1727 %1727 + %1738 = OpCompositeConstruct %v3float %1737 %1727 %float_1 + %1739 = OpMatrixTimesVector %v3float %466 %1736 + %1740 = OpDot %float %1738 %1739 + OpBranch %1687 + %1687 = OpLabel + %1741 = OpPhi %float %1720 %1695 %1740 %1688 + OpBranch %1679 + %1680 = OpLabel + %1742 = OpExtInst %float %1 Log %float_9_99999975en05 + %1743 = OpFDiv %float %1742 %1091 + OpBranch %1679 + %1679 = OpLabel + %1744 = OpPhi %float %1741 %1687 %1743 %1680 + %1745 = OpExtInst %float %1 Pow %float_10 %1744 + %1746 = OpCompositeInsert %v3float %1745 %1672 2 + %1747 = OpVectorTimesMatrix %v3float %1746 %438 + %1748 = OpVectorTimesMatrix %v3float %1747 %434 + %1749 = OpExtInst %float %1 Pow %float_2 %float_n12 + %1750 = OpFMul %float %float_0_179999992 %1749 + OpStore %540 %499 + OpStore %539 %500 + %1751 = OpFOrdLessThanEqual %bool %1750 %float_0 + %1752 = OpSelect %float %1751 %1524 %1750 + %1753 = OpExtInst %float %1 Log %1752 + %1754 = OpFDiv %float %1753 %1091 + %1755 = OpFOrdLessThanEqual %bool %1754 %1529 + OpSelectionMerge %1756 None + OpBranchConditional %1755 %1757 %1758 + %1758 = OpLabel + %1759 = OpFOrdGreaterThan %bool %1754 %1529 + %1760 = OpExtInst %float %1 Log %float_0_180000007 + %1761 = OpFDiv %float %1760 %1091 + %1762 = OpFOrdLessThan %bool %1754 %1761 + %1763 = OpLogicalAnd %bool %1759 %1762 + OpSelectionMerge %1764 None + OpBranchConditional %1763 %1765 %1766 + %1766 = OpLabel + %1767 = OpFOrdGreaterThanEqual %bool %1754 %1761 + %1768 = OpExtInst %float %1 Log %1522 + %1769 = OpFDiv %float %1768 %1091 + %1770 = OpFOrdLessThan %bool %1754 %1769 + %1771 = OpLogicalAnd %bool %1767 %1770 + OpSelectionMerge 
%1772 None + OpBranchConditional %1771 %1773 %1774 + %1774 = OpLabel + %1775 = OpExtInst %float %1 Log %float_10000 + %1776 = OpFDiv %float %1775 %1091 + OpBranch %1772 + %1773 = OpLabel + %1777 = OpFSub %float %1754 %1761 + %1778 = OpFMul %float %float_3 %1777 + %1779 = OpFSub %float %1769 %1761 + %1780 = OpFDiv %float %1778 %1779 + %1781 = OpConvertFToS %int %1780 + %1782 = OpConvertSToF %float %1781 + %1783 = OpFSub %float %1780 %1782 + %1784 = OpAccessChain %_ptr_Function_float %539 %1781 + %1785 = OpLoad %float %1784 + %1786 = OpIAdd %int %1781 %int_1 + %1787 = OpAccessChain %_ptr_Function_float %539 %1786 + %1788 = OpLoad %float %1787 + %1789 = OpIAdd %int %1781 %int_2 + %1790 = OpAccessChain %_ptr_Function_float %539 %1789 + %1791 = OpLoad %float %1790 + %1792 = OpCompositeConstruct %v3float %1785 %1788 %1791 + %1793 = OpFMul %float %1783 %1783 + %1794 = OpCompositeConstruct %v3float %1793 %1783 %float_1 + %1795 = OpMatrixTimesVector %v3float %466 %1792 + %1796 = OpDot %float %1794 %1795 + OpBranch %1772 + %1772 = OpLabel + %1797 = OpPhi %float %1776 %1774 %1796 %1773 + OpBranch %1764 + %1765 = OpLabel + %1798 = OpFSub %float %1754 %1529 + %1799 = OpFMul %float %float_3 %1798 + %1800 = OpFSub %float %1761 %1529 + %1801 = OpFDiv %float %1799 %1800 + %1802 = OpConvertFToS %int %1801 + %1803 = OpConvertSToF %float %1802 + %1804 = OpFSub %float %1801 %1803 + %1805 = OpAccessChain %_ptr_Function_float %540 %1802 + %1806 = OpLoad %float %1805 + %1807 = OpIAdd %int %1802 %int_1 + %1808 = OpAccessChain %_ptr_Function_float %540 %1807 + %1809 = OpLoad %float %1808 + %1810 = OpIAdd %int %1802 %int_2 + %1811 = OpAccessChain %_ptr_Function_float %540 %1810 + %1812 = OpLoad %float %1811 + %1813 = OpCompositeConstruct %v3float %1806 %1809 %1812 + %1814 = OpFMul %float %1804 %1804 + %1815 = OpCompositeConstruct %v3float %1814 %1804 %float_1 + %1816 = OpMatrixTimesVector %v3float %466 %1813 + %1817 = OpDot %float %1815 %1816 + OpBranch %1764 + %1764 = OpLabel + %1818 = 
OpPhi %float %1797 %1772 %1817 %1765 + OpBranch %1756 + %1757 = OpLabel + %1819 = OpExtInst %float %1 Log %float_9_99999975en05 + %1820 = OpFDiv %float %1819 %1091 + OpBranch %1756 + %1756 = OpLabel + %1821 = OpPhi %float %1818 %1764 %1820 %1757 + %1822 = OpExtInst %float %1 Pow %float_10 %1821 + OpStore %542 %499 + OpStore %541 %500 + %1823 = OpExtInst %float %1 Log %float_0_180000007 + %1824 = OpFDiv %float %1823 %1091 + %1825 = OpFOrdLessThanEqual %bool %1824 %1529 + OpSelectionMerge %1826 None + OpBranchConditional %1825 %1827 %1828 + %1828 = OpLabel + %1829 = OpFOrdGreaterThan %bool %1824 %1529 + %1830 = OpFOrdLessThan %bool %1824 %1824 + %1831 = OpLogicalAnd %bool %1829 %1830 + OpSelectionMerge %1832 None + OpBranchConditional %1831 %1833 %1834 + %1834 = OpLabel + %1835 = OpFOrdGreaterThanEqual %bool %1824 %1824 + %1836 = OpExtInst %float %1 Log %1522 + %1837 = OpFDiv %float %1836 %1091 + %1838 = OpFOrdLessThan %bool %1824 %1837 + %1839 = OpLogicalAnd %bool %1835 %1838 + OpSelectionMerge %1840 None + OpBranchConditional %1839 %1841 %1842 + %1842 = OpLabel + %1843 = OpExtInst %float %1 Log %float_10000 + %1844 = OpFDiv %float %1843 %1091 + OpBranch %1840 + %1841 = OpLabel + %1845 = OpFSub %float %1824 %1824 + %1846 = OpFMul %float %float_3 %1845 + %1847 = OpFSub %float %1837 %1824 + %1848 = OpFDiv %float %1846 %1847 + %1849 = OpConvertFToS %int %1848 + %1850 = OpConvertSToF %float %1849 + %1851 = OpFSub %float %1848 %1850 + %1852 = OpAccessChain %_ptr_Function_float %541 %1849 + %1853 = OpLoad %float %1852 + %1854 = OpIAdd %int %1849 %int_1 + %1855 = OpAccessChain %_ptr_Function_float %541 %1854 + %1856 = OpLoad %float %1855 + %1857 = OpIAdd %int %1849 %int_2 + %1858 = OpAccessChain %_ptr_Function_float %541 %1857 + %1859 = OpLoad %float %1858 + %1860 = OpCompositeConstruct %v3float %1853 %1856 %1859 + %1861 = OpFMul %float %1851 %1851 + %1862 = OpCompositeConstruct %v3float %1861 %1851 %float_1 + %1863 = OpMatrixTimesVector %v3float %466 %1860 + %1864 = OpDot 
%float %1862 %1863 + OpBranch %1840 + %1840 = OpLabel + %1865 = OpPhi %float %1844 %1842 %1864 %1841 + OpBranch %1832 + %1833 = OpLabel + %1866 = OpAccessChain %_ptr_Function_float %542 %int_3 + %1867 = OpLoad %float %1866 + %1868 = OpAccessChain %_ptr_Function_float %542 %int_4 + %1869 = OpLoad %float %1868 + %1870 = OpAccessChain %_ptr_Function_float %542 %int_5 + %1871 = OpLoad %float %1870 + %1872 = OpCompositeConstruct %v3float %1867 %1869 %1871 + %1873 = OpMatrixTimesVector %v3float %466 %1872 + %1874 = OpCompositeExtract %float %1873 2 + OpBranch %1832 + %1832 = OpLabel + %1875 = OpPhi %float %1865 %1840 %1874 %1833 + OpBranch %1826 + %1827 = OpLabel + %1876 = OpExtInst %float %1 Log %float_9_99999975en05 + %1877 = OpFDiv %float %1876 %1091 + OpBranch %1826 + %1826 = OpLabel + %1878 = OpPhi %float %1875 %1832 %1877 %1827 + %1879 = OpExtInst %float %1 Pow %float_10 %1878 + %1880 = OpExtInst %float %1 Pow %float_2 %float_11 + %1881 = OpFMul %float %float_0_179999992 %1880 + OpStore %544 %499 + OpStore %543 %500 + %1882 = OpFOrdLessThanEqual %bool %1881 %float_0 + %1883 = OpSelect %float %1882 %1524 %1881 + %1884 = OpExtInst %float %1 Log %1883 + %1885 = OpFDiv %float %1884 %1091 + %1886 = OpFOrdLessThanEqual %bool %1885 %1529 + OpSelectionMerge %1887 None + OpBranchConditional %1886 %1888 %1889 + %1889 = OpLabel + %1890 = OpFOrdGreaterThan %bool %1885 %1529 + %1891 = OpFOrdLessThan %bool %1885 %1824 + %1892 = OpLogicalAnd %bool %1890 %1891 + OpSelectionMerge %1893 None + OpBranchConditional %1892 %1894 %1895 + %1895 = OpLabel + %1896 = OpFOrdGreaterThanEqual %bool %1885 %1824 + %1897 = OpExtInst %float %1 Log %1522 + %1898 = OpFDiv %float %1897 %1091 + %1899 = OpFOrdLessThan %bool %1885 %1898 + %1900 = OpLogicalAnd %bool %1896 %1899 + OpSelectionMerge %1901 None + OpBranchConditional %1900 %1902 %1903 + %1903 = OpLabel + %1904 = OpExtInst %float %1 Log %float_10000 + %1905 = OpFDiv %float %1904 %1091 + OpBranch %1901 + %1902 = OpLabel + %1906 = OpFSub %float 
%1885 %1824 + %1907 = OpFMul %float %float_3 %1906 + %1908 = OpFSub %float %1898 %1824 + %1909 = OpFDiv %float %1907 %1908 + %1910 = OpConvertFToS %int %1909 + %1911 = OpConvertSToF %float %1910 + %1912 = OpFSub %float %1909 %1911 + %1913 = OpAccessChain %_ptr_Function_float %543 %1910 + %1914 = OpLoad %float %1913 + %1915 = OpIAdd %int %1910 %int_1 + %1916 = OpAccessChain %_ptr_Function_float %543 %1915 + %1917 = OpLoad %float %1916 + %1918 = OpIAdd %int %1910 %int_2 + %1919 = OpAccessChain %_ptr_Function_float %543 %1918 + %1920 = OpLoad %float %1919 + %1921 = OpCompositeConstruct %v3float %1914 %1917 %1920 + %1922 = OpFMul %float %1912 %1912 + %1923 = OpCompositeConstruct %v3float %1922 %1912 %float_1 + %1924 = OpMatrixTimesVector %v3float %466 %1921 + %1925 = OpDot %float %1923 %1924 + OpBranch %1901 + %1901 = OpLabel + %1926 = OpPhi %float %1905 %1903 %1925 %1902 + OpBranch %1893 + %1894 = OpLabel + %1927 = OpFSub %float %1885 %1529 + %1928 = OpFMul %float %float_3 %1927 + %1929 = OpFSub %float %1824 %1529 + %1930 = OpFDiv %float %1928 %1929 + %1931 = OpConvertFToS %int %1930 + %1932 = OpConvertSToF %float %1931 + %1933 = OpFSub %float %1930 %1932 + %1934 = OpAccessChain %_ptr_Function_float %544 %1931 + %1935 = OpLoad %float %1934 + %1936 = OpIAdd %int %1931 %int_1 + %1937 = OpAccessChain %_ptr_Function_float %544 %1936 + %1938 = OpLoad %float %1937 + %1939 = OpIAdd %int %1931 %int_2 + %1940 = OpAccessChain %_ptr_Function_float %544 %1939 + %1941 = OpLoad %float %1940 + %1942 = OpCompositeConstruct %v3float %1935 %1938 %1941 + %1943 = OpFMul %float %1933 %1933 + %1944 = OpCompositeConstruct %v3float %1943 %1933 %float_1 + %1945 = OpMatrixTimesVector %v3float %466 %1942 + %1946 = OpDot %float %1944 %1945 + OpBranch %1893 + %1893 = OpLabel + %1947 = OpPhi %float %1926 %1901 %1946 %1894 + OpBranch %1887 + %1888 = OpLabel + %1948 = OpExtInst %float %1 Log %float_9_99999975en05 + %1949 = OpFDiv %float %1948 %1091 + OpBranch %1887 + %1887 = OpLabel + %1950 = OpPhi 
%float %1947 %1893 %1949 %1888 + %1951 = OpExtInst %float %1 Pow %float_10 %1950 + %1952 = OpCompositeExtract %float %1748 0 + OpStore %538 %506 + OpStore %537 %507 + %1953 = OpFOrdLessThanEqual %bool %1952 %float_0 + %1954 = OpSelect %float %1953 %float_9_99999975en05 %1952 + %1955 = OpExtInst %float %1 Log %1954 + %1956 = OpFDiv %float %1955 %1091 + %1957 = OpExtInst %float %1 Log %1822 + %1958 = OpFDiv %float %1957 %1091 + %1959 = OpFOrdLessThanEqual %bool %1956 %1958 + OpSelectionMerge %1960 None + OpBranchConditional %1959 %1961 %1962 + %1962 = OpLabel + %1963 = OpFOrdGreaterThan %bool %1956 %1958 + %1964 = OpExtInst %float %1 Log %1879 + %1965 = OpFDiv %float %1964 %1091 + %1966 = OpFOrdLessThan %bool %1956 %1965 + %1967 = OpLogicalAnd %bool %1963 %1966 + OpSelectionMerge %1968 None + OpBranchConditional %1967 %1969 %1970 + %1970 = OpLabel + %1971 = OpFOrdGreaterThanEqual %bool %1956 %1965 + %1972 = OpExtInst %float %1 Log %1951 + %1973 = OpFDiv %float %1972 %1091 + %1974 = OpFOrdLessThan %bool %1956 %1973 + %1975 = OpLogicalAnd %bool %1971 %1974 + OpSelectionMerge %1976 None + OpBranchConditional %1975 %1977 %1978 + %1978 = OpLabel + %1979 = OpFMul %float %1956 %float_0_119999997 + %1980 = OpExtInst %float %1 Log %float_2000 + %1981 = OpFDiv %float %1980 %1091 + %1982 = OpFMul %float %float_0_119999997 %1972 + %1983 = OpFDiv %float %1982 %1091 + %1984 = OpFSub %float %1981 %1983 + %1985 = OpFAdd %float %1979 %1984 + OpBranch %1976 + %1977 = OpLabel + %1986 = OpFSub %float %1956 %1965 + %1987 = OpFMul %float %float_7 %1986 + %1988 = OpFSub %float %1973 %1965 + %1989 = OpFDiv %float %1987 %1988 + %1990 = OpConvertFToS %int %1989 + %1991 = OpConvertSToF %float %1990 + %1992 = OpFSub %float %1989 %1991 + %1993 = OpAccessChain %_ptr_Function_float %537 %1990 + %1994 = OpLoad %float %1993 + %1995 = OpIAdd %int %1990 %int_1 + %1996 = OpAccessChain %_ptr_Function_float %537 %1995 + %1997 = OpLoad %float %1996 + %1998 = OpIAdd %int %1990 %int_2 + %1999 = 
OpAccessChain %_ptr_Function_float %537 %1998 + %2000 = OpLoad %float %1999 + %2001 = OpCompositeConstruct %v3float %1994 %1997 %2000 + %2002 = OpFMul %float %1992 %1992 + %2003 = OpCompositeConstruct %v3float %2002 %1992 %float_1 + %2004 = OpMatrixTimesVector %v3float %466 %2001 + %2005 = OpDot %float %2003 %2004 + OpBranch %1976 + %1976 = OpLabel + %2006 = OpPhi %float %1985 %1978 %2005 %1977 + OpBranch %1968 + %1969 = OpLabel + %2007 = OpFSub %float %1956 %1958 + %2008 = OpFMul %float %float_7 %2007 + %2009 = OpFSub %float %1965 %1958 + %2010 = OpFDiv %float %2008 %2009 + %2011 = OpConvertFToS %int %2010 + %2012 = OpConvertSToF %float %2011 + %2013 = OpFSub %float %2010 %2012 + %2014 = OpAccessChain %_ptr_Function_float %538 %2011 + %2015 = OpLoad %float %2014 + %2016 = OpIAdd %int %2011 %int_1 + %2017 = OpAccessChain %_ptr_Function_float %538 %2016 + %2018 = OpLoad %float %2017 + %2019 = OpIAdd %int %2011 %int_2 + %2020 = OpAccessChain %_ptr_Function_float %538 %2019 + %2021 = OpLoad %float %2020 + %2022 = OpCompositeConstruct %v3float %2015 %2018 %2021 + %2023 = OpFMul %float %2013 %2013 + %2024 = OpCompositeConstruct %v3float %2023 %2013 %float_1 + %2025 = OpMatrixTimesVector %v3float %466 %2022 + %2026 = OpDot %float %2024 %2025 + OpBranch %1968 + %1968 = OpLabel + %2027 = OpPhi %float %2006 %1976 %2026 %1969 + OpBranch %1960 + %1961 = OpLabel + %2028 = OpExtInst %float %1 Log %float_0_00499999989 + %2029 = OpFDiv %float %2028 %1091 + OpBranch %1960 + %1960 = OpLabel + %2030 = OpPhi %float %2027 %1968 %2029 %1961 + %2031 = OpExtInst %float %1 Pow %float_10 %2030 + %2032 = OpCompositeInsert %v3float %2031 %523 0 + %2033 = OpCompositeExtract %float %1748 1 + OpStore %536 %506 + OpStore %535 %507 + %2034 = OpFOrdLessThanEqual %bool %2033 %float_0 + %2035 = OpSelect %float %2034 %float_9_99999975en05 %2033 + %2036 = OpExtInst %float %1 Log %2035 + %2037 = OpFDiv %float %2036 %1091 + %2038 = OpFOrdLessThanEqual %bool %2037 %1958 + OpSelectionMerge %2039 None + 
OpBranchConditional %2038 %2040 %2041 + %2041 = OpLabel + %2042 = OpFOrdGreaterThan %bool %2037 %1958 + %2043 = OpExtInst %float %1 Log %1879 + %2044 = OpFDiv %float %2043 %1091 + %2045 = OpFOrdLessThan %bool %2037 %2044 + %2046 = OpLogicalAnd %bool %2042 %2045 + OpSelectionMerge %2047 None + OpBranchConditional %2046 %2048 %2049 + %2049 = OpLabel + %2050 = OpFOrdGreaterThanEqual %bool %2037 %2044 + %2051 = OpExtInst %float %1 Log %1951 + %2052 = OpFDiv %float %2051 %1091 + %2053 = OpFOrdLessThan %bool %2037 %2052 + %2054 = OpLogicalAnd %bool %2050 %2053 + OpSelectionMerge %2055 None + OpBranchConditional %2054 %2056 %2057 + %2057 = OpLabel + %2058 = OpFMul %float %2037 %float_0_119999997 + %2059 = OpExtInst %float %1 Log %float_2000 + %2060 = OpFDiv %float %2059 %1091 + %2061 = OpFMul %float %float_0_119999997 %2051 + %2062 = OpFDiv %float %2061 %1091 + %2063 = OpFSub %float %2060 %2062 + %2064 = OpFAdd %float %2058 %2063 + OpBranch %2055 + %2056 = OpLabel + %2065 = OpFSub %float %2037 %2044 + %2066 = OpFMul %float %float_7 %2065 + %2067 = OpFSub %float %2052 %2044 + %2068 = OpFDiv %float %2066 %2067 + %2069 = OpConvertFToS %int %2068 + %2070 = OpConvertSToF %float %2069 + %2071 = OpFSub %float %2068 %2070 + %2072 = OpAccessChain %_ptr_Function_float %535 %2069 + %2073 = OpLoad %float %2072 + %2074 = OpIAdd %int %2069 %int_1 + %2075 = OpAccessChain %_ptr_Function_float %535 %2074 + %2076 = OpLoad %float %2075 + %2077 = OpIAdd %int %2069 %int_2 + %2078 = OpAccessChain %_ptr_Function_float %535 %2077 + %2079 = OpLoad %float %2078 + %2080 = OpCompositeConstruct %v3float %2073 %2076 %2079 + %2081 = OpFMul %float %2071 %2071 + %2082 = OpCompositeConstruct %v3float %2081 %2071 %float_1 + %2083 = OpMatrixTimesVector %v3float %466 %2080 + %2084 = OpDot %float %2082 %2083 + OpBranch %2055 + %2055 = OpLabel + %2085 = OpPhi %float %2064 %2057 %2084 %2056 + OpBranch %2047 + %2048 = OpLabel + %2086 = OpFSub %float %2037 %1958 + %2087 = OpFMul %float %float_7 %2086 + %2088 = 
OpFSub %float %2044 %1958 + %2089 = OpFDiv %float %2087 %2088 + %2090 = OpConvertFToS %int %2089 + %2091 = OpConvertSToF %float %2090 + %2092 = OpFSub %float %2089 %2091 + %2093 = OpAccessChain %_ptr_Function_float %536 %2090 + %2094 = OpLoad %float %2093 + %2095 = OpIAdd %int %2090 %int_1 + %2096 = OpAccessChain %_ptr_Function_float %536 %2095 + %2097 = OpLoad %float %2096 + %2098 = OpIAdd %int %2090 %int_2 + %2099 = OpAccessChain %_ptr_Function_float %536 %2098 + %2100 = OpLoad %float %2099 + %2101 = OpCompositeConstruct %v3float %2094 %2097 %2100 + %2102 = OpFMul %float %2092 %2092 + %2103 = OpCompositeConstruct %v3float %2102 %2092 %float_1 + %2104 = OpMatrixTimesVector %v3float %466 %2101 + %2105 = OpDot %float %2103 %2104 + OpBranch %2047 + %2047 = OpLabel + %2106 = OpPhi %float %2085 %2055 %2105 %2048 + OpBranch %2039 + %2040 = OpLabel + %2107 = OpExtInst %float %1 Log %float_0_00499999989 + %2108 = OpFDiv %float %2107 %1091 + OpBranch %2039 + %2039 = OpLabel + %2109 = OpPhi %float %2106 %2047 %2108 %2040 + %2110 = OpExtInst %float %1 Pow %float_10 %2109 + %2111 = OpCompositeInsert %v3float %2110 %2032 1 + %2112 = OpCompositeExtract %float %1748 2 + OpStore %534 %506 + OpStore %533 %507 + %2113 = OpFOrdLessThanEqual %bool %2112 %float_0 + %2114 = OpSelect %float %2113 %float_9_99999975en05 %2112 + %2115 = OpExtInst %float %1 Log %2114 + %2116 = OpFDiv %float %2115 %1091 + %2117 = OpFOrdLessThanEqual %bool %2116 %1958 + OpSelectionMerge %2118 None + OpBranchConditional %2117 %2119 %2120 + %2120 = OpLabel + %2121 = OpFOrdGreaterThan %bool %2116 %1958 + %2122 = OpExtInst %float %1 Log %1879 + %2123 = OpFDiv %float %2122 %1091 + %2124 = OpFOrdLessThan %bool %2116 %2123 + %2125 = OpLogicalAnd %bool %2121 %2124 + OpSelectionMerge %2126 None + OpBranchConditional %2125 %2127 %2128 + %2128 = OpLabel + %2129 = OpFOrdGreaterThanEqual %bool %2116 %2123 + %2130 = OpExtInst %float %1 Log %1951 + %2131 = OpFDiv %float %2130 %1091 + %2132 = OpFOrdLessThan %bool %2116 %2131 
+ %2133 = OpLogicalAnd %bool %2129 %2132 + OpSelectionMerge %2134 None + OpBranchConditional %2133 %2135 %2136 + %2136 = OpLabel + %2137 = OpFMul %float %2116 %float_0_119999997 + %2138 = OpExtInst %float %1 Log %float_2000 + %2139 = OpFDiv %float %2138 %1091 + %2140 = OpFMul %float %float_0_119999997 %2130 + %2141 = OpFDiv %float %2140 %1091 + %2142 = OpFSub %float %2139 %2141 + %2143 = OpFAdd %float %2137 %2142 + OpBranch %2134 + %2135 = OpLabel + %2144 = OpFSub %float %2116 %2123 + %2145 = OpFMul %float %float_7 %2144 + %2146 = OpFSub %float %2131 %2123 + %2147 = OpFDiv %float %2145 %2146 + %2148 = OpConvertFToS %int %2147 + %2149 = OpConvertSToF %float %2148 + %2150 = OpFSub %float %2147 %2149 + %2151 = OpAccessChain %_ptr_Function_float %533 %2148 + %2152 = OpLoad %float %2151 + %2153 = OpIAdd %int %2148 %int_1 + %2154 = OpAccessChain %_ptr_Function_float %533 %2153 + %2155 = OpLoad %float %2154 + %2156 = OpIAdd %int %2148 %int_2 + %2157 = OpAccessChain %_ptr_Function_float %533 %2156 + %2158 = OpLoad %float %2157 + %2159 = OpCompositeConstruct %v3float %2152 %2155 %2158 + %2160 = OpFMul %float %2150 %2150 + %2161 = OpCompositeConstruct %v3float %2160 %2150 %float_1 + %2162 = OpMatrixTimesVector %v3float %466 %2159 + %2163 = OpDot %float %2161 %2162 + OpBranch %2134 + %2134 = OpLabel + %2164 = OpPhi %float %2143 %2136 %2163 %2135 + OpBranch %2126 + %2127 = OpLabel + %2165 = OpFSub %float %2116 %1958 + %2166 = OpFMul %float %float_7 %2165 + %2167 = OpFSub %float %2123 %1958 + %2168 = OpFDiv %float %2166 %2167 + %2169 = OpConvertFToS %int %2168 + %2170 = OpConvertSToF %float %2169 + %2171 = OpFSub %float %2168 %2170 + %2172 = OpAccessChain %_ptr_Function_float %534 %2169 + %2173 = OpLoad %float %2172 + %2174 = OpIAdd %int %2169 %int_1 + %2175 = OpAccessChain %_ptr_Function_float %534 %2174 + %2176 = OpLoad %float %2175 + %2177 = OpIAdd %int %2169 %int_2 + %2178 = OpAccessChain %_ptr_Function_float %534 %2177 + %2179 = OpLoad %float %2178 + %2180 = 
OpCompositeConstruct %v3float %2173 %2176 %2179 + %2181 = OpFMul %float %2171 %2171 + %2182 = OpCompositeConstruct %v3float %2181 %2171 %float_1 + %2183 = OpMatrixTimesVector %v3float %466 %2180 + %2184 = OpDot %float %2182 %2183 + OpBranch %2126 + %2126 = OpLabel + %2185 = OpPhi %float %2164 %2134 %2184 %2127 + OpBranch %2118 + %2119 = OpLabel + %2186 = OpExtInst %float %1 Log %float_0_00499999989 + %2187 = OpFDiv %float %2186 %1091 + OpBranch %2118 + %2118 = OpLabel + %2188 = OpPhi %float %2185 %2126 %2187 %2119 + %2189 = OpExtInst %float %1 Pow %float_10 %2188 + %2190 = OpCompositeInsert %v3float %2189 %2111 2 + %2191 = OpVectorTimesMatrix %v3float %2190 %602 + %2192 = OpFMul %v3float %2191 %519 + %2193 = OpExtInst %v3float %1 Pow %2192 %286 + %2194 = OpFMul %v3float %196 %2193 + %2195 = OpFAdd %v3float %195 %2194 + %2196 = OpFMul %v3float %197 %2193 + %2197 = OpFAdd %v3float %141 %2196 + %2198 = OpFDiv %v3float %141 %2197 + %2199 = OpFMul %v3float %2195 %2198 + %2200 = OpExtInst %v3float %1 Pow %2199 %287 + OpBranch %1342 + %1342 = OpLabel + %2201 = OpPhi %v3float %1366 %1346 %2200 %2118 + OpBranch %1336 + %1337 = OpLabel + %2202 = OpMatrixTimesMatrix %mat3v3float %572 %423 + %2203 = OpFMul %v3float %932 %285 + %2204 = OpVectorTimesMatrix %v3float %2203 %2202 + %2205 = OpCompositeExtract %float %2204 0 + %2206 = OpCompositeExtract %float %2204 1 + %2207 = OpExtInst %float %1 FMin %2205 %2206 + %2208 = OpCompositeExtract %float %2204 2 + %2209 = OpExtInst %float %1 FMin %2207 %2208 + %2210 = OpExtInst %float %1 FMax %2205 %2206 + %2211 = OpExtInst %float %1 FMax %2210 %2208 + %2212 = OpExtInst %float %1 FMax %2211 %float_1_00000001en10 + %2213 = OpExtInst %float %1 FMax %2209 %float_1_00000001en10 + %2214 = OpFSub %float %2212 %2213 + %2215 = OpExtInst %float %1 FMax %2211 %float_0_00999999978 + %2216 = OpFDiv %float %2214 %2215 + %2217 = OpFSub %float %2208 %2206 + %2218 = OpFMul %float %2208 %2217 + %2219 = OpFSub %float %2206 %2205 + %2220 = OpFMul %float 
%2206 %2219 + %2221 = OpFAdd %float %2218 %2220 + %2222 = OpFSub %float %2205 %2208 + %2223 = OpFMul %float %2205 %2222 + %2224 = OpFAdd %float %2221 %2223 + %2225 = OpExtInst %float %1 Sqrt %2224 + %2226 = OpFAdd %float %2208 %2206 + %2227 = OpFAdd %float %2226 %2205 + %2228 = OpFMul %float %float_1_75 %2225 + %2229 = OpFAdd %float %2227 %2228 + %2230 = OpFMul %float %2229 %float_0_333333343 + %2231 = OpFSub %float %2216 %float_0_400000006 + %2232 = OpFMul %float %2231 %float_5 + %2233 = OpFMul %float %2231 %float_2_5 + %2234 = OpExtInst %float %1 FAbs %2233 + %2235 = OpFSub %float %float_1 %2234 + %2236 = OpExtInst %float %1 FMax %2235 %float_0 + %2237 = OpExtInst %float %1 FSign %2232 + %2238 = OpConvertFToS %int %2237 + %2239 = OpConvertSToF %float %2238 + %2240 = OpFMul %float %2236 %2236 + %2241 = OpFSub %float %float_1 %2240 + %2242 = OpFMul %float %2239 %2241 + %2243 = OpFAdd %float %float_1 %2242 + %2244 = OpFMul %float %2243 %float_0_0250000004 + %2245 = OpFOrdLessThanEqual %bool %2230 %float_0_0533333346 + OpSelectionMerge %2246 None + OpBranchConditional %2245 %2247 %2248 + %2248 = OpLabel + %2249 = OpFOrdGreaterThanEqual %bool %2230 %float_0_159999996 + OpSelectionMerge %2250 None + OpBranchConditional %2249 %2251 %2252 + %2252 = OpLabel + %2253 = OpFDiv %float %float_0_239999995 %2229 + %2254 = OpFSub %float %2253 %float_0_5 + %2255 = OpFMul %float %2244 %2254 + OpBranch %2250 + %2251 = OpLabel + OpBranch %2250 + %2250 = OpLabel + %2256 = OpPhi %float %2255 %2252 %float_0 %2251 + OpBranch %2246 + %2247 = OpLabel + OpBranch %2246 + %2246 = OpLabel + %2257 = OpPhi %float %2256 %2250 %2244 %2247 + %2258 = OpFAdd %float %float_1 %2257 + %2259 = OpCompositeConstruct %v3float %2258 %2258 %2258 + %2260 = OpFMul %v3float %2204 %2259 + %2261 = OpCompositeExtract %float %2260 0 + %2262 = OpCompositeExtract %float %2260 1 + %2263 = OpFOrdEqual %bool %2261 %2262 + %2264 = OpCompositeExtract %float %2260 2 + %2265 = OpFOrdEqual %bool %2262 %2264 + %2266 = 
OpLogicalAnd %bool %2263 %2265 + OpSelectionMerge %2267 None + OpBranchConditional %2266 %2268 %2269 + %2269 = OpLabel + %2270 = OpExtInst %float %1 Sqrt %float_3 + %2271 = OpFSub %float %2262 %2264 + %2272 = OpFMul %float %2270 %2271 + %2273 = OpFMul %float %float_2 %2261 + %2274 = OpFSub %float %2273 %2262 + %2275 = OpFSub %float %2274 %2264 + %2276 = OpExtInst %float %1 Atan2 %2272 %2275 + %2277 = OpFMul %float %float_57_2957764 %2276 + OpBranch %2267 + %2268 = OpLabel + OpBranch %2267 + %2267 = OpLabel + %2278 = OpPhi %float %2277 %2269 %float_0 %2268 + %2279 = OpFOrdLessThan %bool %2278 %float_0 + OpSelectionMerge %2280 None + OpBranchConditional %2279 %2281 %2280 + %2281 = OpLabel + %2282 = OpFAdd %float %2278 %float_360 + OpBranch %2280 + %2280 = OpLabel + %2283 = OpPhi %float %2278 %2267 %2282 %2281 + %2284 = OpExtInst %float %1 FClamp %2283 %float_0 %float_360 + %2285 = OpFOrdGreaterThan %bool %2284 %float_180 + OpSelectionMerge %2286 None + OpBranchConditional %2285 %2287 %2286 + %2287 = OpLabel + %2288 = OpFSub %float %2284 %float_360 + OpBranch %2286 + %2286 = OpLabel + %2289 = OpPhi %float %2284 %2280 %2288 %2287 + %2290 = OpFOrdGreaterThan %bool %2289 %float_n67_5 + %2291 = OpFOrdLessThan %bool %2289 %float_67_5 + %2292 = OpLogicalAnd %bool %2290 %2291 + OpSelectionMerge %2293 None + OpBranchConditional %2292 %2294 %2293 + %2294 = OpLabel + %2295 = OpFSub %float %2289 %float_n67_5 + %2296 = OpFMul %float %2295 %float_0_0296296291 + %2297 = OpConvertFToS %int %2296 + %2298 = OpConvertSToF %float %2297 + %2299 = OpFSub %float %2296 %2298 + %2300 = OpFMul %float %2299 %2299 + %2301 = OpFMul %float %2300 %2299 + %2302 = OpIEqual %bool %2297 %int_3 + OpSelectionMerge %2303 None + OpBranchConditional %2302 %2304 %2305 + %2305 = OpLabel + %2306 = OpIEqual %bool %2297 %int_2 + OpSelectionMerge %2307 None + OpBranchConditional %2306 %2308 %2309 + %2309 = OpLabel + %2310 = OpIEqual %bool %2297 %int_1 + OpSelectionMerge %2311 None + OpBranchConditional %2310 
%2312 %2313 + %2313 = OpLabel + %2314 = OpIEqual %bool %2297 %int_0 + OpSelectionMerge %2315 None + OpBranchConditional %2314 %2316 %2317 + %2317 = OpLabel + OpBranch %2315 + %2316 = OpLabel + %2318 = OpFMul %float %2301 %float_0_166666672 + OpBranch %2315 + %2315 = OpLabel + %2319 = OpPhi %float %float_0 %2317 %2318 %2316 + OpBranch %2311 + %2312 = OpLabel + %2320 = OpFMul %float %2301 %float_n0_5 + %2321 = OpFMul %float %2300 %float_0_5 + %2322 = OpFAdd %float %2320 %2321 + %2323 = OpFMul %float %2299 %float_0_5 + %2324 = OpFAdd %float %2322 %2323 + %2325 = OpFAdd %float %2324 %float_0_166666672 + OpBranch %2311 + %2311 = OpLabel + %2326 = OpPhi %float %2319 %2315 %2325 %2312 + OpBranch %2307 + %2308 = OpLabel + %2327 = OpFMul %float %2301 %float_0_5 + %2328 = OpFMul %float %2300 %float_n1 + %2329 = OpFAdd %float %2327 %2328 + %2330 = OpFAdd %float %2329 %float_0_666666687 + OpBranch %2307 + %2307 = OpLabel + %2331 = OpPhi %float %2326 %2311 %2330 %2308 + OpBranch %2303 + %2304 = OpLabel + %2332 = OpFMul %float %2301 %float_n0_166666672 + %2333 = OpFMul %float %2300 %float_0_5 + %2334 = OpFAdd %float %2332 %2333 + %2335 = OpFMul %float %2299 %float_n0_5 + %2336 = OpFAdd %float %2334 %2335 + %2337 = OpFAdd %float %2336 %float_0_166666672 + OpBranch %2303 + %2303 = OpLabel + %2338 = OpPhi %float %2331 %2307 %2337 %2304 + OpBranch %2293 + %2293 = OpLabel + %2339 = OpPhi %float %float_0 %2286 %2338 %2303 + %2340 = OpFMul %float %2339 %float_1_5 + %2341 = OpFMul %float %2340 %2216 + %2342 = OpFSub %float %float_0_0299999993 %2261 + %2343 = OpFMul %float %2341 %2342 + %2344 = OpFMul %float %2343 %float_0_180000007 + %2345 = OpFAdd %float %2261 %2344 + %2346 = OpCompositeInsert %v3float %2345 %2260 0 + %2347 = OpExtInst %v3float %1 FClamp %2346 %138 %337 + %2348 = OpVectorTimesMatrix %v3float %2347 %434 + %2349 = OpExtInst %v3float %1 FClamp %2348 %138 %337 + %2350 = OpDot %float %2349 %73 + %2351 = OpCompositeConstruct %v3float %2350 %2350 %2350 + %2352 = OpExtInst 
%v3float %1 FMix %2351 %2349 %241 + %2353 = OpCompositeExtract %float %2352 0 + %2354 = OpExtInst %float %1 Exp2 %float_n15 + %2355 = OpFMul %float %float_0_179999992 %2354 + %2356 = OpExtInst %float %1 Exp2 %float_18 + %2357 = OpFMul %float %float_0_179999992 %2356 + OpStore %546 %499 + OpStore %545 %500 + %2358 = OpFOrdLessThanEqual %bool %2353 %float_0 + %2359 = OpExtInst %float %1 Exp2 %float_n14 + %2360 = OpSelect %float %2358 %2359 %2353 + %2361 = OpExtInst %float %1 Log %2360 + %2362 = OpFDiv %float %2361 %1091 + %2363 = OpExtInst %float %1 Log %2355 + %2364 = OpFDiv %float %2363 %1091 + %2365 = OpFOrdLessThanEqual %bool %2362 %2364 + OpSelectionMerge %2366 None + OpBranchConditional %2365 %2367 %2368 + %2368 = OpLabel + %2369 = OpFOrdGreaterThan %bool %2362 %2364 + %2370 = OpExtInst %float %1 Log %float_0_180000007 + %2371 = OpFDiv %float %2370 %1091 + %2372 = OpFOrdLessThan %bool %2362 %2371 + %2373 = OpLogicalAnd %bool %2369 %2372 + OpSelectionMerge %2374 None + OpBranchConditional %2373 %2375 %2376 + %2376 = OpLabel + %2377 = OpFOrdGreaterThanEqual %bool %2362 %2371 + %2378 = OpExtInst %float %1 Log %2357 + %2379 = OpFDiv %float %2378 %1091 + %2380 = OpFOrdLessThan %bool %2362 %2379 + %2381 = OpLogicalAnd %bool %2377 %2380 + OpSelectionMerge %2382 None + OpBranchConditional %2381 %2383 %2384 + %2384 = OpLabel + %2385 = OpExtInst %float %1 Log %float_10000 + %2386 = OpFDiv %float %2385 %1091 + OpBranch %2382 + %2383 = OpLabel + %2387 = OpFSub %float %2362 %2371 + %2388 = OpFMul %float %float_3 %2387 + %2389 = OpFSub %float %2379 %2371 + %2390 = OpFDiv %float %2388 %2389 + %2391 = OpConvertFToS %int %2390 + %2392 = OpConvertSToF %float %2391 + %2393 = OpFSub %float %2390 %2392 + %2394 = OpAccessChain %_ptr_Function_float %545 %2391 + %2395 = OpLoad %float %2394 + %2396 = OpIAdd %int %2391 %int_1 + %2397 = OpAccessChain %_ptr_Function_float %545 %2396 + %2398 = OpLoad %float %2397 + %2399 = OpIAdd %int %2391 %int_2 + %2400 = OpAccessChain 
%_ptr_Function_float %545 %2399 + %2401 = OpLoad %float %2400 + %2402 = OpCompositeConstruct %v3float %2395 %2398 %2401 + %2403 = OpFMul %float %2393 %2393 + %2404 = OpCompositeConstruct %v3float %2403 %2393 %float_1 + %2405 = OpMatrixTimesVector %v3float %466 %2402 + %2406 = OpDot %float %2404 %2405 + OpBranch %2382 + %2382 = OpLabel + %2407 = OpPhi %float %2386 %2384 %2406 %2383 + OpBranch %2374 + %2375 = OpLabel + %2408 = OpFSub %float %2362 %2364 + %2409 = OpFMul %float %float_3 %2408 + %2410 = OpFSub %float %2371 %2364 + %2411 = OpFDiv %float %2409 %2410 + %2412 = OpConvertFToS %int %2411 + %2413 = OpConvertSToF %float %2412 + %2414 = OpFSub %float %2411 %2413 + %2415 = OpAccessChain %_ptr_Function_float %546 %2412 + %2416 = OpLoad %float %2415 + %2417 = OpIAdd %int %2412 %int_1 + %2418 = OpAccessChain %_ptr_Function_float %546 %2417 + %2419 = OpLoad %float %2418 + %2420 = OpIAdd %int %2412 %int_2 + %2421 = OpAccessChain %_ptr_Function_float %546 %2420 + %2422 = OpLoad %float %2421 + %2423 = OpCompositeConstruct %v3float %2416 %2419 %2422 + %2424 = OpFMul %float %2414 %2414 + %2425 = OpCompositeConstruct %v3float %2424 %2414 %float_1 + %2426 = OpMatrixTimesVector %v3float %466 %2423 + %2427 = OpDot %float %2425 %2426 + OpBranch %2374 + %2374 = OpLabel + %2428 = OpPhi %float %2407 %2382 %2427 %2375 + OpBranch %2366 + %2367 = OpLabel + %2429 = OpExtInst %float %1 Log %float_9_99999975en05 + %2430 = OpFDiv %float %2429 %1091 + OpBranch %2366 + %2366 = OpLabel + %2431 = OpPhi %float %2428 %2374 %2430 %2367 + %2432 = OpExtInst %float %1 Pow %float_10 %2431 + %2433 = OpCompositeInsert %v3float %2432 %523 0 + %2434 = OpCompositeExtract %float %2352 1 + OpStore %548 %499 + OpStore %547 %500 + %2435 = OpFOrdLessThanEqual %bool %2434 %float_0 + %2436 = OpSelect %float %2435 %2359 %2434 + %2437 = OpExtInst %float %1 Log %2436 + %2438 = OpFDiv %float %2437 %1091 + %2439 = OpFOrdLessThanEqual %bool %2438 %2364 + OpSelectionMerge %2440 None + OpBranchConditional %2439 %2441 
%2442 + %2442 = OpLabel + %2443 = OpFOrdGreaterThan %bool %2438 %2364 + %2444 = OpExtInst %float %1 Log %float_0_180000007 + %2445 = OpFDiv %float %2444 %1091 + %2446 = OpFOrdLessThan %bool %2438 %2445 + %2447 = OpLogicalAnd %bool %2443 %2446 + OpSelectionMerge %2448 None + OpBranchConditional %2447 %2449 %2450 + %2450 = OpLabel + %2451 = OpFOrdGreaterThanEqual %bool %2438 %2445 + %2452 = OpExtInst %float %1 Log %2357 + %2453 = OpFDiv %float %2452 %1091 + %2454 = OpFOrdLessThan %bool %2438 %2453 + %2455 = OpLogicalAnd %bool %2451 %2454 + OpSelectionMerge %2456 None + OpBranchConditional %2455 %2457 %2458 + %2458 = OpLabel + %2459 = OpExtInst %float %1 Log %float_10000 + %2460 = OpFDiv %float %2459 %1091 + OpBranch %2456 + %2457 = OpLabel + %2461 = OpFSub %float %2438 %2445 + %2462 = OpFMul %float %float_3 %2461 + %2463 = OpFSub %float %2453 %2445 + %2464 = OpFDiv %float %2462 %2463 + %2465 = OpConvertFToS %int %2464 + %2466 = OpConvertSToF %float %2465 + %2467 = OpFSub %float %2464 %2466 + %2468 = OpAccessChain %_ptr_Function_float %547 %2465 + %2469 = OpLoad %float %2468 + %2470 = OpIAdd %int %2465 %int_1 + %2471 = OpAccessChain %_ptr_Function_float %547 %2470 + %2472 = OpLoad %float %2471 + %2473 = OpIAdd %int %2465 %int_2 + %2474 = OpAccessChain %_ptr_Function_float %547 %2473 + %2475 = OpLoad %float %2474 + %2476 = OpCompositeConstruct %v3float %2469 %2472 %2475 + %2477 = OpFMul %float %2467 %2467 + %2478 = OpCompositeConstruct %v3float %2477 %2467 %float_1 + %2479 = OpMatrixTimesVector %v3float %466 %2476 + %2480 = OpDot %float %2478 %2479 + OpBranch %2456 + %2456 = OpLabel + %2481 = OpPhi %float %2460 %2458 %2480 %2457 + OpBranch %2448 + %2449 = OpLabel + %2482 = OpFSub %float %2438 %2364 + %2483 = OpFMul %float %float_3 %2482 + %2484 = OpFSub %float %2445 %2364 + %2485 = OpFDiv %float %2483 %2484 + %2486 = OpConvertFToS %int %2485 + %2487 = OpConvertSToF %float %2486 + %2488 = OpFSub %float %2485 %2487 + %2489 = OpAccessChain %_ptr_Function_float %548 %2486 
+ %2490 = OpLoad %float %2489 + %2491 = OpIAdd %int %2486 %int_1 + %2492 = OpAccessChain %_ptr_Function_float %548 %2491 + %2493 = OpLoad %float %2492 + %2494 = OpIAdd %int %2486 %int_2 + %2495 = OpAccessChain %_ptr_Function_float %548 %2494 + %2496 = OpLoad %float %2495 + %2497 = OpCompositeConstruct %v3float %2490 %2493 %2496 + %2498 = OpFMul %float %2488 %2488 + %2499 = OpCompositeConstruct %v3float %2498 %2488 %float_1 + %2500 = OpMatrixTimesVector %v3float %466 %2497 + %2501 = OpDot %float %2499 %2500 + OpBranch %2448 + %2448 = OpLabel + %2502 = OpPhi %float %2481 %2456 %2501 %2449 + OpBranch %2440 + %2441 = OpLabel + %2503 = OpExtInst %float %1 Log %float_9_99999975en05 + %2504 = OpFDiv %float %2503 %1091 + OpBranch %2440 + %2440 = OpLabel + %2505 = OpPhi %float %2502 %2448 %2504 %2441 + %2506 = OpExtInst %float %1 Pow %float_10 %2505 + %2507 = OpCompositeInsert %v3float %2506 %2433 1 + %2508 = OpCompositeExtract %float %2352 2 + OpStore %550 %499 + OpStore %549 %500 + %2509 = OpFOrdLessThanEqual %bool %2508 %float_0 + %2510 = OpSelect %float %2509 %2359 %2508 + %2511 = OpExtInst %float %1 Log %2510 + %2512 = OpFDiv %float %2511 %1091 + %2513 = OpFOrdLessThanEqual %bool %2512 %2364 + OpSelectionMerge %2514 None + OpBranchConditional %2513 %2515 %2516 + %2516 = OpLabel + %2517 = OpFOrdGreaterThan %bool %2512 %2364 + %2518 = OpExtInst %float %1 Log %float_0_180000007 + %2519 = OpFDiv %float %2518 %1091 + %2520 = OpFOrdLessThan %bool %2512 %2519 + %2521 = OpLogicalAnd %bool %2517 %2520 + OpSelectionMerge %2522 None + OpBranchConditional %2521 %2523 %2524 + %2524 = OpLabel + %2525 = OpFOrdGreaterThanEqual %bool %2512 %2519 + %2526 = OpExtInst %float %1 Log %2357 + %2527 = OpFDiv %float %2526 %1091 + %2528 = OpFOrdLessThan %bool %2512 %2527 + %2529 = OpLogicalAnd %bool %2525 %2528 + OpSelectionMerge %2530 None + OpBranchConditional %2529 %2531 %2532 + %2532 = OpLabel + %2533 = OpExtInst %float %1 Log %float_10000 + %2534 = OpFDiv %float %2533 %1091 + OpBranch 
%2530 + %2531 = OpLabel + %2535 = OpFSub %float %2512 %2519 + %2536 = OpFMul %float %float_3 %2535 + %2537 = OpFSub %float %2527 %2519 + %2538 = OpFDiv %float %2536 %2537 + %2539 = OpConvertFToS %int %2538 + %2540 = OpConvertSToF %float %2539 + %2541 = OpFSub %float %2538 %2540 + %2542 = OpAccessChain %_ptr_Function_float %549 %2539 + %2543 = OpLoad %float %2542 + %2544 = OpIAdd %int %2539 %int_1 + %2545 = OpAccessChain %_ptr_Function_float %549 %2544 + %2546 = OpLoad %float %2545 + %2547 = OpIAdd %int %2539 %int_2 + %2548 = OpAccessChain %_ptr_Function_float %549 %2547 + %2549 = OpLoad %float %2548 + %2550 = OpCompositeConstruct %v3float %2543 %2546 %2549 + %2551 = OpFMul %float %2541 %2541 + %2552 = OpCompositeConstruct %v3float %2551 %2541 %float_1 + %2553 = OpMatrixTimesVector %v3float %466 %2550 + %2554 = OpDot %float %2552 %2553 + OpBranch %2530 + %2530 = OpLabel + %2555 = OpPhi %float %2534 %2532 %2554 %2531 + OpBranch %2522 + %2523 = OpLabel + %2556 = OpFSub %float %2512 %2364 + %2557 = OpFMul %float %float_3 %2556 + %2558 = OpFSub %float %2519 %2364 + %2559 = OpFDiv %float %2557 %2558 + %2560 = OpConvertFToS %int %2559 + %2561 = OpConvertSToF %float %2560 + %2562 = OpFSub %float %2559 %2561 + %2563 = OpAccessChain %_ptr_Function_float %550 %2560 + %2564 = OpLoad %float %2563 + %2565 = OpIAdd %int %2560 %int_1 + %2566 = OpAccessChain %_ptr_Function_float %550 %2565 + %2567 = OpLoad %float %2566 + %2568 = OpIAdd %int %2560 %int_2 + %2569 = OpAccessChain %_ptr_Function_float %550 %2568 + %2570 = OpLoad %float %2569 + %2571 = OpCompositeConstruct %v3float %2564 %2567 %2570 + %2572 = OpFMul %float %2562 %2562 + %2573 = OpCompositeConstruct %v3float %2572 %2562 %float_1 + %2574 = OpMatrixTimesVector %v3float %466 %2571 + %2575 = OpDot %float %2573 %2574 + OpBranch %2522 + %2522 = OpLabel + %2576 = OpPhi %float %2555 %2530 %2575 %2523 + OpBranch %2514 + %2515 = OpLabel + %2577 = OpExtInst %float %1 Log %float_9_99999975en05 + %2578 = OpFDiv %float %2577 %1091 + 
OpBranch %2514 + %2514 = OpLabel + %2579 = OpPhi %float %2576 %2522 %2578 %2515 + %2580 = OpExtInst %float %1 Pow %float_10 %2579 + %2581 = OpCompositeInsert %v3float %2580 %2507 2 + %2582 = OpVectorTimesMatrix %v3float %2581 %438 + %2583 = OpVectorTimesMatrix %v3float %2582 %434 + %2584 = OpExtInst %float %1 Pow %float_2 %float_n12 + %2585 = OpFMul %float %float_0_179999992 %2584 + OpStore %558 %499 + OpStore %557 %500 + %2586 = OpFOrdLessThanEqual %bool %2585 %float_0 + %2587 = OpSelect %float %2586 %2359 %2585 + %2588 = OpExtInst %float %1 Log %2587 + %2589 = OpFDiv %float %2588 %1091 + %2590 = OpFOrdLessThanEqual %bool %2589 %2364 + OpSelectionMerge %2591 None + OpBranchConditional %2590 %2592 %2593 + %2593 = OpLabel + %2594 = OpFOrdGreaterThan %bool %2589 %2364 + %2595 = OpExtInst %float %1 Log %float_0_180000007 + %2596 = OpFDiv %float %2595 %1091 + %2597 = OpFOrdLessThan %bool %2589 %2596 + %2598 = OpLogicalAnd %bool %2594 %2597 + OpSelectionMerge %2599 None + OpBranchConditional %2598 %2600 %2601 + %2601 = OpLabel + %2602 = OpFOrdGreaterThanEqual %bool %2589 %2596 + %2603 = OpExtInst %float %1 Log %2357 + %2604 = OpFDiv %float %2603 %1091 + %2605 = OpFOrdLessThan %bool %2589 %2604 + %2606 = OpLogicalAnd %bool %2602 %2605 + OpSelectionMerge %2607 None + OpBranchConditional %2606 %2608 %2609 + %2609 = OpLabel + %2610 = OpExtInst %float %1 Log %float_10000 + %2611 = OpFDiv %float %2610 %1091 + OpBranch %2607 + %2608 = OpLabel + %2612 = OpFSub %float %2589 %2596 + %2613 = OpFMul %float %float_3 %2612 + %2614 = OpFSub %float %2604 %2596 + %2615 = OpFDiv %float %2613 %2614 + %2616 = OpConvertFToS %int %2615 + %2617 = OpConvertSToF %float %2616 + %2618 = OpFSub %float %2615 %2617 + %2619 = OpAccessChain %_ptr_Function_float %557 %2616 + %2620 = OpLoad %float %2619 + %2621 = OpIAdd %int %2616 %int_1 + %2622 = OpAccessChain %_ptr_Function_float %557 %2621 + %2623 = OpLoad %float %2622 + %2624 = OpIAdd %int %2616 %int_2 + %2625 = OpAccessChain %_ptr_Function_float 
%557 %2624 + %2626 = OpLoad %float %2625 + %2627 = OpCompositeConstruct %v3float %2620 %2623 %2626 + %2628 = OpFMul %float %2618 %2618 + %2629 = OpCompositeConstruct %v3float %2628 %2618 %float_1 + %2630 = OpMatrixTimesVector %v3float %466 %2627 + %2631 = OpDot %float %2629 %2630 + OpBranch %2607 + %2607 = OpLabel + %2632 = OpPhi %float %2611 %2609 %2631 %2608 + OpBranch %2599 + %2600 = OpLabel + %2633 = OpFSub %float %2589 %2364 + %2634 = OpFMul %float %float_3 %2633 + %2635 = OpFSub %float %2596 %2364 + %2636 = OpFDiv %float %2634 %2635 + %2637 = OpConvertFToS %int %2636 + %2638 = OpConvertSToF %float %2637 + %2639 = OpFSub %float %2636 %2638 + %2640 = OpAccessChain %_ptr_Function_float %558 %2637 + %2641 = OpLoad %float %2640 + %2642 = OpIAdd %int %2637 %int_1 + %2643 = OpAccessChain %_ptr_Function_float %558 %2642 + %2644 = OpLoad %float %2643 + %2645 = OpIAdd %int %2637 %int_2 + %2646 = OpAccessChain %_ptr_Function_float %558 %2645 + %2647 = OpLoad %float %2646 + %2648 = OpCompositeConstruct %v3float %2641 %2644 %2647 + %2649 = OpFMul %float %2639 %2639 + %2650 = OpCompositeConstruct %v3float %2649 %2639 %float_1 + %2651 = OpMatrixTimesVector %v3float %466 %2648 + %2652 = OpDot %float %2650 %2651 + OpBranch %2599 + %2599 = OpLabel + %2653 = OpPhi %float %2632 %2607 %2652 %2600 + OpBranch %2591 + %2592 = OpLabel + %2654 = OpExtInst %float %1 Log %float_9_99999975en05 + %2655 = OpFDiv %float %2654 %1091 + OpBranch %2591 + %2591 = OpLabel + %2656 = OpPhi %float %2653 %2599 %2655 %2592 + %2657 = OpExtInst %float %1 Pow %float_10 %2656 + OpStore %560 %499 + OpStore %559 %500 + %2658 = OpExtInst %float %1 Log %float_0_180000007 + %2659 = OpFDiv %float %2658 %1091 + %2660 = OpFOrdLessThanEqual %bool %2659 %2364 + OpSelectionMerge %2661 None + OpBranchConditional %2660 %2662 %2663 + %2663 = OpLabel + %2664 = OpFOrdGreaterThan %bool %2659 %2364 + %2665 = OpFOrdLessThan %bool %2659 %2659 + %2666 = OpLogicalAnd %bool %2664 %2665 + OpSelectionMerge %2667 None + 
OpBranchConditional %2666 %2668 %2669 + %2669 = OpLabel + %2670 = OpFOrdGreaterThanEqual %bool %2659 %2659 + %2671 = OpExtInst %float %1 Log %2357 + %2672 = OpFDiv %float %2671 %1091 + %2673 = OpFOrdLessThan %bool %2659 %2672 + %2674 = OpLogicalAnd %bool %2670 %2673 + OpSelectionMerge %2675 None + OpBranchConditional %2674 %2676 %2677 + %2677 = OpLabel + %2678 = OpExtInst %float %1 Log %float_10000 + %2679 = OpFDiv %float %2678 %1091 + OpBranch %2675 + %2676 = OpLabel + %2680 = OpFSub %float %2659 %2659 + %2681 = OpFMul %float %float_3 %2680 + %2682 = OpFSub %float %2672 %2659 + %2683 = OpFDiv %float %2681 %2682 + %2684 = OpConvertFToS %int %2683 + %2685 = OpConvertSToF %float %2684 + %2686 = OpFSub %float %2683 %2685 + %2687 = OpAccessChain %_ptr_Function_float %559 %2684 + %2688 = OpLoad %float %2687 + %2689 = OpIAdd %int %2684 %int_1 + %2690 = OpAccessChain %_ptr_Function_float %559 %2689 + %2691 = OpLoad %float %2690 + %2692 = OpIAdd %int %2684 %int_2 + %2693 = OpAccessChain %_ptr_Function_float %559 %2692 + %2694 = OpLoad %float %2693 + %2695 = OpCompositeConstruct %v3float %2688 %2691 %2694 + %2696 = OpFMul %float %2686 %2686 + %2697 = OpCompositeConstruct %v3float %2696 %2686 %float_1 + %2698 = OpMatrixTimesVector %v3float %466 %2695 + %2699 = OpDot %float %2697 %2698 + OpBranch %2675 + %2675 = OpLabel + %2700 = OpPhi %float %2679 %2677 %2699 %2676 + OpBranch %2667 + %2668 = OpLabel + %2701 = OpAccessChain %_ptr_Function_float %560 %int_3 + %2702 = OpLoad %float %2701 + %2703 = OpAccessChain %_ptr_Function_float %560 %int_4 + %2704 = OpLoad %float %2703 + %2705 = OpAccessChain %_ptr_Function_float %560 %int_5 + %2706 = OpLoad %float %2705 + %2707 = OpCompositeConstruct %v3float %2702 %2704 %2706 + %2708 = OpMatrixTimesVector %v3float %466 %2707 + %2709 = OpCompositeExtract %float %2708 2 + OpBranch %2667 + %2667 = OpLabel + %2710 = OpPhi %float %2700 %2675 %2709 %2668 + OpBranch %2661 + %2662 = OpLabel + %2711 = OpExtInst %float %1 Log %float_9_99999975en05 
+ %2712 = OpFDiv %float %2711 %1091 + OpBranch %2661 + %2661 = OpLabel + %2713 = OpPhi %float %2710 %2667 %2712 %2662 + %2714 = OpExtInst %float %1 Pow %float_10 %2713 + %2715 = OpExtInst %float %1 Pow %float_2 %float_10 + %2716 = OpFMul %float %float_0_179999992 %2715 + OpStore %562 %499 + OpStore %561 %500 + %2717 = OpFOrdLessThanEqual %bool %2716 %float_0 + %2718 = OpSelect %float %2717 %2359 %2716 + %2719 = OpExtInst %float %1 Log %2718 + %2720 = OpFDiv %float %2719 %1091 + %2721 = OpFOrdLessThanEqual %bool %2720 %2364 + OpSelectionMerge %2722 None + OpBranchConditional %2721 %2723 %2724 + %2724 = OpLabel + %2725 = OpFOrdGreaterThan %bool %2720 %2364 + %2726 = OpFOrdLessThan %bool %2720 %2659 + %2727 = OpLogicalAnd %bool %2725 %2726 + OpSelectionMerge %2728 None + OpBranchConditional %2727 %2729 %2730 + %2730 = OpLabel + %2731 = OpFOrdGreaterThanEqual %bool %2720 %2659 + %2732 = OpExtInst %float %1 Log %2357 + %2733 = OpFDiv %float %2732 %1091 + %2734 = OpFOrdLessThan %bool %2720 %2733 + %2735 = OpLogicalAnd %bool %2731 %2734 + OpSelectionMerge %2736 None + OpBranchConditional %2735 %2737 %2738 + %2738 = OpLabel + %2739 = OpExtInst %float %1 Log %float_10000 + %2740 = OpFDiv %float %2739 %1091 + OpBranch %2736 + %2737 = OpLabel + %2741 = OpFSub %float %2720 %2659 + %2742 = OpFMul %float %float_3 %2741 + %2743 = OpFSub %float %2733 %2659 + %2744 = OpFDiv %float %2742 %2743 + %2745 = OpConvertFToS %int %2744 + %2746 = OpConvertSToF %float %2745 + %2747 = OpFSub %float %2744 %2746 + %2748 = OpAccessChain %_ptr_Function_float %561 %2745 + %2749 = OpLoad %float %2748 + %2750 = OpIAdd %int %2745 %int_1 + %2751 = OpAccessChain %_ptr_Function_float %561 %2750 + %2752 = OpLoad %float %2751 + %2753 = OpIAdd %int %2745 %int_2 + %2754 = OpAccessChain %_ptr_Function_float %561 %2753 + %2755 = OpLoad %float %2754 + %2756 = OpCompositeConstruct %v3float %2749 %2752 %2755 + %2757 = OpFMul %float %2747 %2747 + %2758 = OpCompositeConstruct %v3float %2757 %2747 %float_1 + %2759 = 
OpMatrixTimesVector %v3float %466 %2756 + %2760 = OpDot %float %2758 %2759 + OpBranch %2736 + %2736 = OpLabel + %2761 = OpPhi %float %2740 %2738 %2760 %2737 + OpBranch %2728 + %2729 = OpLabel + %2762 = OpFSub %float %2720 %2364 + %2763 = OpFMul %float %float_3 %2762 + %2764 = OpFSub %float %2659 %2364 + %2765 = OpFDiv %float %2763 %2764 + %2766 = OpConvertFToS %int %2765 + %2767 = OpConvertSToF %float %2766 + %2768 = OpFSub %float %2765 %2767 + %2769 = OpAccessChain %_ptr_Function_float %562 %2766 + %2770 = OpLoad %float %2769 + %2771 = OpIAdd %int %2766 %int_1 + %2772 = OpAccessChain %_ptr_Function_float %562 %2771 + %2773 = OpLoad %float %2772 + %2774 = OpIAdd %int %2766 %int_2 + %2775 = OpAccessChain %_ptr_Function_float %562 %2774 + %2776 = OpLoad %float %2775 + %2777 = OpCompositeConstruct %v3float %2770 %2773 %2776 + %2778 = OpFMul %float %2768 %2768 + %2779 = OpCompositeConstruct %v3float %2778 %2768 %float_1 + %2780 = OpMatrixTimesVector %v3float %466 %2777 + %2781 = OpDot %float %2779 %2780 + OpBranch %2728 + %2728 = OpLabel + %2782 = OpPhi %float %2761 %2736 %2781 %2729 + OpBranch %2722 + %2723 = OpLabel + %2783 = OpExtInst %float %1 Log %float_9_99999975en05 + %2784 = OpFDiv %float %2783 %1091 + OpBranch %2722 + %2722 = OpLabel + %2785 = OpPhi %float %2782 %2728 %2784 %2723 + %2786 = OpExtInst %float %1 Pow %float_10 %2785 + %2787 = OpCompositeExtract %float %2583 0 + OpStore %556 %503 + OpStore %555 %504 + %2788 = OpFOrdLessThanEqual %bool %2787 %float_0 + %2789 = OpSelect %float %2788 %float_9_99999975en05 %2787 + %2790 = OpExtInst %float %1 Log %2789 + %2791 = OpFDiv %float %2790 %1091 + %2792 = OpExtInst %float %1 Log %2657 + %2793 = OpFDiv %float %2792 %1091 + %2794 = OpFOrdLessThanEqual %bool %2791 %2793 + OpSelectionMerge %2795 None + OpBranchConditional %2794 %2796 %2797 + %2797 = OpLabel + %2798 = OpFOrdGreaterThan %bool %2791 %2793 + %2799 = OpExtInst %float %1 Log %2714 + %2800 = OpFDiv %float %2799 %1091 + %2801 = OpFOrdLessThan %bool %2791 
%2800 + %2802 = OpLogicalAnd %bool %2798 %2801 + OpSelectionMerge %2803 None + OpBranchConditional %2802 %2804 %2805 + %2805 = OpLabel + %2806 = OpFOrdGreaterThanEqual %bool %2791 %2800 + %2807 = OpExtInst %float %1 Log %2786 + %2808 = OpFDiv %float %2807 %1091 + %2809 = OpFOrdLessThan %bool %2791 %2808 + %2810 = OpLogicalAnd %bool %2806 %2809 + OpSelectionMerge %2811 None + OpBranchConditional %2810 %2812 %2813 + %2813 = OpLabel + %2814 = OpFMul %float %2791 %float_0_0599999987 + %2815 = OpExtInst %float %1 Log %float_1000 + %2816 = OpFDiv %float %2815 %1091 + %2817 = OpFMul %float %float_0_0599999987 %2807 + %2818 = OpFDiv %float %2817 %1091 + %2819 = OpFSub %float %2816 %2818 + %2820 = OpFAdd %float %2814 %2819 + OpBranch %2811 + %2812 = OpLabel + %2821 = OpFSub %float %2791 %2800 + %2822 = OpFMul %float %float_7 %2821 + %2823 = OpFSub %float %2808 %2800 + %2824 = OpFDiv %float %2822 %2823 + %2825 = OpConvertFToS %int %2824 + %2826 = OpConvertSToF %float %2825 + %2827 = OpFSub %float %2824 %2826 + %2828 = OpAccessChain %_ptr_Function_float %555 %2825 + %2829 = OpLoad %float %2828 + %2830 = OpIAdd %int %2825 %int_1 + %2831 = OpAccessChain %_ptr_Function_float %555 %2830 + %2832 = OpLoad %float %2831 + %2833 = OpIAdd %int %2825 %int_2 + %2834 = OpAccessChain %_ptr_Function_float %555 %2833 + %2835 = OpLoad %float %2834 + %2836 = OpCompositeConstruct %v3float %2829 %2832 %2835 + %2837 = OpFMul %float %2827 %2827 + %2838 = OpCompositeConstruct %v3float %2837 %2827 %float_1 + %2839 = OpMatrixTimesVector %v3float %466 %2836 + %2840 = OpDot %float %2838 %2839 + OpBranch %2811 + %2811 = OpLabel + %2841 = OpPhi %float %2820 %2813 %2840 %2812 + OpBranch %2803 + %2804 = OpLabel + %2842 = OpFSub %float %2791 %2793 + %2843 = OpFMul %float %float_7 %2842 + %2844 = OpFSub %float %2800 %2793 + %2845 = OpFDiv %float %2843 %2844 + %2846 = OpConvertFToS %int %2845 + %2847 = OpConvertSToF %float %2846 + %2848 = OpFSub %float %2845 %2847 + %2849 = OpAccessChain %_ptr_Function_float 
%556 %2846 + %2850 = OpLoad %float %2849 + %2851 = OpIAdd %int %2846 %int_1 + %2852 = OpAccessChain %_ptr_Function_float %556 %2851 + %2853 = OpLoad %float %2852 + %2854 = OpIAdd %int %2846 %int_2 + %2855 = OpAccessChain %_ptr_Function_float %556 %2854 + %2856 = OpLoad %float %2855 + %2857 = OpCompositeConstruct %v3float %2850 %2853 %2856 + %2858 = OpFMul %float %2848 %2848 + %2859 = OpCompositeConstruct %v3float %2858 %2848 %float_1 + %2860 = OpMatrixTimesVector %v3float %466 %2857 + %2861 = OpDot %float %2859 %2860 + OpBranch %2803 + %2803 = OpLabel + %2862 = OpPhi %float %2841 %2811 %2861 %2804 + OpBranch %2795 + %2796 = OpLabel + %2863 = OpFMul %float %2791 %float_3 + %2864 = OpExtInst %float %1 Log %float_9_99999975en05 + %2865 = OpFDiv %float %2864 %1091 + %2866 = OpFMul %float %float_3 %2792 + %2867 = OpFDiv %float %2866 %1091 + %2868 = OpFSub %float %2865 %2867 + %2869 = OpFAdd %float %2863 %2868 + OpBranch %2795 + %2795 = OpLabel + %2870 = OpPhi %float %2862 %2803 %2869 %2796 + %2871 = OpExtInst %float %1 Pow %float_10 %2870 + %2872 = OpCompositeInsert %v3float %2871 %523 0 + %2873 = OpCompositeExtract %float %2583 1 + OpStore %554 %503 + OpStore %553 %504 + %2874 = OpFOrdLessThanEqual %bool %2873 %float_0 + %2875 = OpSelect %float %2874 %float_9_99999975en05 %2873 + %2876 = OpExtInst %float %1 Log %2875 + %2877 = OpFDiv %float %2876 %1091 + %2878 = OpFOrdLessThanEqual %bool %2877 %2793 + OpSelectionMerge %2879 None + OpBranchConditional %2878 %2880 %2881 + %2881 = OpLabel + %2882 = OpFOrdGreaterThan %bool %2877 %2793 + %2883 = OpExtInst %float %1 Log %2714 + %2884 = OpFDiv %float %2883 %1091 + %2885 = OpFOrdLessThan %bool %2877 %2884 + %2886 = OpLogicalAnd %bool %2882 %2885 + OpSelectionMerge %2887 None + OpBranchConditional %2886 %2888 %2889 + %2889 = OpLabel + %2890 = OpFOrdGreaterThanEqual %bool %2877 %2884 + %2891 = OpExtInst %float %1 Log %2786 + %2892 = OpFDiv %float %2891 %1091 + %2893 = OpFOrdLessThan %bool %2877 %2892 + %2894 = OpLogicalAnd %bool 
%2890 %2893 + OpSelectionMerge %2895 None + OpBranchConditional %2894 %2896 %2897 + %2897 = OpLabel + %2898 = OpFMul %float %2877 %float_0_0599999987 + %2899 = OpExtInst %float %1 Log %float_1000 + %2900 = OpFDiv %float %2899 %1091 + %2901 = OpFMul %float %float_0_0599999987 %2891 + %2902 = OpFDiv %float %2901 %1091 + %2903 = OpFSub %float %2900 %2902 + %2904 = OpFAdd %float %2898 %2903 + OpBranch %2895 + %2896 = OpLabel + %2905 = OpFSub %float %2877 %2884 + %2906 = OpFMul %float %float_7 %2905 + %2907 = OpFSub %float %2892 %2884 + %2908 = OpFDiv %float %2906 %2907 + %2909 = OpConvertFToS %int %2908 + %2910 = OpConvertSToF %float %2909 + %2911 = OpFSub %float %2908 %2910 + %2912 = OpAccessChain %_ptr_Function_float %553 %2909 + %2913 = OpLoad %float %2912 + %2914 = OpIAdd %int %2909 %int_1 + %2915 = OpAccessChain %_ptr_Function_float %553 %2914 + %2916 = OpLoad %float %2915 + %2917 = OpIAdd %int %2909 %int_2 + %2918 = OpAccessChain %_ptr_Function_float %553 %2917 + %2919 = OpLoad %float %2918 + %2920 = OpCompositeConstruct %v3float %2913 %2916 %2919 + %2921 = OpFMul %float %2911 %2911 + %2922 = OpCompositeConstruct %v3float %2921 %2911 %float_1 + %2923 = OpMatrixTimesVector %v3float %466 %2920 + %2924 = OpDot %float %2922 %2923 + OpBranch %2895 + %2895 = OpLabel + %2925 = OpPhi %float %2904 %2897 %2924 %2896 + OpBranch %2887 + %2888 = OpLabel + %2926 = OpFSub %float %2877 %2793 + %2927 = OpFMul %float %float_7 %2926 + %2928 = OpFSub %float %2884 %2793 + %2929 = OpFDiv %float %2927 %2928 + %2930 = OpConvertFToS %int %2929 + %2931 = OpConvertSToF %float %2930 + %2932 = OpFSub %float %2929 %2931 + %2933 = OpAccessChain %_ptr_Function_float %554 %2930 + %2934 = OpLoad %float %2933 + %2935 = OpIAdd %int %2930 %int_1 + %2936 = OpAccessChain %_ptr_Function_float %554 %2935 + %2937 = OpLoad %float %2936 + %2938 = OpIAdd %int %2930 %int_2 + %2939 = OpAccessChain %_ptr_Function_float %554 %2938 + %2940 = OpLoad %float %2939 + %2941 = OpCompositeConstruct %v3float %2934 %2937 
%2940 + %2942 = OpFMul %float %2932 %2932 + %2943 = OpCompositeConstruct %v3float %2942 %2932 %float_1 + %2944 = OpMatrixTimesVector %v3float %466 %2941 + %2945 = OpDot %float %2943 %2944 + OpBranch %2887 + %2887 = OpLabel + %2946 = OpPhi %float %2925 %2895 %2945 %2888 + OpBranch %2879 + %2880 = OpLabel + %2947 = OpFMul %float %2877 %float_3 + %2948 = OpExtInst %float %1 Log %float_9_99999975en05 + %2949 = OpFDiv %float %2948 %1091 + %2950 = OpFMul %float %float_3 %2792 + %2951 = OpFDiv %float %2950 %1091 + %2952 = OpFSub %float %2949 %2951 + %2953 = OpFAdd %float %2947 %2952 + OpBranch %2879 + %2879 = OpLabel + %2954 = OpPhi %float %2946 %2887 %2953 %2880 + %2955 = OpExtInst %float %1 Pow %float_10 %2954 + %2956 = OpCompositeInsert %v3float %2955 %2872 1 + %2957 = OpCompositeExtract %float %2583 2 + OpStore %552 %503 + OpStore %551 %504 + %2958 = OpFOrdLessThanEqual %bool %2957 %float_0 + %2959 = OpSelect %float %2958 %float_9_99999975en05 %2957 + %2960 = OpExtInst %float %1 Log %2959 + %2961 = OpFDiv %float %2960 %1091 + %2962 = OpFOrdLessThanEqual %bool %2961 %2793 + OpSelectionMerge %2963 None + OpBranchConditional %2962 %2964 %2965 + %2965 = OpLabel + %2966 = OpFOrdGreaterThan %bool %2961 %2793 + %2967 = OpExtInst %float %1 Log %2714 + %2968 = OpFDiv %float %2967 %1091 + %2969 = OpFOrdLessThan %bool %2961 %2968 + %2970 = OpLogicalAnd %bool %2966 %2969 + OpSelectionMerge %2971 None + OpBranchConditional %2970 %2972 %2973 + %2973 = OpLabel + %2974 = OpFOrdGreaterThanEqual %bool %2961 %2968 + %2975 = OpExtInst %float %1 Log %2786 + %2976 = OpFDiv %float %2975 %1091 + %2977 = OpFOrdLessThan %bool %2961 %2976 + %2978 = OpLogicalAnd %bool %2974 %2977 + OpSelectionMerge %2979 None + OpBranchConditional %2978 %2980 %2981 + %2981 = OpLabel + %2982 = OpFMul %float %2961 %float_0_0599999987 + %2983 = OpExtInst %float %1 Log %float_1000 + %2984 = OpFDiv %float %2983 %1091 + %2985 = OpFMul %float %float_0_0599999987 %2975 + %2986 = OpFDiv %float %2985 %1091 + %2987 = 
OpFSub %float %2984 %2986 + %2988 = OpFAdd %float %2982 %2987 + OpBranch %2979 + %2980 = OpLabel + %2989 = OpFSub %float %2961 %2968 + %2990 = OpFMul %float %float_7 %2989 + %2991 = OpFSub %float %2976 %2968 + %2992 = OpFDiv %float %2990 %2991 + %2993 = OpConvertFToS %int %2992 + %2994 = OpConvertSToF %float %2993 + %2995 = OpFSub %float %2992 %2994 + %2996 = OpAccessChain %_ptr_Function_float %551 %2993 + %2997 = OpLoad %float %2996 + %2998 = OpIAdd %int %2993 %int_1 + %2999 = OpAccessChain %_ptr_Function_float %551 %2998 + %3000 = OpLoad %float %2999 + %3001 = OpIAdd %int %2993 %int_2 + %3002 = OpAccessChain %_ptr_Function_float %551 %3001 + %3003 = OpLoad %float %3002 + %3004 = OpCompositeConstruct %v3float %2997 %3000 %3003 + %3005 = OpFMul %float %2995 %2995 + %3006 = OpCompositeConstruct %v3float %3005 %2995 %float_1 + %3007 = OpMatrixTimesVector %v3float %466 %3004 + %3008 = OpDot %float %3006 %3007 + OpBranch %2979 + %2979 = OpLabel + %3009 = OpPhi %float %2988 %2981 %3008 %2980 + OpBranch %2971 + %2972 = OpLabel + %3010 = OpFSub %float %2961 %2793 + %3011 = OpFMul %float %float_7 %3010 + %3012 = OpFSub %float %2968 %2793 + %3013 = OpFDiv %float %3011 %3012 + %3014 = OpConvertFToS %int %3013 + %3015 = OpConvertSToF %float %3014 + %3016 = OpFSub %float %3013 %3015 + %3017 = OpAccessChain %_ptr_Function_float %552 %3014 + %3018 = OpLoad %float %3017 + %3019 = OpIAdd %int %3014 %int_1 + %3020 = OpAccessChain %_ptr_Function_float %552 %3019 + %3021 = OpLoad %float %3020 + %3022 = OpIAdd %int %3014 %int_2 + %3023 = OpAccessChain %_ptr_Function_float %552 %3022 + %3024 = OpLoad %float %3023 + %3025 = OpCompositeConstruct %v3float %3018 %3021 %3024 + %3026 = OpFMul %float %3016 %3016 + %3027 = OpCompositeConstruct %v3float %3026 %3016 %float_1 + %3028 = OpMatrixTimesVector %v3float %466 %3025 + %3029 = OpDot %float %3027 %3028 + OpBranch %2971 + %2971 = OpLabel + %3030 = OpPhi %float %3009 %2979 %3029 %2972 + OpBranch %2963 + %2964 = OpLabel + %3031 = OpFMul 
%float %2961 %float_3 + %3032 = OpExtInst %float %1 Log %float_9_99999975en05 + %3033 = OpFDiv %float %3032 %1091 + %3034 = OpFMul %float %float_3 %2792 + %3035 = OpFDiv %float %3034 %1091 + %3036 = OpFSub %float %3033 %3035 + %3037 = OpFAdd %float %3031 %3036 + OpBranch %2963 + %2963 = OpLabel + %3038 = OpPhi %float %3030 %2971 %3037 %2964 + %3039 = OpExtInst %float %1 Pow %float_10 %3038 + %3040 = OpCompositeInsert %v3float %3039 %2956 2 + %3041 = OpFSub %v3float %3040 %361 + %3042 = OpVectorTimesMatrix %v3float %3041 %602 + %3043 = OpFMul %v3float %3042 %519 + %3044 = OpExtInst %v3float %1 Pow %3043 %286 + %3045 = OpFMul %v3float %196 %3044 + %3046 = OpFAdd %v3float %195 %3045 + %3047 = OpFMul %v3float %197 %3044 + %3048 = OpFAdd %v3float %141 %3047 + %3049 = OpFDiv %v3float %141 %3048 + %3050 = OpFMul %v3float %3046 %3049 + %3051 = OpExtInst %v3float %1 Pow %3050 %287 + OpBranch %1336 + %1336 = OpLabel + %3052 = OpPhi %v3float %2201 %1342 %3051 %2963 + OpBranch %1330 + %1331 = OpLabel + %3053 = OpVectorTimesMatrix %v3float %1324 %573 + %3054 = OpVectorTimesMatrix %v3float %3053 %602 + %3055 = OpExtInst %v3float %1 FMax %263 %3054 + %3056 = OpFMul %v3float %3055 %275 + %3057 = OpExtInst %v3float %1 FMax %3055 %277 + %3058 = OpExtInst %v3float %1 Pow %3057 %279 + %3059 = OpFMul %v3float %3058 %281 + %3060 = OpFSub %v3float %3059 %283 + %3061 = OpExtInst %v3float %1 FMin %3056 %3060 + OpBranch %1330 + %1330 = OpLabel + %3062 = OpPhi %v3float %3052 %1336 %3061 %1331 + OpBranch %1326 + %1327 = OpLabel + %3063 = OpCompositeExtract %float %1324 0 + OpBranch %3064 + %3064 = OpLabel + OpLoopMerge %3065 %3066 None + OpBranch %3067 + %3067 = OpLabel + %3068 = OpFOrdLessThan %bool %3063 %float_0_00313066994 + OpSelectionMerge %3069 None + OpBranchConditional %3068 %3070 %3069 + %3070 = OpLabel + %3071 = OpFMul %float %3063 %float_12_9200001 + OpBranch %3065 + %3069 = OpLabel + %3072 = OpExtInst %float %1 Pow %3063 %float_0_416666657 + %3073 = OpFMul %float %3072 
%float_1_05499995 + %3074 = OpFSub %float %3073 %float_0_0549999997 + OpBranch %3065 + %3066 = OpLabel + OpBranch %3064 + %3065 = OpLabel + %3075 = OpPhi %float %3071 %3070 %3074 %3069 + %3076 = OpCompositeExtract %float %1324 1 + OpBranch %3077 + %3077 = OpLabel + OpLoopMerge %3078 %3079 None + OpBranch %3080 + %3080 = OpLabel + %3081 = OpFOrdLessThan %bool %3076 %float_0_00313066994 + OpSelectionMerge %3082 None + OpBranchConditional %3081 %3083 %3082 + %3083 = OpLabel + %3084 = OpFMul %float %3076 %float_12_9200001 + OpBranch %3078 + %3082 = OpLabel + %3085 = OpExtInst %float %1 Pow %3076 %float_0_416666657 + %3086 = OpFMul %float %3085 %float_1_05499995 + %3087 = OpFSub %float %3086 %float_0_0549999997 + OpBranch %3078 + %3079 = OpLabel + OpBranch %3077 + %3078 = OpLabel + %3088 = OpPhi %float %3084 %3083 %3087 %3082 + %3089 = OpCompositeExtract %float %1324 2 + OpBranch %3090 + %3090 = OpLabel + OpLoopMerge %3091 %3092 None + OpBranch %3093 + %3093 = OpLabel + %3094 = OpFOrdLessThan %bool %3089 %float_0_00313066994 + OpSelectionMerge %3095 None + OpBranchConditional %3094 %3096 %3095 + %3096 = OpLabel + %3097 = OpFMul %float %3089 %float_12_9200001 + OpBranch %3091 + %3095 = OpLabel + %3098 = OpExtInst %float %1 Pow %3089 %float_0_416666657 + %3099 = OpFMul %float %3098 %float_1_05499995 + %3100 = OpFSub %float %3099 %float_0_0549999997 + OpBranch %3091 + %3092 = OpLabel + OpBranch %3090 + %3091 = OpLabel + %3101 = OpPhi %float %3097 %3096 %3100 %3095 + %3102 = OpCompositeConstruct %v3float %3075 %3088 %3101 + OpBranch %1326 + %1326 = OpLabel + %3103 = OpPhi %v3float %3062 %1330 %3102 %3091 + %3104 = OpFMul %v3float %3103 %522 + %3105 = OpVectorShuffle %v4float %135 %3104 4 5 6 3 + %3106 = OpCompositeInsert %v4float %float_0 %3105 3 + OpStore %out_var_SV_Target0 %3106 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag b/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag new file mode 100644 index 
00000000000..097eb6354d5 --- /dev/null +++ b/shaders-ue4/asm/frag/sample-mask-not-array.asm.frag @@ -0,0 +1,1230 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 271 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %MainPS "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_PRIMITIVE_ID %in_var_TEXCOORD7 %gl_FragCoord %gl_FrontFacing %gl_SampleMask %out_var_SV_Target0 %gl_SampleMask_0 + OpExecutionMode %MainPS OriginUpperLeft + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + 
OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 
"View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 
"View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName 
%type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 
"View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_3d_image "type.3d.image" + OpName %type_sampler "type.sampler" + OpName %View_SharedBilinearClampedSampler "View_SharedBilinearClampedSampler" + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %View_PrimitiveSceneData "View_PrimitiveSceneData" + OpName %type_TranslucentBasePass "type.TranslucentBasePass" + OpMemberName %type_TranslucentBasePass 0 "TranslucentBasePass_Shared_Forward_NumLocalLights" + OpMemberName %type_TranslucentBasePass 1 "TranslucentBasePass_Shared_Forward_NumReflectionCaptures" + OpMemberName %type_TranslucentBasePass 2 "TranslucentBasePass_Shared_Forward_HasDirectionalLight" + OpMemberName %type_TranslucentBasePass 3 "TranslucentBasePass_Shared_Forward_NumGridCells" + OpMemberName %type_TranslucentBasePass 4 "TranslucentBasePass_Shared_Forward_CulledGridSize" + OpMemberName %type_TranslucentBasePass 5 "TranslucentBasePass_Shared_Forward_MaxCulledLightsPerCell" + OpMemberName %type_TranslucentBasePass 6 "TranslucentBasePass_Shared_Forward_LightGridPixelSizeShift" + OpMemberName %type_TranslucentBasePass 7 "PrePadding_TranslucentBasePass_Shared_Forward_36" + OpMemberName %type_TranslucentBasePass 8 "PrePadding_TranslucentBasePass_Shared_Forward_40" + OpMemberName %type_TranslucentBasePass 9 "PrePadding_TranslucentBasePass_Shared_Forward_44" + OpMemberName %type_TranslucentBasePass 10 
"TranslucentBasePass_Shared_Forward_LightGridZParams" + OpMemberName %type_TranslucentBasePass 11 "PrePadding_TranslucentBasePass_Shared_Forward_60" + OpMemberName %type_TranslucentBasePass 12 "TranslucentBasePass_Shared_Forward_DirectionalLightDirection" + OpMemberName %type_TranslucentBasePass 13 "PrePadding_TranslucentBasePass_Shared_Forward_76" + OpMemberName %type_TranslucentBasePass 14 "TranslucentBasePass_Shared_Forward_DirectionalLightColor" + OpMemberName %type_TranslucentBasePass 15 "TranslucentBasePass_Shared_Forward_DirectionalLightVolumetricScatteringIntensity" + OpMemberName %type_TranslucentBasePass 16 "TranslucentBasePass_Shared_Forward_DirectionalLightShadowMapChannelMask" + OpMemberName %type_TranslucentBasePass 17 "PrePadding_TranslucentBasePass_Shared_Forward_100" + OpMemberName %type_TranslucentBasePass 18 "TranslucentBasePass_Shared_Forward_DirectionalLightDistanceFadeMAD" + OpMemberName %type_TranslucentBasePass 19 "TranslucentBasePass_Shared_Forward_NumDirectionalLightCascades" + OpMemberName %type_TranslucentBasePass 20 "PrePadding_TranslucentBasePass_Shared_Forward_116" + OpMemberName %type_TranslucentBasePass 21 "PrePadding_TranslucentBasePass_Shared_Forward_120" + OpMemberName %type_TranslucentBasePass 22 "PrePadding_TranslucentBasePass_Shared_Forward_124" + OpMemberName %type_TranslucentBasePass 23 "TranslucentBasePass_Shared_Forward_CascadeEndDepths" + OpMemberName %type_TranslucentBasePass 24 "TranslucentBasePass_Shared_Forward_DirectionalLightWorldToShadowMatrix" + OpMemberName %type_TranslucentBasePass 25 "TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapMinMax" + OpMemberName %type_TranslucentBasePass 26 "TranslucentBasePass_Shared_Forward_DirectionalLightShadowmapAtlasBufferSize" + OpMemberName %type_TranslucentBasePass 27 "TranslucentBasePass_Shared_Forward_DirectionalLightDepthBias" + OpMemberName %type_TranslucentBasePass 28 "TranslucentBasePass_Shared_Forward_DirectionalLightUseStaticShadowing" + OpMemberName 
%type_TranslucentBasePass 29 "PrePadding_TranslucentBasePass_Shared_Forward_488" + OpMemberName %type_TranslucentBasePass 30 "PrePadding_TranslucentBasePass_Shared_Forward_492" + OpMemberName %type_TranslucentBasePass 31 "TranslucentBasePass_Shared_Forward_DirectionalLightStaticShadowBufferSize" + OpMemberName %type_TranslucentBasePass 32 "TranslucentBasePass_Shared_Forward_DirectionalLightWorldToStaticShadow" + OpMemberName %type_TranslucentBasePass 33 "PrePadding_TranslucentBasePass_Shared_ForwardISR_576" + OpMemberName %type_TranslucentBasePass 34 "PrePadding_TranslucentBasePass_Shared_ForwardISR_580" + OpMemberName %type_TranslucentBasePass 35 "PrePadding_TranslucentBasePass_Shared_ForwardISR_584" + OpMemberName %type_TranslucentBasePass 36 "PrePadding_TranslucentBasePass_Shared_ForwardISR_588" + OpMemberName %type_TranslucentBasePass 37 "PrePadding_TranslucentBasePass_Shared_ForwardISR_592" + OpMemberName %type_TranslucentBasePass 38 "PrePadding_TranslucentBasePass_Shared_ForwardISR_596" + OpMemberName %type_TranslucentBasePass 39 "PrePadding_TranslucentBasePass_Shared_ForwardISR_600" + OpMemberName %type_TranslucentBasePass 40 "PrePadding_TranslucentBasePass_Shared_ForwardISR_604" + OpMemberName %type_TranslucentBasePass 41 "PrePadding_TranslucentBasePass_Shared_ForwardISR_608" + OpMemberName %type_TranslucentBasePass 42 "PrePadding_TranslucentBasePass_Shared_ForwardISR_612" + OpMemberName %type_TranslucentBasePass 43 "PrePadding_TranslucentBasePass_Shared_ForwardISR_616" + OpMemberName %type_TranslucentBasePass 44 "PrePadding_TranslucentBasePass_Shared_ForwardISR_620" + OpMemberName %type_TranslucentBasePass 45 "PrePadding_TranslucentBasePass_Shared_ForwardISR_624" + OpMemberName %type_TranslucentBasePass 46 "PrePadding_TranslucentBasePass_Shared_ForwardISR_628" + OpMemberName %type_TranslucentBasePass 47 "PrePadding_TranslucentBasePass_Shared_ForwardISR_632" + OpMemberName %type_TranslucentBasePass 48 "PrePadding_TranslucentBasePass_Shared_ForwardISR_636" + 
OpMemberName %type_TranslucentBasePass 49 "TranslucentBasePass_Shared_ForwardISR_NumLocalLights" + OpMemberName %type_TranslucentBasePass 50 "TranslucentBasePass_Shared_ForwardISR_NumReflectionCaptures" + OpMemberName %type_TranslucentBasePass 51 "TranslucentBasePass_Shared_ForwardISR_HasDirectionalLight" + OpMemberName %type_TranslucentBasePass 52 "TranslucentBasePass_Shared_ForwardISR_NumGridCells" + OpMemberName %type_TranslucentBasePass 53 "TranslucentBasePass_Shared_ForwardISR_CulledGridSize" + OpMemberName %type_TranslucentBasePass 54 "TranslucentBasePass_Shared_ForwardISR_MaxCulledLightsPerCell" + OpMemberName %type_TranslucentBasePass 55 "TranslucentBasePass_Shared_ForwardISR_LightGridPixelSizeShift" + OpMemberName %type_TranslucentBasePass 56 "PrePadding_TranslucentBasePass_Shared_ForwardISR_676" + OpMemberName %type_TranslucentBasePass 57 "PrePadding_TranslucentBasePass_Shared_ForwardISR_680" + OpMemberName %type_TranslucentBasePass 58 "PrePadding_TranslucentBasePass_Shared_ForwardISR_684" + OpMemberName %type_TranslucentBasePass 59 "TranslucentBasePass_Shared_ForwardISR_LightGridZParams" + OpMemberName %type_TranslucentBasePass 60 "PrePadding_TranslucentBasePass_Shared_ForwardISR_700" + OpMemberName %type_TranslucentBasePass 61 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightDirection" + OpMemberName %type_TranslucentBasePass 62 "PrePadding_TranslucentBasePass_Shared_ForwardISR_716" + OpMemberName %type_TranslucentBasePass 63 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightColor" + OpMemberName %type_TranslucentBasePass 64 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightVolumetricScatteringIntensity" + OpMemberName %type_TranslucentBasePass 65 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowMapChannelMask" + OpMemberName %type_TranslucentBasePass 66 "PrePadding_TranslucentBasePass_Shared_ForwardISR_740" + OpMemberName %type_TranslucentBasePass 67 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightDistanceFadeMAD" + 
OpMemberName %type_TranslucentBasePass 68 "TranslucentBasePass_Shared_ForwardISR_NumDirectionalLightCascades" + OpMemberName %type_TranslucentBasePass 69 "PrePadding_TranslucentBasePass_Shared_ForwardISR_756" + OpMemberName %type_TranslucentBasePass 70 "PrePadding_TranslucentBasePass_Shared_ForwardISR_760" + OpMemberName %type_TranslucentBasePass 71 "PrePadding_TranslucentBasePass_Shared_ForwardISR_764" + OpMemberName %type_TranslucentBasePass 72 "TranslucentBasePass_Shared_ForwardISR_CascadeEndDepths" + OpMemberName %type_TranslucentBasePass 73 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToShadowMatrix" + OpMemberName %type_TranslucentBasePass 74 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapMinMax" + OpMemberName %type_TranslucentBasePass 75 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightShadowmapAtlasBufferSize" + OpMemberName %type_TranslucentBasePass 76 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightDepthBias" + OpMemberName %type_TranslucentBasePass 77 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightUseStaticShadowing" + OpMemberName %type_TranslucentBasePass 78 "PrePadding_TranslucentBasePass_Shared_ForwardISR_1128" + OpMemberName %type_TranslucentBasePass 79 "PrePadding_TranslucentBasePass_Shared_ForwardISR_1132" + OpMemberName %type_TranslucentBasePass 80 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightStaticShadowBufferSize" + OpMemberName %type_TranslucentBasePass 81 "TranslucentBasePass_Shared_ForwardISR_DirectionalLightWorldToStaticShadow" + OpMemberName %type_TranslucentBasePass 82 "PrePadding_TranslucentBasePass_Shared_Reflection_1216" + OpMemberName %type_TranslucentBasePass 83 "PrePadding_TranslucentBasePass_Shared_Reflection_1220" + OpMemberName %type_TranslucentBasePass 84 "PrePadding_TranslucentBasePass_Shared_Reflection_1224" + OpMemberName %type_TranslucentBasePass 85 "PrePadding_TranslucentBasePass_Shared_Reflection_1228" + OpMemberName %type_TranslucentBasePass 86 
"PrePadding_TranslucentBasePass_Shared_Reflection_1232" + OpMemberName %type_TranslucentBasePass 87 "PrePadding_TranslucentBasePass_Shared_Reflection_1236" + OpMemberName %type_TranslucentBasePass 88 "PrePadding_TranslucentBasePass_Shared_Reflection_1240" + OpMemberName %type_TranslucentBasePass 89 "PrePadding_TranslucentBasePass_Shared_Reflection_1244" + OpMemberName %type_TranslucentBasePass 90 "PrePadding_TranslucentBasePass_Shared_Reflection_1248" + OpMemberName %type_TranslucentBasePass 91 "PrePadding_TranslucentBasePass_Shared_Reflection_1252" + OpMemberName %type_TranslucentBasePass 92 "PrePadding_TranslucentBasePass_Shared_Reflection_1256" + OpMemberName %type_TranslucentBasePass 93 "PrePadding_TranslucentBasePass_Shared_Reflection_1260" + OpMemberName %type_TranslucentBasePass 94 "PrePadding_TranslucentBasePass_Shared_Reflection_1264" + OpMemberName %type_TranslucentBasePass 95 "PrePadding_TranslucentBasePass_Shared_Reflection_1268" + OpMemberName %type_TranslucentBasePass 96 "PrePadding_TranslucentBasePass_Shared_Reflection_1272" + OpMemberName %type_TranslucentBasePass 97 "PrePadding_TranslucentBasePass_Shared_Reflection_1276" + OpMemberName %type_TranslucentBasePass 98 "TranslucentBasePass_Shared_Reflection_SkyLightParameters" + OpMemberName %type_TranslucentBasePass 99 "TranslucentBasePass_Shared_Reflection_SkyLightCubemapBrightness" + OpMemberName %type_TranslucentBasePass 100 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1300" + OpMemberName %type_TranslucentBasePass 101 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1304" + OpMemberName %type_TranslucentBasePass 102 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1308" + OpMemberName %type_TranslucentBasePass 103 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1312" + OpMemberName %type_TranslucentBasePass 104 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1316" + OpMemberName %type_TranslucentBasePass 105 
"PrePadding_TranslucentBasePass_Shared_PlanarReflection_1320" + OpMemberName %type_TranslucentBasePass 106 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1324" + OpMemberName %type_TranslucentBasePass 107 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1328" + OpMemberName %type_TranslucentBasePass 108 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1332" + OpMemberName %type_TranslucentBasePass 109 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1336" + OpMemberName %type_TranslucentBasePass 110 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1340" + OpMemberName %type_TranslucentBasePass 111 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1344" + OpMemberName %type_TranslucentBasePass 112 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1348" + OpMemberName %type_TranslucentBasePass 113 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1352" + OpMemberName %type_TranslucentBasePass 114 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1356" + OpMemberName %type_TranslucentBasePass 115 "TranslucentBasePass_Shared_PlanarReflection_ReflectionPlane" + OpMemberName %type_TranslucentBasePass 116 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionOrigin" + OpMemberName %type_TranslucentBasePass 117 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionXAxis" + OpMemberName %type_TranslucentBasePass 118 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionYAxis" + OpMemberName %type_TranslucentBasePass 119 "TranslucentBasePass_Shared_PlanarReflection_InverseTransposeMirrorMatrix" + OpMemberName %type_TranslucentBasePass 120 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters" + OpMemberName %type_TranslucentBasePass 121 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1484" + OpMemberName %type_TranslucentBasePass 122 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionParameters2" + OpMemberName %type_TranslucentBasePass 123 
"PrePadding_TranslucentBasePass_Shared_PlanarReflection_1496" + OpMemberName %type_TranslucentBasePass 124 "PrePadding_TranslucentBasePass_Shared_PlanarReflection_1500" + OpMemberName %type_TranslucentBasePass 125 "TranslucentBasePass_Shared_PlanarReflection_ProjectionWithExtraFOV" + OpMemberName %type_TranslucentBasePass 126 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenScaleBias" + OpMemberName %type_TranslucentBasePass 127 "TranslucentBasePass_Shared_PlanarReflection_PlanarReflectionScreenBound" + OpMemberName %type_TranslucentBasePass 128 "TranslucentBasePass_Shared_PlanarReflection_bIsStereo" + OpMemberName %type_TranslucentBasePass 129 "PrePadding_TranslucentBasePass_Shared_Fog_1676" + OpMemberName %type_TranslucentBasePass 130 "PrePadding_TranslucentBasePass_Shared_Fog_1680" + OpMemberName %type_TranslucentBasePass 131 "PrePadding_TranslucentBasePass_Shared_Fog_1684" + OpMemberName %type_TranslucentBasePass 132 "PrePadding_TranslucentBasePass_Shared_Fog_1688" + OpMemberName %type_TranslucentBasePass 133 "PrePadding_TranslucentBasePass_Shared_Fog_1692" + OpMemberName %type_TranslucentBasePass 134 "TranslucentBasePass_Shared_Fog_ExponentialFogParameters" + OpMemberName %type_TranslucentBasePass 135 "TranslucentBasePass_Shared_Fog_ExponentialFogParameters2" + OpMemberName %type_TranslucentBasePass 136 "TranslucentBasePass_Shared_Fog_ExponentialFogColorParameter" + OpMemberName %type_TranslucentBasePass 137 "TranslucentBasePass_Shared_Fog_ExponentialFogParameters3" + OpMemberName %type_TranslucentBasePass 138 "TranslucentBasePass_Shared_Fog_InscatteringLightDirection" + OpMemberName %type_TranslucentBasePass 139 "TranslucentBasePass_Shared_Fog_DirectionalInscatteringColor" + OpMemberName %type_TranslucentBasePass 140 "TranslucentBasePass_Shared_Fog_SinCosInscatteringColorCubemapRotation" + OpMemberName %type_TranslucentBasePass 141 "PrePadding_TranslucentBasePass_Shared_Fog_1800" + OpMemberName %type_TranslucentBasePass 142 
"PrePadding_TranslucentBasePass_Shared_Fog_1804" + OpMemberName %type_TranslucentBasePass 143 "TranslucentBasePass_Shared_Fog_FogInscatteringTextureParameters" + OpMemberName %type_TranslucentBasePass 144 "TranslucentBasePass_Shared_Fog_ApplyVolumetricFog" + OpMemberName %type_TranslucentBasePass 145 "PrePadding_TranslucentBasePass_1824" + OpMemberName %type_TranslucentBasePass 146 "PrePadding_TranslucentBasePass_1828" + OpMemberName %type_TranslucentBasePass 147 "PrePadding_TranslucentBasePass_1832" + OpMemberName %type_TranslucentBasePass 148 "PrePadding_TranslucentBasePass_1836" + OpMemberName %type_TranslucentBasePass 149 "PrePadding_TranslucentBasePass_1840" + OpMemberName %type_TranslucentBasePass 150 "PrePadding_TranslucentBasePass_1844" + OpMemberName %type_TranslucentBasePass 151 "PrePadding_TranslucentBasePass_1848" + OpMemberName %type_TranslucentBasePass 152 "PrePadding_TranslucentBasePass_1852" + OpMemberName %type_TranslucentBasePass 153 "PrePadding_TranslucentBasePass_1856" + OpMemberName %type_TranslucentBasePass 154 "PrePadding_TranslucentBasePass_1860" + OpMemberName %type_TranslucentBasePass 155 "PrePadding_TranslucentBasePass_1864" + OpMemberName %type_TranslucentBasePass 156 "PrePadding_TranslucentBasePass_1868" + OpMemberName %type_TranslucentBasePass 157 "PrePadding_TranslucentBasePass_1872" + OpMemberName %type_TranslucentBasePass 158 "PrePadding_TranslucentBasePass_1876" + OpMemberName %type_TranslucentBasePass 159 "PrePadding_TranslucentBasePass_1880" + OpMemberName %type_TranslucentBasePass 160 "PrePadding_TranslucentBasePass_1884" + OpMemberName %type_TranslucentBasePass 161 "PrePadding_TranslucentBasePass_1888" + OpMemberName %type_TranslucentBasePass 162 "PrePadding_TranslucentBasePass_1892" + OpMemberName %type_TranslucentBasePass 163 "PrePadding_TranslucentBasePass_1896" + OpMemberName %type_TranslucentBasePass 164 "PrePadding_TranslucentBasePass_1900" + OpMemberName %type_TranslucentBasePass 165 "PrePadding_TranslucentBasePass_1904" 
+ OpMemberName %type_TranslucentBasePass 166 "PrePadding_TranslucentBasePass_1908" + OpMemberName %type_TranslucentBasePass 167 "PrePadding_TranslucentBasePass_1912" + OpMemberName %type_TranslucentBasePass 168 "PrePadding_TranslucentBasePass_1916" + OpMemberName %type_TranslucentBasePass 169 "PrePadding_TranslucentBasePass_1920" + OpMemberName %type_TranslucentBasePass 170 "PrePadding_TranslucentBasePass_1924" + OpMemberName %type_TranslucentBasePass 171 "PrePadding_TranslucentBasePass_1928" + OpMemberName %type_TranslucentBasePass 172 "PrePadding_TranslucentBasePass_1932" + OpMemberName %type_TranslucentBasePass 173 "PrePadding_TranslucentBasePass_1936" + OpMemberName %type_TranslucentBasePass 174 "PrePadding_TranslucentBasePass_1940" + OpMemberName %type_TranslucentBasePass 175 "PrePadding_TranslucentBasePass_1944" + OpMemberName %type_TranslucentBasePass 176 "PrePadding_TranslucentBasePass_1948" + OpMemberName %type_TranslucentBasePass 177 "PrePadding_TranslucentBasePass_1952" + OpMemberName %type_TranslucentBasePass 178 "PrePadding_TranslucentBasePass_1956" + OpMemberName %type_TranslucentBasePass 179 "PrePadding_TranslucentBasePass_1960" + OpMemberName %type_TranslucentBasePass 180 "PrePadding_TranslucentBasePass_1964" + OpMemberName %type_TranslucentBasePass 181 "PrePadding_TranslucentBasePass_1968" + OpMemberName %type_TranslucentBasePass 182 "PrePadding_TranslucentBasePass_1972" + OpMemberName %type_TranslucentBasePass 183 "PrePadding_TranslucentBasePass_1976" + OpMemberName %type_TranslucentBasePass 184 "PrePadding_TranslucentBasePass_1980" + OpMemberName %type_TranslucentBasePass 185 "PrePadding_TranslucentBasePass_1984" + OpMemberName %type_TranslucentBasePass 186 "PrePadding_TranslucentBasePass_1988" + OpMemberName %type_TranslucentBasePass 187 "PrePadding_TranslucentBasePass_1992" + OpMemberName %type_TranslucentBasePass 188 "PrePadding_TranslucentBasePass_1996" + OpMemberName %type_TranslucentBasePass 189 "PrePadding_TranslucentBasePass_2000" + 
OpMemberName %type_TranslucentBasePass 190 "PrePadding_TranslucentBasePass_2004" + OpMemberName %type_TranslucentBasePass 191 "PrePadding_TranslucentBasePass_2008" + OpMemberName %type_TranslucentBasePass 192 "PrePadding_TranslucentBasePass_2012" + OpMemberName %type_TranslucentBasePass 193 "PrePadding_TranslucentBasePass_2016" + OpMemberName %type_TranslucentBasePass 194 "PrePadding_TranslucentBasePass_2020" + OpMemberName %type_TranslucentBasePass 195 "PrePadding_TranslucentBasePass_2024" + OpMemberName %type_TranslucentBasePass 196 "PrePadding_TranslucentBasePass_2028" + OpMemberName %type_TranslucentBasePass 197 "PrePadding_TranslucentBasePass_2032" + OpMemberName %type_TranslucentBasePass 198 "PrePadding_TranslucentBasePass_2036" + OpMemberName %type_TranslucentBasePass 199 "PrePadding_TranslucentBasePass_2040" + OpMemberName %type_TranslucentBasePass 200 "PrePadding_TranslucentBasePass_2044" + OpMemberName %type_TranslucentBasePass 201 "PrePadding_TranslucentBasePass_2048" + OpMemberName %type_TranslucentBasePass 202 "PrePadding_TranslucentBasePass_2052" + OpMemberName %type_TranslucentBasePass 203 "PrePadding_TranslucentBasePass_2056" + OpMemberName %type_TranslucentBasePass 204 "PrePadding_TranslucentBasePass_2060" + OpMemberName %type_TranslucentBasePass 205 "PrePadding_TranslucentBasePass_2064" + OpMemberName %type_TranslucentBasePass 206 "PrePadding_TranslucentBasePass_2068" + OpMemberName %type_TranslucentBasePass 207 "PrePadding_TranslucentBasePass_2072" + OpMemberName %type_TranslucentBasePass 208 "PrePadding_TranslucentBasePass_2076" + OpMemberName %type_TranslucentBasePass 209 "PrePadding_TranslucentBasePass_2080" + OpMemberName %type_TranslucentBasePass 210 "PrePadding_TranslucentBasePass_2084" + OpMemberName %type_TranslucentBasePass 211 "PrePadding_TranslucentBasePass_2088" + OpMemberName %type_TranslucentBasePass 212 "PrePadding_TranslucentBasePass_2092" + OpMemberName %type_TranslucentBasePass 213 "PrePadding_TranslucentBasePass_2096" + 
OpMemberName %type_TranslucentBasePass 214 "PrePadding_TranslucentBasePass_2100" + OpMemberName %type_TranslucentBasePass 215 "PrePadding_TranslucentBasePass_2104" + OpMemberName %type_TranslucentBasePass 216 "PrePadding_TranslucentBasePass_2108" + OpMemberName %type_TranslucentBasePass 217 "PrePadding_TranslucentBasePass_2112" + OpMemberName %type_TranslucentBasePass 218 "PrePadding_TranslucentBasePass_2116" + OpMemberName %type_TranslucentBasePass 219 "PrePadding_TranslucentBasePass_2120" + OpMemberName %type_TranslucentBasePass 220 "PrePadding_TranslucentBasePass_2124" + OpMemberName %type_TranslucentBasePass 221 "PrePadding_TranslucentBasePass_2128" + OpMemberName %type_TranslucentBasePass 222 "PrePadding_TranslucentBasePass_2132" + OpMemberName %type_TranslucentBasePass 223 "PrePadding_TranslucentBasePass_2136" + OpMemberName %type_TranslucentBasePass 224 "PrePadding_TranslucentBasePass_2140" + OpMemberName %type_TranslucentBasePass 225 "TranslucentBasePass_HZBUvFactorAndInvFactor" + OpMemberName %type_TranslucentBasePass 226 "TranslucentBasePass_PrevScreenPositionScaleBias" + OpMemberName %type_TranslucentBasePass 227 "TranslucentBasePass_PrevSceneColorPreExposureInv" + OpName %TranslucentBasePass "TranslucentBasePass" + OpName %TranslucentBasePass_Shared_Fog_IntegratedLightScattering "TranslucentBasePass_Shared_Fog_IntegratedLightScattering" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %in_var_TEXCOORD7 "in.var.TEXCOORD7" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %MainPS "MainPS" + OpName %type_sampled_image "type.sampled.image" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic 
"TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorate %in_var_PRIMITIVE_ID Flat + OpDecorateString %in_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_Position" + OpDecorate %gl_FrontFacing BuiltIn FrontFacing + OpDecorateString %gl_FrontFacing UserSemantic "SV_IsFrontFace" + OpDecorate %gl_FrontFacing Flat + OpDecorate %gl_SampleMask BuiltIn SampleMask + OpDecorateString %gl_SampleMask UserSemantic "SV_Coverage" + OpDecorate %gl_SampleMask Flat + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %gl_SampleMask_0 BuiltIn SampleMask + OpDecorateString %gl_SampleMask_0 UserSemantic "SV_Coverage" + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_PRIMITIVE_ID Location 2 + OpDecorate %in_var_TEXCOORD7 Location 3 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 1 + OpDecorate %View_SharedBilinearClampedSampler DescriptorSet 0 + OpDecorate %View_SharedBilinearClampedSampler Binding 0 + OpDecorate %View_PrimitiveSceneData DescriptorSet 0 + OpDecorate %View_PrimitiveSceneData Binding 0 + OpDecorate %TranslucentBasePass DescriptorSet 0 + OpDecorate %TranslucentBasePass Binding 2 + OpDecorate %TranslucentBasePass_Shared_Fog_IntegratedLightScattering DescriptorSet 0 + OpDecorate %TranslucentBasePass_Shared_Fog_IntegratedLightScattering Binding 0 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 3 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate 
%type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 
22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + 
OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate 
%type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + 
OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate 
%type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + OpDecorate %_arr_mat4v4float_uint_4 ArrayStride 64 + OpDecorate %_arr_mat4v4float_uint_2 ArrayStride 64 + OpMemberDecorate %type_TranslucentBasePass 0 Offset 0 + OpMemberDecorate %type_TranslucentBasePass 1 Offset 4 + OpMemberDecorate %type_TranslucentBasePass 2 Offset 8 + OpMemberDecorate %type_TranslucentBasePass 3 Offset 12 + OpMemberDecorate %type_TranslucentBasePass 4 Offset 16 + OpMemberDecorate %type_TranslucentBasePass 5 Offset 28 + OpMemberDecorate %type_TranslucentBasePass 6 Offset 32 + OpMemberDecorate %type_TranslucentBasePass 7 Offset 36 + OpMemberDecorate %type_TranslucentBasePass 8 Offset 40 + OpMemberDecorate %type_TranslucentBasePass 9 Offset 44 + OpMemberDecorate %type_TranslucentBasePass 10 Offset 48 + OpMemberDecorate %type_TranslucentBasePass 11 Offset 60 + OpMemberDecorate %type_TranslucentBasePass 12 Offset 64 + OpMemberDecorate %type_TranslucentBasePass 13 Offset 76 + OpMemberDecorate %type_TranslucentBasePass 14 Offset 80 + OpMemberDecorate %type_TranslucentBasePass 15 Offset 92 + OpMemberDecorate %type_TranslucentBasePass 16 Offset 96 + OpMemberDecorate %type_TranslucentBasePass 17 Offset 100 + OpMemberDecorate %type_TranslucentBasePass 18 Offset 104 + OpMemberDecorate %type_TranslucentBasePass 19 Offset 112 + OpMemberDecorate %type_TranslucentBasePass 20 Offset 116 + OpMemberDecorate %type_TranslucentBasePass 21 Offset 120 + OpMemberDecorate %type_TranslucentBasePass 22 Offset 124 + OpMemberDecorate %type_TranslucentBasePass 23 Offset 128 + OpMemberDecorate %type_TranslucentBasePass 24 Offset 144 + OpMemberDecorate %type_TranslucentBasePass 24 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 24 ColMajor + OpMemberDecorate %type_TranslucentBasePass 25 Offset 400 + OpMemberDecorate %type_TranslucentBasePass 26 Offset 464 + OpMemberDecorate %type_TranslucentBasePass 27 
Offset 480 + OpMemberDecorate %type_TranslucentBasePass 28 Offset 484 + OpMemberDecorate %type_TranslucentBasePass 29 Offset 488 + OpMemberDecorate %type_TranslucentBasePass 30 Offset 492 + OpMemberDecorate %type_TranslucentBasePass 31 Offset 496 + OpMemberDecorate %type_TranslucentBasePass 32 Offset 512 + OpMemberDecorate %type_TranslucentBasePass 32 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 32 ColMajor + OpMemberDecorate %type_TranslucentBasePass 33 Offset 576 + OpMemberDecorate %type_TranslucentBasePass 34 Offset 580 + OpMemberDecorate %type_TranslucentBasePass 35 Offset 584 + OpMemberDecorate %type_TranslucentBasePass 36 Offset 588 + OpMemberDecorate %type_TranslucentBasePass 37 Offset 592 + OpMemberDecorate %type_TranslucentBasePass 38 Offset 596 + OpMemberDecorate %type_TranslucentBasePass 39 Offset 600 + OpMemberDecorate %type_TranslucentBasePass 40 Offset 604 + OpMemberDecorate %type_TranslucentBasePass 41 Offset 608 + OpMemberDecorate %type_TranslucentBasePass 42 Offset 612 + OpMemberDecorate %type_TranslucentBasePass 43 Offset 616 + OpMemberDecorate %type_TranslucentBasePass 44 Offset 620 + OpMemberDecorate %type_TranslucentBasePass 45 Offset 624 + OpMemberDecorate %type_TranslucentBasePass 46 Offset 628 + OpMemberDecorate %type_TranslucentBasePass 47 Offset 632 + OpMemberDecorate %type_TranslucentBasePass 48 Offset 636 + OpMemberDecorate %type_TranslucentBasePass 49 Offset 640 + OpMemberDecorate %type_TranslucentBasePass 50 Offset 644 + OpMemberDecorate %type_TranslucentBasePass 51 Offset 648 + OpMemberDecorate %type_TranslucentBasePass 52 Offset 652 + OpMemberDecorate %type_TranslucentBasePass 53 Offset 656 + OpMemberDecorate %type_TranslucentBasePass 54 Offset 668 + OpMemberDecorate %type_TranslucentBasePass 55 Offset 672 + OpMemberDecorate %type_TranslucentBasePass 56 Offset 676 + OpMemberDecorate %type_TranslucentBasePass 57 Offset 680 + OpMemberDecorate %type_TranslucentBasePass 58 Offset 684 + OpMemberDecorate 
%type_TranslucentBasePass 59 Offset 688 + OpMemberDecorate %type_TranslucentBasePass 60 Offset 700 + OpMemberDecorate %type_TranslucentBasePass 61 Offset 704 + OpMemberDecorate %type_TranslucentBasePass 62 Offset 716 + OpMemberDecorate %type_TranslucentBasePass 63 Offset 720 + OpMemberDecorate %type_TranslucentBasePass 64 Offset 732 + OpMemberDecorate %type_TranslucentBasePass 65 Offset 736 + OpMemberDecorate %type_TranslucentBasePass 66 Offset 740 + OpMemberDecorate %type_TranslucentBasePass 67 Offset 744 + OpMemberDecorate %type_TranslucentBasePass 68 Offset 752 + OpMemberDecorate %type_TranslucentBasePass 69 Offset 756 + OpMemberDecorate %type_TranslucentBasePass 70 Offset 760 + OpMemberDecorate %type_TranslucentBasePass 71 Offset 764 + OpMemberDecorate %type_TranslucentBasePass 72 Offset 768 + OpMemberDecorate %type_TranslucentBasePass 73 Offset 784 + OpMemberDecorate %type_TranslucentBasePass 73 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 73 ColMajor + OpMemberDecorate %type_TranslucentBasePass 74 Offset 1040 + OpMemberDecorate %type_TranslucentBasePass 75 Offset 1104 + OpMemberDecorate %type_TranslucentBasePass 76 Offset 1120 + OpMemberDecorate %type_TranslucentBasePass 77 Offset 1124 + OpMemberDecorate %type_TranslucentBasePass 78 Offset 1128 + OpMemberDecorate %type_TranslucentBasePass 79 Offset 1132 + OpMemberDecorate %type_TranslucentBasePass 80 Offset 1136 + OpMemberDecorate %type_TranslucentBasePass 81 Offset 1152 + OpMemberDecorate %type_TranslucentBasePass 81 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 81 ColMajor + OpMemberDecorate %type_TranslucentBasePass 82 Offset 1216 + OpMemberDecorate %type_TranslucentBasePass 83 Offset 1220 + OpMemberDecorate %type_TranslucentBasePass 84 Offset 1224 + OpMemberDecorate %type_TranslucentBasePass 85 Offset 1228 + OpMemberDecorate %type_TranslucentBasePass 86 Offset 1232 + OpMemberDecorate %type_TranslucentBasePass 87 Offset 1236 + OpMemberDecorate %type_TranslucentBasePass 88 
Offset 1240 + OpMemberDecorate %type_TranslucentBasePass 89 Offset 1244 + OpMemberDecorate %type_TranslucentBasePass 90 Offset 1248 + OpMemberDecorate %type_TranslucentBasePass 91 Offset 1252 + OpMemberDecorate %type_TranslucentBasePass 92 Offset 1256 + OpMemberDecorate %type_TranslucentBasePass 93 Offset 1260 + OpMemberDecorate %type_TranslucentBasePass 94 Offset 1264 + OpMemberDecorate %type_TranslucentBasePass 95 Offset 1268 + OpMemberDecorate %type_TranslucentBasePass 96 Offset 1272 + OpMemberDecorate %type_TranslucentBasePass 97 Offset 1276 + OpMemberDecorate %type_TranslucentBasePass 98 Offset 1280 + OpMemberDecorate %type_TranslucentBasePass 99 Offset 1296 + OpMemberDecorate %type_TranslucentBasePass 100 Offset 1300 + OpMemberDecorate %type_TranslucentBasePass 101 Offset 1304 + OpMemberDecorate %type_TranslucentBasePass 102 Offset 1308 + OpMemberDecorate %type_TranslucentBasePass 103 Offset 1312 + OpMemberDecorate %type_TranslucentBasePass 104 Offset 1316 + OpMemberDecorate %type_TranslucentBasePass 105 Offset 1320 + OpMemberDecorate %type_TranslucentBasePass 106 Offset 1324 + OpMemberDecorate %type_TranslucentBasePass 107 Offset 1328 + OpMemberDecorate %type_TranslucentBasePass 108 Offset 1332 + OpMemberDecorate %type_TranslucentBasePass 109 Offset 1336 + OpMemberDecorate %type_TranslucentBasePass 110 Offset 1340 + OpMemberDecorate %type_TranslucentBasePass 111 Offset 1344 + OpMemberDecorate %type_TranslucentBasePass 112 Offset 1348 + OpMemberDecorate %type_TranslucentBasePass 113 Offset 1352 + OpMemberDecorate %type_TranslucentBasePass 114 Offset 1356 + OpMemberDecorate %type_TranslucentBasePass 115 Offset 1360 + OpMemberDecorate %type_TranslucentBasePass 116 Offset 1376 + OpMemberDecorate %type_TranslucentBasePass 117 Offset 1392 + OpMemberDecorate %type_TranslucentBasePass 118 Offset 1408 + OpMemberDecorate %type_TranslucentBasePass 119 Offset 1424 + OpMemberDecorate %type_TranslucentBasePass 119 MatrixStride 16 + OpMemberDecorate 
%type_TranslucentBasePass 119 ColMajor + OpMemberDecorate %type_TranslucentBasePass 120 Offset 1472 + OpMemberDecorate %type_TranslucentBasePass 121 Offset 1484 + OpMemberDecorate %type_TranslucentBasePass 122 Offset 1488 + OpMemberDecorate %type_TranslucentBasePass 123 Offset 1496 + OpMemberDecorate %type_TranslucentBasePass 124 Offset 1500 + OpMemberDecorate %type_TranslucentBasePass 125 Offset 1504 + OpMemberDecorate %type_TranslucentBasePass 125 MatrixStride 16 + OpMemberDecorate %type_TranslucentBasePass 125 ColMajor + OpMemberDecorate %type_TranslucentBasePass 126 Offset 1632 + OpMemberDecorate %type_TranslucentBasePass 127 Offset 1664 + OpMemberDecorate %type_TranslucentBasePass 128 Offset 1672 + OpMemberDecorate %type_TranslucentBasePass 129 Offset 1676 + OpMemberDecorate %type_TranslucentBasePass 130 Offset 1680 + OpMemberDecorate %type_TranslucentBasePass 131 Offset 1684 + OpMemberDecorate %type_TranslucentBasePass 132 Offset 1688 + OpMemberDecorate %type_TranslucentBasePass 133 Offset 1692 + OpMemberDecorate %type_TranslucentBasePass 134 Offset 1696 + OpMemberDecorate %type_TranslucentBasePass 135 Offset 1712 + OpMemberDecorate %type_TranslucentBasePass 136 Offset 1728 + OpMemberDecorate %type_TranslucentBasePass 137 Offset 1744 + OpMemberDecorate %type_TranslucentBasePass 138 Offset 1760 + OpMemberDecorate %type_TranslucentBasePass 139 Offset 1776 + OpMemberDecorate %type_TranslucentBasePass 140 Offset 1792 + OpMemberDecorate %type_TranslucentBasePass 141 Offset 1800 + OpMemberDecorate %type_TranslucentBasePass 142 Offset 1804 + OpMemberDecorate %type_TranslucentBasePass 143 Offset 1808 + OpMemberDecorate %type_TranslucentBasePass 144 Offset 1820 + OpMemberDecorate %type_TranslucentBasePass 145 Offset 1824 + OpMemberDecorate %type_TranslucentBasePass 146 Offset 1828 + OpMemberDecorate %type_TranslucentBasePass 147 Offset 1832 + OpMemberDecorate %type_TranslucentBasePass 148 Offset 1836 + OpMemberDecorate %type_TranslucentBasePass 149 Offset 1840 + 
OpMemberDecorate %type_TranslucentBasePass 150 Offset 1844 + OpMemberDecorate %type_TranslucentBasePass 151 Offset 1848 + OpMemberDecorate %type_TranslucentBasePass 152 Offset 1852 + OpMemberDecorate %type_TranslucentBasePass 153 Offset 1856 + OpMemberDecorate %type_TranslucentBasePass 154 Offset 1860 + OpMemberDecorate %type_TranslucentBasePass 155 Offset 1864 + OpMemberDecorate %type_TranslucentBasePass 156 Offset 1868 + OpMemberDecorate %type_TranslucentBasePass 157 Offset 1872 + OpMemberDecorate %type_TranslucentBasePass 158 Offset 1876 + OpMemberDecorate %type_TranslucentBasePass 159 Offset 1880 + OpMemberDecorate %type_TranslucentBasePass 160 Offset 1884 + OpMemberDecorate %type_TranslucentBasePass 161 Offset 1888 + OpMemberDecorate %type_TranslucentBasePass 162 Offset 1892 + OpMemberDecorate %type_TranslucentBasePass 163 Offset 1896 + OpMemberDecorate %type_TranslucentBasePass 164 Offset 1900 + OpMemberDecorate %type_TranslucentBasePass 165 Offset 1904 + OpMemberDecorate %type_TranslucentBasePass 166 Offset 1908 + OpMemberDecorate %type_TranslucentBasePass 167 Offset 1912 + OpMemberDecorate %type_TranslucentBasePass 168 Offset 1916 + OpMemberDecorate %type_TranslucentBasePass 169 Offset 1920 + OpMemberDecorate %type_TranslucentBasePass 170 Offset 1924 + OpMemberDecorate %type_TranslucentBasePass 171 Offset 1928 + OpMemberDecorate %type_TranslucentBasePass 172 Offset 1932 + OpMemberDecorate %type_TranslucentBasePass 173 Offset 1936 + OpMemberDecorate %type_TranslucentBasePass 174 Offset 1940 + OpMemberDecorate %type_TranslucentBasePass 175 Offset 1944 + OpMemberDecorate %type_TranslucentBasePass 176 Offset 1948 + OpMemberDecorate %type_TranslucentBasePass 177 Offset 1952 + OpMemberDecorate %type_TranslucentBasePass 178 Offset 1956 + OpMemberDecorate %type_TranslucentBasePass 179 Offset 1960 + OpMemberDecorate %type_TranslucentBasePass 180 Offset 1964 + OpMemberDecorate %type_TranslucentBasePass 181 Offset 1968 + OpMemberDecorate %type_TranslucentBasePass 182 
Offset 1972 + OpMemberDecorate %type_TranslucentBasePass 183 Offset 1976 + OpMemberDecorate %type_TranslucentBasePass 184 Offset 1980 + OpMemberDecorate %type_TranslucentBasePass 185 Offset 1984 + OpMemberDecorate %type_TranslucentBasePass 186 Offset 1988 + OpMemberDecorate %type_TranslucentBasePass 187 Offset 1992 + OpMemberDecorate %type_TranslucentBasePass 188 Offset 1996 + OpMemberDecorate %type_TranslucentBasePass 189 Offset 2000 + OpMemberDecorate %type_TranslucentBasePass 190 Offset 2004 + OpMemberDecorate %type_TranslucentBasePass 191 Offset 2008 + OpMemberDecorate %type_TranslucentBasePass 192 Offset 2012 + OpMemberDecorate %type_TranslucentBasePass 193 Offset 2016 + OpMemberDecorate %type_TranslucentBasePass 194 Offset 2020 + OpMemberDecorate %type_TranslucentBasePass 195 Offset 2024 + OpMemberDecorate %type_TranslucentBasePass 196 Offset 2028 + OpMemberDecorate %type_TranslucentBasePass 197 Offset 2032 + OpMemberDecorate %type_TranslucentBasePass 198 Offset 2036 + OpMemberDecorate %type_TranslucentBasePass 199 Offset 2040 + OpMemberDecorate %type_TranslucentBasePass 200 Offset 2044 + OpMemberDecorate %type_TranslucentBasePass 201 Offset 2048 + OpMemberDecorate %type_TranslucentBasePass 202 Offset 2052 + OpMemberDecorate %type_TranslucentBasePass 203 Offset 2056 + OpMemberDecorate %type_TranslucentBasePass 204 Offset 2060 + OpMemberDecorate %type_TranslucentBasePass 205 Offset 2064 + OpMemberDecorate %type_TranslucentBasePass 206 Offset 2068 + OpMemberDecorate %type_TranslucentBasePass 207 Offset 2072 + OpMemberDecorate %type_TranslucentBasePass 208 Offset 2076 + OpMemberDecorate %type_TranslucentBasePass 209 Offset 2080 + OpMemberDecorate %type_TranslucentBasePass 210 Offset 2084 + OpMemberDecorate %type_TranslucentBasePass 211 Offset 2088 + OpMemberDecorate %type_TranslucentBasePass 212 Offset 2092 + OpMemberDecorate %type_TranslucentBasePass 213 Offset 2096 + OpMemberDecorate %type_TranslucentBasePass 214 Offset 2100 + OpMemberDecorate 
%type_TranslucentBasePass 215 Offset 2104 + OpMemberDecorate %type_TranslucentBasePass 216 Offset 2108 + OpMemberDecorate %type_TranslucentBasePass 217 Offset 2112 + OpMemberDecorate %type_TranslucentBasePass 218 Offset 2116 + OpMemberDecorate %type_TranslucentBasePass 219 Offset 2120 + OpMemberDecorate %type_TranslucentBasePass 220 Offset 2124 + OpMemberDecorate %type_TranslucentBasePass 221 Offset 2128 + OpMemberDecorate %type_TranslucentBasePass 222 Offset 2132 + OpMemberDecorate %type_TranslucentBasePass 223 Offset 2136 + OpMemberDecorate %type_TranslucentBasePass 224 Offset 2140 + OpMemberDecorate %type_TranslucentBasePass 225 Offset 2144 + OpMemberDecorate %type_TranslucentBasePass 226 Offset 2160 + OpMemberDecorate %type_TranslucentBasePass 227 Offset 2176 + OpDecorate %type_TranslucentBasePass Block + OpDecorate %_arr_v4float_uint_1 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 32 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_0 = OpConstant %float 0 + %48 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %int_10 = OpConstant %int 10 + %int_144 = OpConstant %int 144 + %int_70 = OpConstant %int 70 + %float_1 = OpConstant %float 1 + %53 = OpConstantComposite %v3float %float_1 %float_1 %float_1 +%float_0_577000022 = OpConstant %float 0.577000022 + %55 = OpConstantComposite %v3float %float_0_577000022 %float_0_577000022 %float_0_577000022 + %56 = OpConstantComposite %v3float %float_1 %float_1 %float_0 + %57 = OpConstantComposite %v3float %float_0 %float_1 
%float_1 + %float_0_5 = OpConstant %float 0.5 + %59 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 + %int_60 = OpConstant %int 60 + %61 = OpConstantComposite %v2float %float_0_5 %float_0_5 + %uint_26 = OpConstant %uint 26 + %uint_1 = OpConstant %uint 1 + %uint_5 = OpConstant %uint 5 + %uint_19 = OpConstant %uint 19 + %float_n0_5 = OpConstant %float -0.5 + %67 = OpConstantComposite %v2float %float_0_5 %float_n0_5 + %68 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1 + %float_0_25 = OpConstant %float 0.25 + %int_31 = OpConstant %int 31 + %int_66 = OpConstant %int 66 + %int_153 = OpConstant %int 153 + %int_155 = OpConstant %int 155 +%mat3v3float = OpTypeMatrix %v3float 3 + %75 = OpConstantComposite %v3float %float_0 %float_0 %float_1 + %float_n1 = OpConstant %float -1 +%float_0_200000003 = OpConstant %float 0.200000003 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float 
%float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_3d_image = OpTypeImage %float 3D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float + %v3int = OpTypeVector %int 3 +%_arr_mat4v4float_uint_4 = OpTypeArray %mat4v4float %uint_4 +%mat3v4float = OpTypeMatrix %v4float 3 +%_arr_mat4v4float_uint_2 = OpTypeArray %mat4v4float %uint_2 +%type_TranslucentBasePass = OpTypeStruct %uint %uint %uint %uint %v3int %uint %uint %uint %uint %uint %v3float %float %v3float %float %v3float %float %uint %uint %v2float %uint %uint %uint %uint %v4float %_arr_mat4v4float_uint_4 %_arr_v4float_uint_4 %v4float %float %uint %uint %uint %v4float %mat4v4float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %v3int %uint %uint %uint %uint %uint %v3float %float %v3float %float %v3float %float %uint %uint %v2float %uint %uint %uint %uint %v4float %_arr_mat4v4float_uint_4 %_arr_v4float_uint_4 %v4float %float %uint %uint %uint %v4float %mat4v4float %float %float %float %float %float %float 
%float %float %float %float %float %float %float %float %float %float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %v4float %v4float %v4float %mat3v4float %v3float %float %v2float %float %float %_arr_mat4v4float_uint_2 %_arr_v4float_uint_2 %v2float %uint %float %float %float %float %float %v4float %v4float %v4float %v4float %v4float %v4float %v2float %float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %v4float %float +%_ptr_Uniform_type_TranslucentBasePass = OpTypePointer Uniform %type_TranslucentBasePass +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%type_Material = OpTypeStruct %_arr_v4float_uint_2 %_arr_v4float_uint_1 +%_ptr_Uniform_type_Material = OpTypePointer Uniform %type_Material +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_bool = OpTypePointer Input %bool +%_arr_uint_uint_1 = OpTypeArray %uint %uint_1 +%_ptr_Input__arr_uint_uint_1 = OpTypePointer Input %_arr_uint_uint_1 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output__arr_uint_uint_1 = OpTypePointer Output %_arr_uint_uint_1 + %void = OpTypeVoid + %94 = OpTypeFunction %void +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_int = OpTypePointer 
Uniform %int +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%type_sampled_image = OpTypeSampledImage %type_3d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%View_SharedBilinearClampedSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%View_PrimitiveSceneData = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform +%TranslucentBasePass = OpVariable %_ptr_Uniform_type_TranslucentBasePass Uniform +%TranslucentBasePass_Shared_Fog_IntegratedLightScattering = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input_v4float Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input_uint Input +%in_var_TEXCOORD7 = OpVariable %_ptr_Input_v4float Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%gl_FrontFacing = OpVariable %_ptr_Input_bool Input +%gl_SampleMask = OpVariable %_ptr_Input__arr_uint_uint_1 Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output +%gl_SampleMask_0 = OpVariable %_ptr_Output__arr_uint_uint_1 Output + %102 = OpConstantNull %v4float + %float_n1_5 = OpConstant %float -1.5 + %float_3 = OpConstant %float 3 + %105 = OpConstantComposite %v3float %float_n1 %float_n1_5 %float_3 +%float_12_25 = OpConstant %float 12.25 +%float_0_00200000009 = OpConstant %float 0.00200000009 + %108 = OpUndef %float + %uint_15 = OpConstant %uint 15 + %MainPS = OpFunction %void None %94 + %110 = OpLabel + %111 = OpLoad %v4float %in_var_TEXCOORD10_centroid + %112 = OpLoad %v4float %in_var_TEXCOORD11_centroid + %113 = OpLoad %uint %in_var_PRIMITIVE_ID + %114 = OpLoad %v4float %in_var_TEXCOORD7 + %115 = OpLoad %v4float %gl_FragCoord + %116 = OpLoad %_arr_uint_uint_1 %gl_SampleMask + %117 = OpCompositeExtract %uint %116 0 + %118 = OpAccessChain 
%_ptr_Uniform_mat4v4float %View %int_1 + %119 = OpLoad %mat4v4float %118 + %120 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_10 + %121 = OpLoad %mat4v4float %120 + %122 = OpAccessChain %_ptr_Uniform_v3float %View %int_31 + %123 = OpLoad %v3float %122 + %124 = OpAccessChain %_ptr_Uniform_v4float %View %int_66 + %125 = OpLoad %v4float %124 + %126 = OpVectorShuffle %v3float %111 %111 0 1 2 + %127 = OpVectorShuffle %v3float %112 %112 0 1 2 + %128 = OpExtInst %v3float %1 Cross %127 %126 + %129 = OpCompositeExtract %float %112 3 + %130 = OpCompositeConstruct %v3float %129 %129 %129 + %131 = OpFMul %v3float %128 %130 + %132 = OpCompositeConstruct %mat3v3float %126 %131 %127 + %133 = OpCompositeExtract %float %115 0 + %134 = OpCompositeExtract %float %115 1 + %135 = OpCompositeExtract %float %115 2 + %136 = OpCompositeConstruct %v4float %133 %134 %135 %float_1 + %137 = OpMatrixTimesVector %v4float %121 %136 + %138 = OpVectorShuffle %v3float %137 %137 0 1 2 + %139 = OpCompositeExtract %float %137 3 + %140 = OpCompositeConstruct %v3float %139 %139 %139 + %141 = OpFDiv %v3float %138 %140 + %142 = OpFSub %v3float %141 %123 + %143 = OpVectorShuffle %v3float %125 %125 0 1 2 + %144 = OpCompositeExtract %float %125 3 + %145 = OpCompositeConstruct %v3float %144 %144 %144 + %146 = OpFMul %v3float %75 %145 + %147 = OpFAdd %v3float %146 %143 + %148 = OpExtInst %v3float %1 Normalize %147 + %149 = OpMatrixTimesVector %v3float %132 %148 + %150 = OpExtInst %v3float %1 Normalize %149 + %151 = OpExtInst %float %1 Sqrt %float_12_25 + %152 = OpCompositeConstruct %v3float %151 %151 %151 + %153 = OpFDiv %v3float %105 %152 + %154 = OpDot %float %153 %150 + %155 = OpFAdd %float %float_1 %154 + %156 = OpFMul %float %155 %float_0_5 + %157 = OpFAdd %float %156 %float_0_200000003 + %158 = OpAccessChain %_ptr_Uniform_v4float %Material %int_0 %int_1 + %159 = OpLoad %v4float %158 + %160 = OpVectorShuffle %v3float %159 %159 0 1 2 + %161 = OpCompositeConstruct %v3float %157 %157 %157 + %162 = 
OpFMul %v3float %160 %161 + %163 = OpAccessChain %_ptr_Uniform_float %TranslucentBasePass %int_144 + %164 = OpLoad %float %163 + %165 = OpFOrdGreaterThan %bool %164 %float_0 + OpSelectionMerge %166 None + OpBranchConditional %165 %167 %166 + %167 = OpLabel + %168 = OpCompositeExtract %float %142 0 + %169 = OpCompositeExtract %float %142 1 + %170 = OpCompositeExtract %float %142 2 + %171 = OpCompositeConstruct %v4float %168 %169 %170 %float_1 + %172 = OpMatrixTimesVector %v4float %119 %171 + %173 = OpCompositeExtract %float %172 3 + %174 = OpCompositeConstruct %v2float %173 %173 + %175 = OpVectorShuffle %v2float %172 %172 0 1 + %176 = OpFDiv %v2float %175 %174 + %177 = OpVectorShuffle %v2float %176 %102 0 1 + %178 = OpFMul %v2float %177 %67 + %179 = OpFAdd %v2float %178 %61 + %180 = OpCompositeExtract %float %179 0 + %181 = OpCompositeExtract %float %179 1 + %182 = OpAccessChain %_ptr_Uniform_float %View %int_155 %int_0 + %183 = OpLoad %float %182 + %184 = OpFMul %float %173 %183 + %185 = OpAccessChain %_ptr_Uniform_float %View %int_155 %int_1 + %186 = OpLoad %float %185 + %187 = OpFAdd %float %184 %186 + %188 = OpExtInst %float %1 Log2 %187 + %189 = OpAccessChain %_ptr_Uniform_float %View %int_155 %int_2 + %190 = OpLoad %float %189 + %191 = OpFMul %float %188 %190 + %192 = OpAccessChain %_ptr_Uniform_float %View %int_153 %int_2 + %193 = OpLoad %float %192 + %194 = OpFMul %float %191 %193 + %195 = OpCompositeConstruct %v3float %180 %181 %194 + OpSelectionMerge %196 None + OpBranchConditional %165 %197 %196 + %197 = OpLabel + %198 = OpLoad %type_3d_image %TranslucentBasePass_Shared_Fog_IntegratedLightScattering + %199 = OpLoad %type_sampler %View_SharedBilinearClampedSampler + %200 = OpSampledImage %type_sampled_image %198 %199 + %201 = OpImageSampleExplicitLod %v4float %200 %195 Lod %float_0 + OpBranch %196 + %196 = OpLabel + %202 = OpPhi %v4float %68 %167 %201 %197 + %203 = OpVectorShuffle %v3float %202 %202 0 1 2 + %204 = OpVectorShuffle %v3float %114 %114 0 1 2 + 
%205 = OpCompositeExtract %float %202 3 + %206 = OpCompositeConstruct %v3float %205 %205 %205 + %207 = OpFMul %v3float %204 %206 + %208 = OpFAdd %v3float %203 %207 + %209 = OpCompositeExtract %float %208 0 + %210 = OpCompositeExtract %float %208 1 + %211 = OpCompositeExtract %float %208 2 + %212 = OpCompositeExtract %float %114 3 + %213 = OpFMul %float %205 %212 + %214 = OpCompositeConstruct %v4float %209 %210 %211 %213 + OpBranch %166 + %166 = OpLabel + %215 = OpPhi %v4float %114 %110 %214 %196 + %216 = OpExtInst %v3float %1 FMax %162 %48 + %217 = OpAccessChain %_ptr_Uniform_float %View %int_70 + %218 = OpLoad %float %217 + %219 = OpFOrdGreaterThan %bool %218 %float_0 + OpSelectionMerge %220 DontFlatten + OpBranchConditional %219 %221 %220 + %221 = OpLabel + %222 = OpIMul %uint %113 %uint_26 + %223 = OpIAdd %uint %222 %uint_5 + %224 = OpAccessChain %_ptr_Uniform_v4float %View_PrimitiveSceneData %int_0 %223 + %225 = OpLoad %v4float %224 + %226 = OpVectorShuffle %v3float %225 %225 0 1 2 + %227 = OpFSub %v3float %142 %226 + %228 = OpExtInst %v3float %1 FAbs %227 + %229 = OpIAdd %uint %222 %uint_19 + %230 = OpAccessChain %_ptr_Uniform_v4float %View_PrimitiveSceneData %int_0 %229 + %231 = OpLoad %v4float %230 + %232 = OpVectorShuffle %v3float %231 %231 0 1 2 + %233 = OpFAdd %v3float %232 %53 + %234 = OpFOrdGreaterThan %v3bool %228 %233 + %235 = OpAny %bool %234 + OpSelectionMerge %236 None + OpBranchConditional %235 %237 %236 + %237 = OpLabel + %238 = OpDot %float %142 %55 + %239 = OpFMul %float %238 %float_0_00200000009 + %240 = OpExtInst %float %1 Fract %239 + %241 = OpCompositeConstruct %v3float %240 %240 %240 + %242 = OpFOrdGreaterThan %v3bool %241 %59 + %243 = OpSelect %v3float %242 %53 %48 + %244 = OpExtInst %v3float %1 FMix %56 %57 %243 + OpBranch %236 + %236 = OpLabel + %245 = OpPhi %v3float %216 %221 %244 %237 + OpBranch %220 + %220 = OpLabel + %246 = OpPhi %v3float %216 %166 %245 %236 + %247 = OpCompositeExtract %float %215 3 + %248 = OpCompositeConstruct 
%v3float %247 %247 %247 + %249 = OpFMul %v3float %246 %248 + %250 = OpVectorShuffle %v3float %215 %215 0 1 2 + %251 = OpFAdd %v3float %249 %250 + %252 = OpCompositeExtract %float %251 0 + %253 = OpCompositeExtract %float %251 1 + %254 = OpCompositeExtract %float %251 2 + %255 = OpCompositeConstruct %v4float %252 %253 %254 %108 + %256 = OpCompositeInsert %v4float %float_1 %255 3 + %257 = OpAccessChain %_ptr_Uniform_int %View %int_60 + %258 = OpLoad %int %257 + %259 = OpSGreaterThan %bool %258 %int_1 + OpSelectionMerge %260 None + OpBranchConditional %259 %261 %262 + %262 = OpLabel + OpBranch %260 + %261 = OpLabel + %263 = OpConvertSToF %float %258 + %264 = OpFMul %float %263 %float_0_25 + %265 = OpCompositeConstruct %v4float %264 %264 %264 %264 + %266 = OpFMul %v4float %256 %265 + %267 = OpBitwiseAnd %uint %117 %uint_15 + OpBranch %260 + %260 = OpLabel + %268 = OpPhi %v4float %266 %261 %256 %262 + %269 = OpPhi %uint %267 %261 %117 %262 + OpStore %out_var_SV_Target0 %268 + %270 = OpAccessChain %_ptr_Output_uint %gl_SampleMask_0 %uint_0 + OpStore %270 %269 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag b/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..e6565873a01 --- /dev/null +++ b/shaders-ue4/asm/frag/subpass-input.ios.framebuffer-fetch.asm.frag @@ -0,0 +1,589 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 130 +; Schema: 0 + OpCapability Shader + OpCapability InputAttachment + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %Main "main" %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %Main OriginUpperLeft + OpSource HLSL 600 + OpName %type_subpass_image "type.subpass.image" + OpName %gl_LastFragData "gl_LastFragData" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 
"View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 
"View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 
"PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 
"View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + 
OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_sampler "type.sampler" + OpName %type_2d_image "type.2d.image" + OpName %ShadowDepthTexture "ShadowDepthTexture" + OpName 
%ShadowDepthTextureSampler "ShadowDepthTextureSampler" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "SoftTransitionScale" + OpMemberName %type__Globals 1 "ShadowBufferSize" + OpMemberName %type__Globals 2 "ShadowFadeFraction" + OpMemberName %type__Globals 3 "ShadowSharpen" + OpMemberName %type__Globals 4 "LightPositionAndInvRadius" + OpMemberName %type__Globals 5 "ScreenToShadowMatrix" + OpMemberName %type__Globals 6 "ProjectionDepthBiasParameters" + OpMemberName %type__Globals 7 "ModulatedShadowColor" + OpMemberName %type__Globals 8 "ShadowTileOffsetAndSize" + OpName %_Globals "$Globals" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %Main "Main" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_LastFragData InputAttachmentIndex 0 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %gl_LastFragData DescriptorSet 0 + OpDecorate %gl_LastFragData Binding 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %ShadowDepthTexture DescriptorSet 0 + OpDecorate %ShadowDepthTexture Binding 0 + OpDecorate %ShadowDepthTextureSampler DescriptorSet 0 + OpDecorate %ShadowDepthTextureSampler Binding 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate 
%type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 
28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 
1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + 
OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 
134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 36 + OpMemberDecorate %type__Globals 4 Offset 48 + OpMemberDecorate %type__Globals 5 Offset 64 + OpMemberDecorate %type__Globals 5 MatrixStride 16 + 
OpMemberDecorate %type__Globals 5 ColMajor + OpMemberDecorate %type__Globals 6 Offset 128 + OpMemberDecorate %type__Globals 7 Offset 144 + OpMemberDecorate %type__Globals 8 Offset 160 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %float_1 = OpConstant %float 1 + %int_58 = OpConstant %int 58 + %int_24 = OpConstant %int 24 + %int_5 = OpConstant %int 5 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_8 = OpConstant %int 8 +%float_0_999989986 = OpConstant %float 0.999989986 + %int_0 = OpConstant %int 0 + %float_0 = OpConstant %float 0 + %int_7 = OpConstant %int 7 + %float_0_5 = OpConstant %float 0.5 + %41 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %v2int = OpTypeVector %int 2 + %43 = OpConstantComposite %v2int %int_0 %int_0 + %44 = OpConstantComposite %v3float %float_0 %float_0 %float_0 +%type_subpass_image = OpTypeImage %float SubpassData 2 0 0 2 Unknown +%_ptr_UniformConstant_type_subpass_image = OpTypePointer UniformConstant %type_subpass_image +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float 
%mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type__Globals = OpTypeStruct %v3float %v4float %float %float %v4float %mat4v4float %v2float %v4float %v4float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %53 = OpTypeFunction %void +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = OpTypeSampledImage %type_2d_image +%gl_LastFragData = OpVariable %_ptr_UniformConstant_type_subpass_image UniformConstant + %View = OpVariable %_ptr_Uniform_type_View 
Uniform +%ShadowDepthTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%ShadowDepthTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %57 = OpConstantNull %v4float + %58 = OpUndef %v4float + %Main = OpFunction %void None %53 + %59 = OpLabel + %60 = OpLoad %v4float %gl_FragCoord + %61 = OpVectorShuffle %v2float %60 %60 0 1 + %62 = OpAccessChain %_ptr_Uniform_v4float %View %int_58 + %63 = OpLoad %v4float %62 + %64 = OpVectorShuffle %v2float %63 %63 2 3 + %65 = OpFMul %v2float %61 %64 + %66 = OpLoad %type_subpass_image %gl_LastFragData + %67 = OpImageRead %v4float %66 %43 None + %68 = OpCompositeExtract %float %67 3 + %69 = OpAccessChain %_ptr_Uniform_v4float %View %int_24 + %70 = OpLoad %v4float %69 + %71 = OpVectorShuffle %v2float %70 %70 3 2 + %72 = OpFSub %v2float %65 %71 + %73 = OpVectorShuffle %v2float %70 %70 0 1 + %74 = OpFDiv %v2float %72 %73 + %75 = OpCompositeConstruct %v2float %68 %68 + %76 = OpFMul %v2float %74 %75 + %77 = OpCompositeExtract %float %76 0 + %78 = OpCompositeExtract %float %76 1 + %79 = OpCompositeConstruct %v4float %77 %78 %68 %float_1 + %80 = OpAccessChain %_ptr_Uniform_mat4v4float %_Globals %int_5 + %81 = OpLoad %mat4v4float %80 + %82 = OpMatrixTimesVector %v4float %81 %79 + %83 = OpCompositeExtract %float %82 2 + %84 = OpCompositeExtract %float %82 3 + %85 = OpCompositeConstruct %v3float %84 %84 %84 + %86 = OpVectorShuffle %v3float %82 %82 0 1 2 + %87 = OpFDiv %v3float %86 %85 + %88 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_8 + %89 = OpLoad %v4float %88 + %90 = OpVectorShuffle %v2float %89 %89 2 3 + %91 = OpVectorShuffle %v2float %87 %57 0 1 + %92 = OpFMul %v2float %91 %90 + %93 = OpVectorShuffle %v2float %89 %89 0 1 + %94 = OpVectorShuffle %v2float %92 %57 0 1 + %95 = OpFAdd %v2float %94 %93 + %96 
= OpExtInst %float %1 FMin %83 %float_0_999989986 + %97 = OpLoad %type_2d_image %ShadowDepthTexture + %98 = OpLoad %type_sampler %ShadowDepthTextureSampler + %99 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %100 = OpLoad %float %99 + %101 = OpVectorShuffle %v2float %95 %57 0 1 + %102 = OpSampledImage %type_sampled_image %97 %98 + %103 = OpImageSampleExplicitLod %v4float %102 %101 Lod %float_0 + %104 = OpVectorShuffle %v3float %103 %103 0 0 0 + %105 = OpFMul %float %96 %100 + %106 = OpFSub %float %105 %float_1 + %107 = OpCompositeConstruct %v3float %100 %100 %100 + %108 = OpFMul %v3float %104 %107 + %109 = OpCompositeConstruct %v3float %106 %106 %106 + %110 = OpFSub %v3float %108 %109 + %111 = OpExtInst %v3float %1 FClamp %110 %44 %41 + %112 = OpCompositeExtract %float %111 0 + %113 = OpFSub %float %112 %float_0_5 + %114 = OpAccessChain %_ptr_Uniform_float %_Globals %int_3 + %115 = OpLoad %float %114 + %116 = OpFMul %float %113 %115 + %117 = OpFAdd %float %116 %float_0_5 + %118 = OpExtInst %float %1 FClamp %117 %float_0 %float_1 + %119 = OpFMul %float %118 %118 + %120 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 + %121 = OpLoad %float %120 + %122 = OpExtInst %float %1 FMix %float_1 %119 %121 + %123 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %124 = OpLoad %v4float %123 + %125 = OpVectorShuffle %v3float %124 %124 0 1 2 + %126 = OpCompositeConstruct %v3float %122 %122 %122 + %127 = OpExtInst %v3float %1 FMix %125 %41 %126 + %128 = OpVectorShuffle %v4float %58 %127 4 5 6 3 + %129 = OpCompositeInsert %v4float %float_0 %128 3 + OpStore %out_var_SV_Target0 %129 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag b/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag new file mode 100644 index 00000000000..e6565873a01 --- /dev/null +++ b/shaders-ue4/asm/frag/subpass-input.msl23.framebuffer-fetch.asm.frag @@ -0,0 +1,589 @@ +; SPIR-V +; Version: 1.0 +; Generator: 
Google spiregg; 0 +; Bound: 130 +; Schema: 0 + OpCapability Shader + OpCapability InputAttachment + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %Main "main" %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %Main OriginUpperLeft + OpSource HLSL 600 + OpName %type_subpass_image "type.subpass.image" + OpName %gl_LastFragData "gl_LastFragData" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName 
%type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + 
OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName 
%type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 
"View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName 
%type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_sampler "type.sampler" + OpName %type_2d_image "type.2d.image" + OpName %ShadowDepthTexture "ShadowDepthTexture" + OpName %ShadowDepthTextureSampler "ShadowDepthTextureSampler" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "SoftTransitionScale" + OpMemberName %type__Globals 1 "ShadowBufferSize" + OpMemberName %type__Globals 2 "ShadowFadeFraction" + OpMemberName %type__Globals 3 "ShadowSharpen" + OpMemberName %type__Globals 4 "LightPositionAndInvRadius" + OpMemberName %type__Globals 5 "ScreenToShadowMatrix" + OpMemberName %type__Globals 6 "ProjectionDepthBiasParameters" + OpMemberName %type__Globals 7 "ModulatedShadowColor" + OpMemberName %type__Globals 8 "ShadowTileOffsetAndSize" + OpName %_Globals "$Globals" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %Main "Main" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_LastFragData InputAttachmentIndex 0 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %gl_LastFragData DescriptorSet 0 + OpDecorate %gl_LastFragData Binding 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %ShadowDepthTexture DescriptorSet 0 + OpDecorate %ShadowDepthTexture Binding 0 + OpDecorate %ShadowDepthTextureSampler DescriptorSet 0 + OpDecorate %ShadowDepthTextureSampler Binding 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 1 + OpDecorate %_arr_v4float_uint_2 
ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 
Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + 
OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + 
OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 
2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + 
OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 36 + OpMemberDecorate %type__Globals 4 Offset 48 + OpMemberDecorate %type__Globals 5 Offset 64 + OpMemberDecorate %type__Globals 5 MatrixStride 16 + OpMemberDecorate %type__Globals 5 ColMajor + OpMemberDecorate %type__Globals 6 Offset 128 + OpMemberDecorate %type__Globals 7 Offset 144 + OpMemberDecorate %type__Globals 8 Offset 160 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %float_1 = OpConstant %float 1 + %int_58 = OpConstant %int 58 + %int_24 = OpConstant %int 24 + %int_5 = OpConstant %int 5 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %int_8 = OpConstant %int 8 +%float_0_999989986 = OpConstant %float 0.999989986 + %int_0 = OpConstant %int 0 + %float_0 = OpConstant %float 0 + %int_7 = OpConstant %int 7 + %float_0_5 = OpConstant %float 0.5 + %41 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %v2int = OpTypeVector %int 2 + %43 = OpConstantComposite %v2int %int_0 %int_0 + %44 = OpConstantComposite %v3float %float_0 %float_0 %float_0 +%type_subpass_image = OpTypeImage %float SubpassData 2 0 0 2 Unknown +%_ptr_UniformConstant_type_subpass_image = OpTypePointer UniformConstant %type_subpass_image +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + 
%type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type__Globals = OpTypeStruct %v3float %v4float %float %float %v4float %mat4v4float %v2float %v4float %v4float +%_ptr_Uniform_type__Globals = OpTypePointer Uniform 
%type__Globals +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %53 = OpTypeFunction %void +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%type_sampled_image = OpTypeSampledImage %type_2d_image +%gl_LastFragData = OpVariable %_ptr_UniformConstant_type_subpass_image UniformConstant + %View = OpVariable %_ptr_Uniform_type_View Uniform +%ShadowDepthTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%ShadowDepthTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %57 = OpConstantNull %v4float + %58 = OpUndef %v4float + %Main = OpFunction %void None %53 + %59 = OpLabel + %60 = OpLoad %v4float %gl_FragCoord + %61 = OpVectorShuffle %v2float %60 %60 0 1 + %62 = OpAccessChain %_ptr_Uniform_v4float %View %int_58 + %63 = OpLoad %v4float %62 + %64 = OpVectorShuffle %v2float %63 %63 2 3 + %65 = OpFMul %v2float %61 %64 + %66 = OpLoad %type_subpass_image %gl_LastFragData + %67 = OpImageRead %v4float %66 %43 None + %68 = OpCompositeExtract %float %67 3 + %69 = OpAccessChain %_ptr_Uniform_v4float %View %int_24 + %70 = OpLoad %v4float %69 + %71 = OpVectorShuffle %v2float %70 %70 3 2 + %72 = OpFSub %v2float %65 %71 + %73 = OpVectorShuffle %v2float %70 %70 0 1 + %74 = OpFDiv %v2float %72 %73 + %75 = OpCompositeConstruct %v2float %68 %68 + %76 = OpFMul %v2float %74 %75 + %77 = OpCompositeExtract %float %76 0 + %78 = OpCompositeExtract %float %76 1 + %79 = OpCompositeConstruct %v4float %77 %78 %68 %float_1 + %80 = OpAccessChain %_ptr_Uniform_mat4v4float %_Globals %int_5 + %81 = OpLoad %mat4v4float %80 + %82 = OpMatrixTimesVector %v4float %81 %79 + %83 = 
OpCompositeExtract %float %82 2 + %84 = OpCompositeExtract %float %82 3 + %85 = OpCompositeConstruct %v3float %84 %84 %84 + %86 = OpVectorShuffle %v3float %82 %82 0 1 2 + %87 = OpFDiv %v3float %86 %85 + %88 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_8 + %89 = OpLoad %v4float %88 + %90 = OpVectorShuffle %v2float %89 %89 2 3 + %91 = OpVectorShuffle %v2float %87 %57 0 1 + %92 = OpFMul %v2float %91 %90 + %93 = OpVectorShuffle %v2float %89 %89 0 1 + %94 = OpVectorShuffle %v2float %92 %57 0 1 + %95 = OpFAdd %v2float %94 %93 + %96 = OpExtInst %float %1 FMin %83 %float_0_999989986 + %97 = OpLoad %type_2d_image %ShadowDepthTexture + %98 = OpLoad %type_sampler %ShadowDepthTextureSampler + %99 = OpAccessChain %_ptr_Uniform_float %_Globals %int_0 %int_2 + %100 = OpLoad %float %99 + %101 = OpVectorShuffle %v2float %95 %57 0 1 + %102 = OpSampledImage %type_sampled_image %97 %98 + %103 = OpImageSampleExplicitLod %v4float %102 %101 Lod %float_0 + %104 = OpVectorShuffle %v3float %103 %103 0 0 0 + %105 = OpFMul %float %96 %100 + %106 = OpFSub %float %105 %float_1 + %107 = OpCompositeConstruct %v3float %100 %100 %100 + %108 = OpFMul %v3float %104 %107 + %109 = OpCompositeConstruct %v3float %106 %106 %106 + %110 = OpFSub %v3float %108 %109 + %111 = OpExtInst %v3float %1 FClamp %110 %44 %41 + %112 = OpCompositeExtract %float %111 0 + %113 = OpFSub %float %112 %float_0_5 + %114 = OpAccessChain %_ptr_Uniform_float %_Globals %int_3 + %115 = OpLoad %float %114 + %116 = OpFMul %float %113 %115 + %117 = OpFAdd %float %116 %float_0_5 + %118 = OpExtInst %float %1 FClamp %117 %float_0 %float_1 + %119 = OpFMul %float %118 %118 + %120 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 + %121 = OpLoad %float %120 + %122 = OpExtInst %float %1 FMix %float_1 %119 %121 + %123 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_7 + %124 = OpLoad %v4float %123 + %125 = OpVectorShuffle %v3float %124 %124 0 1 2 + %126 = OpCompositeConstruct %v3float %122 %122 %122 + %127 = OpExtInst %v3float 
%1 FMix %125 %41 %126 + %128 = OpVectorShuffle %v4float %58 %127 4 5 6 3 + %129 = OpCompositeInsert %v4float %float_0 %128 3 + OpStore %out_var_SV_Target0 %129 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag b/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag new file mode 100644 index 00000000000..270a1978fcf --- /dev/null +++ b/shaders-ue4/asm/frag/texture-atomics.asm.argument.msl2.frag @@ -0,0 +1,242 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 180 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %ShadowObjectCullPS OriginUpperLeft + OpSource HLSL 600 + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %CulledObjectBoxBounds "CulledObjectBoxBounds" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ShadowTileListGroupSize" + OpName %_Globals "$Globals" + OpName %type_buffer_image "type.buffer.image" + OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %ShadowObjectCullPS "ShadowObjectCullPS" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 Flat + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %CulledObjectBoxBounds DescriptorSet 0 + OpDecorate %CulledObjectBoxBounds Binding 1 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 2 + OpDecorate 
%RWShadowTileNumCulledObjects DescriptorSet 0 + OpDecorate %RWShadowTileNumCulledObjects Binding 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v2float %float_2 %float_2 + %float_1 = OpConstant %float 1 + %29 = OpConstantComposite %v2float %float_1 %float_1 +%float_n1000 = OpConstant %float -1000 + %int_2 = OpConstant %int 2 + %float_0_5 = OpConstant %float 0.5 + %33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 +%float_500000 = OpConstant %float 500000 + %35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000 +%float_n500000 = OpConstant %float -500000 + %37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %int_8 = OpConstant %int 8 + %44 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n1 = OpConstant %float -1 + %46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1 + %uint_5 = OpConstant %uint 5 + %uint_0 = OpConstant %uint 0 + %uint_3 = OpConstant %uint 3 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float 
+%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float + %v2uint = OpTypeVector %uint 2 +%type__Globals = OpTypeStruct %v2uint +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui +%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %58 = OpTypeFunction %void +%_ptr_Function_v3float = OpTypePointer Function %v3float + %uint_8 = OpConstant %uint 8 +%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8 +%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8 +%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Image_uint = OpTypePointer Image %uint +%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %70 = OpUndef %v3float + %71 = OpConstantNull %v3float +%ShadowObjectCullPS = OpFunction %void None %58 + %72 = OpLabel + %73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function + %74 = OpLoad %uint %in_var_TEXCOORD0 + %75 = OpLoad %v4float %gl_FragCoord + %76 = OpVectorShuffle %v2float %75 %75 0 1 + %77 = OpConvertFToU %v2uint %76 + %78 = OpCompositeExtract %uint %77 1 + %79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0 + %80 = OpAccessChain 
%_ptr_Uniform_uint %_Globals %int_0 %int_0 + %81 = OpLoad %uint %80 + %82 = OpIMul %uint %78 %81 + %83 = OpCompositeExtract %uint %77 0 + %84 = OpIAdd %uint %82 %83 + %85 = OpConvertUToF %float %83 + %86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1 + %87 = OpLoad %uint %86 + %88 = OpISub %uint %87 %uint_1 + %89 = OpISub %uint %88 %78 + %90 = OpConvertUToF %float %89 + %91 = OpCompositeConstruct %v2float %85 %90 + %92 = OpLoad %v2uint %79 + %93 = OpConvertUToF %v2float %92 + %94 = OpFDiv %v2float %91 %93 + %95 = OpFMul %v2float %94 %27 + %96 = OpFSub %v2float %95 %29 + %97 = OpFAdd %v2float %91 %29 + %98 = OpFDiv %v2float %97 %93 + %99 = OpFMul %v2float %98 %27 + %100 = OpFSub %v2float %99 %29 + %101 = OpVectorShuffle %v3float %70 %100 3 4 2 + %102 = OpCompositeInsert %v3float %float_1 %101 2 + %103 = OpIMul %uint %74 %uint_5 + %104 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %103 + %105 = OpLoad %v4float %104 + %106 = OpVectorShuffle %v3float %105 %105 0 1 2 + %107 = OpIAdd %uint %103 %uint_1 + %108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107 + %109 = OpLoad %v4float %108 + %110 = OpVectorShuffle %v3float %109 %109 0 1 2 + %111 = OpVectorShuffle %v2float %109 %71 0 1 + %112 = OpVectorShuffle %v2float %96 %71 0 1 + %113 = OpFOrdGreaterThan %v2bool %111 %112 + %114 = OpAll %bool %113 + %115 = OpFOrdLessThan %v3bool %106 %102 + %116 = OpAll %bool %115 + %117 = OpLogicalAnd %bool %114 %116 + OpSelectionMerge %118 DontFlatten + OpBranchConditional %117 %119 %118 + %119 = OpLabel + %120 = OpFAdd %v3float %106 %110 + %121 = OpFMul %v3float %33 %120 + %122 = OpCompositeExtract %float %96 0 + %123 = OpCompositeExtract %float %96 1 + %124 = OpCompositeConstruct %v3float %122 %123 %float_n1000 + %125 = OpAccessChain %_ptr_Function_v3float %73 %int_0 + OpStore %125 %124 + %126 = OpCompositeExtract %float %100 0 + %127 = OpCompositeConstruct %v3float %126 %123 %float_n1000 + %128 = OpAccessChain 
%_ptr_Function_v3float %73 %int_1 + OpStore %128 %127 + %129 = OpCompositeExtract %float %100 1 + %130 = OpCompositeConstruct %v3float %122 %129 %float_n1000 + %131 = OpAccessChain %_ptr_Function_v3float %73 %int_2 + OpStore %131 %130 + %132 = OpCompositeConstruct %v3float %126 %129 %float_n1000 + %133 = OpAccessChain %_ptr_Function_v3float %73 %int_3 + OpStore %133 %132 + %134 = OpCompositeConstruct %v3float %122 %123 %float_1 + %135 = OpAccessChain %_ptr_Function_v3float %73 %int_4 + OpStore %135 %134 + %136 = OpCompositeConstruct %v3float %126 %123 %float_1 + %137 = OpAccessChain %_ptr_Function_v3float %73 %int_5 + OpStore %137 %136 + %138 = OpCompositeConstruct %v3float %122 %129 %float_1 + %139 = OpAccessChain %_ptr_Function_v3float %73 %int_6 + OpStore %139 %138 + %140 = OpCompositeConstruct %v3float %126 %129 %float_1 + %141 = OpAccessChain %_ptr_Function_v3float %73 %int_7 + OpStore %141 %140 + %142 = OpIAdd %uint %103 %uint_2 + %143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142 + %144 = OpLoad %v4float %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpIAdd %uint %103 %uint_3 + %147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146 + %148 = OpLoad %v4float %147 + %149 = OpVectorShuffle %v3float %148 %148 0 1 2 + %150 = OpIAdd %uint %103 %uint_4 + %151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150 + %152 = OpLoad %v4float %151 + %153 = OpVectorShuffle %v3float %152 %152 0 1 2 + OpBranch %154 + %154 = OpLabel + %155 = OpPhi %v3float %37 %119 %156 %157 + %158 = OpPhi %v3float %35 %119 %159 %157 + %160 = OpPhi %int %int_0 %119 %161 %157 + %162 = OpSLessThan %bool %160 %int_8 + OpLoopMerge %163 %157 Unroll + OpBranchConditional %162 %157 %163 + %157 = OpLabel + %164 = OpAccessChain %_ptr_Function_v3float %73 %160 + %165 = OpLoad %v3float %164 + %166 = OpFSub %v3float %165 %121 + %167 = OpDot %float %166 %145 + %168 = OpDot %float %166 %149 + %169 = OpDot %float %166 %153 
+ %170 = OpCompositeConstruct %v3float %167 %168 %169 + %159 = OpExtInst %v3float %1 FMin %158 %170 + %156 = OpExtInst %v3float %1 FMax %155 %170 + %161 = OpIAdd %int %160 %int_1 + OpBranch %154 + %163 = OpLabel + %171 = OpFOrdLessThan %v3bool %158 %44 + %172 = OpAll %bool %171 + %173 = OpFOrdGreaterThan %v3bool %155 %46 + %174 = OpAll %bool %173 + %175 = OpLogicalAnd %bool %172 %174 + OpSelectionMerge %176 DontFlatten + OpBranchConditional %175 %177 %176 + %177 = OpLabel + %178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0 + %179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1 + OpBranch %176 + %176 = OpLabel + OpBranch %118 + %118 = OpLabel + OpStore %out_var_SV_Target0 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/texture-atomics.asm.frag b/shaders-ue4/asm/frag/texture-atomics.asm.frag new file mode 100644 index 00000000000..270a1978fcf --- /dev/null +++ b/shaders-ue4/asm/frag/texture-atomics.asm.frag @@ -0,0 +1,242 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 180 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %ShadowObjectCullPS OriginUpperLeft + OpSource HLSL 600 + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %CulledObjectBoxBounds "CulledObjectBoxBounds" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ShadowTileListGroupSize" + OpName %_Globals "$Globals" + OpName %type_buffer_image "type.buffer.image" + OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %ShadowObjectCullPS "ShadowObjectCullPS" + OpDecorateString 
%in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 Flat + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %CulledObjectBoxBounds DescriptorSet 0 + OpDecorate %CulledObjectBoxBounds Binding 1 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 2 + OpDecorate %RWShadowTileNumCulledObjects DescriptorSet 0 + OpDecorate %RWShadowTileNumCulledObjects Binding 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite %v2float %float_2 %float_2 + %float_1 = OpConstant %float 1 + %29 = OpConstantComposite %v2float %float_1 %float_1 +%float_n1000 = OpConstant %float -1000 + %int_2 = OpConstant %int 2 + %float_0_5 = OpConstant %float 0.5 + %33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 +%float_500000 = OpConstant %float 500000 + %35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000 +%float_n500000 = OpConstant %float -500000 + %37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000 + %int_3 = OpConstant %int 3 + %int_4 
= OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %int_8 = OpConstant %int 8 + %44 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n1 = OpConstant %float -1 + %46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1 + %uint_5 = OpConstant %uint 5 + %uint_0 = OpConstant %uint 0 + %uint_3 = OpConstant %uint 3 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float + %v2uint = OpTypeVector %uint 2 +%type__Globals = OpTypeStruct %v2uint +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui +%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %58 = OpTypeFunction %void +%_ptr_Function_v3float = OpTypePointer Function %v3float + %uint_8 = OpConstant %uint 8 +%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8 +%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8 +%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Image_uint = OpTypePointer Image %uint +%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input 
+%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %70 = OpUndef %v3float + %71 = OpConstantNull %v3float +%ShadowObjectCullPS = OpFunction %void None %58 + %72 = OpLabel + %73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function + %74 = OpLoad %uint %in_var_TEXCOORD0 + %75 = OpLoad %v4float %gl_FragCoord + %76 = OpVectorShuffle %v2float %75 %75 0 1 + %77 = OpConvertFToU %v2uint %76 + %78 = OpCompositeExtract %uint %77 1 + %79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0 + %80 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_0 + %81 = OpLoad %uint %80 + %82 = OpIMul %uint %78 %81 + %83 = OpCompositeExtract %uint %77 0 + %84 = OpIAdd %uint %82 %83 + %85 = OpConvertUToF %float %83 + %86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1 + %87 = OpLoad %uint %86 + %88 = OpISub %uint %87 %uint_1 + %89 = OpISub %uint %88 %78 + %90 = OpConvertUToF %float %89 + %91 = OpCompositeConstruct %v2float %85 %90 + %92 = OpLoad %v2uint %79 + %93 = OpConvertUToF %v2float %92 + %94 = OpFDiv %v2float %91 %93 + %95 = OpFMul %v2float %94 %27 + %96 = OpFSub %v2float %95 %29 + %97 = OpFAdd %v2float %91 %29 + %98 = OpFDiv %v2float %97 %93 + %99 = OpFMul %v2float %98 %27 + %100 = OpFSub %v2float %99 %29 + %101 = OpVectorShuffle %v3float %70 %100 3 4 2 + %102 = OpCompositeInsert %v3float %float_1 %101 2 + %103 = OpIMul %uint %74 %uint_5 + %104 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %103 + %105 = OpLoad %v4float %104 + %106 = OpVectorShuffle %v3float %105 %105 0 1 2 + %107 = OpIAdd %uint %103 %uint_1 + %108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107 + %109 = OpLoad %v4float %108 + %110 = OpVectorShuffle %v3float %109 %109 0 1 2 + %111 = OpVectorShuffle %v2float %109 %71 0 1 + %112 = OpVectorShuffle %v2float %96 %71 0 1 + %113 = OpFOrdGreaterThan %v2bool %111 %112 + %114 = OpAll %bool %113 + %115 = OpFOrdLessThan %v3bool %106 %102 + %116 = OpAll %bool %115 + %117 = OpLogicalAnd %bool %114 %116 
+ OpSelectionMerge %118 DontFlatten + OpBranchConditional %117 %119 %118 + %119 = OpLabel + %120 = OpFAdd %v3float %106 %110 + %121 = OpFMul %v3float %33 %120 + %122 = OpCompositeExtract %float %96 0 + %123 = OpCompositeExtract %float %96 1 + %124 = OpCompositeConstruct %v3float %122 %123 %float_n1000 + %125 = OpAccessChain %_ptr_Function_v3float %73 %int_0 + OpStore %125 %124 + %126 = OpCompositeExtract %float %100 0 + %127 = OpCompositeConstruct %v3float %126 %123 %float_n1000 + %128 = OpAccessChain %_ptr_Function_v3float %73 %int_1 + OpStore %128 %127 + %129 = OpCompositeExtract %float %100 1 + %130 = OpCompositeConstruct %v3float %122 %129 %float_n1000 + %131 = OpAccessChain %_ptr_Function_v3float %73 %int_2 + OpStore %131 %130 + %132 = OpCompositeConstruct %v3float %126 %129 %float_n1000 + %133 = OpAccessChain %_ptr_Function_v3float %73 %int_3 + OpStore %133 %132 + %134 = OpCompositeConstruct %v3float %122 %123 %float_1 + %135 = OpAccessChain %_ptr_Function_v3float %73 %int_4 + OpStore %135 %134 + %136 = OpCompositeConstruct %v3float %126 %123 %float_1 + %137 = OpAccessChain %_ptr_Function_v3float %73 %int_5 + OpStore %137 %136 + %138 = OpCompositeConstruct %v3float %122 %129 %float_1 + %139 = OpAccessChain %_ptr_Function_v3float %73 %int_6 + OpStore %139 %138 + %140 = OpCompositeConstruct %v3float %126 %129 %float_1 + %141 = OpAccessChain %_ptr_Function_v3float %73 %int_7 + OpStore %141 %140 + %142 = OpIAdd %uint %103 %uint_2 + %143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142 + %144 = OpLoad %v4float %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpIAdd %uint %103 %uint_3 + %147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146 + %148 = OpLoad %v4float %147 + %149 = OpVectorShuffle %v3float %148 %148 0 1 2 + %150 = OpIAdd %uint %103 %uint_4 + %151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150 + %152 = OpLoad %v4float %151 + %153 = OpVectorShuffle %v3float %152 %152 
0 1 2 + OpBranch %154 + %154 = OpLabel + %155 = OpPhi %v3float %37 %119 %156 %157 + %158 = OpPhi %v3float %35 %119 %159 %157 + %160 = OpPhi %int %int_0 %119 %161 %157 + %162 = OpSLessThan %bool %160 %int_8 + OpLoopMerge %163 %157 Unroll + OpBranchConditional %162 %157 %163 + %157 = OpLabel + %164 = OpAccessChain %_ptr_Function_v3float %73 %160 + %165 = OpLoad %v3float %164 + %166 = OpFSub %v3float %165 %121 + %167 = OpDot %float %166 %145 + %168 = OpDot %float %166 %149 + %169 = OpDot %float %166 %153 + %170 = OpCompositeConstruct %v3float %167 %168 %169 + %159 = OpExtInst %v3float %1 FMin %158 %170 + %156 = OpExtInst %v3float %1 FMax %155 %170 + %161 = OpIAdd %int %160 %int_1 + OpBranch %154 + %163 = OpLabel + %171 = OpFOrdLessThan %v3bool %158 %44 + %172 = OpAll %bool %171 + %173 = OpFOrdGreaterThan %v3bool %155 %46 + %174 = OpAll %bool %173 + %175 = OpLogicalAnd %bool %172 %174 + OpSelectionMerge %176 DontFlatten + OpBranchConditional %175 %177 %176 + %177 = OpLabel + %178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0 + %179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1 + OpBranch %176 + %176 = OpLabel + OpBranch %118 + %118 = OpLabel + OpStore %out_var_SV_Target0 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag new file mode 100644 index 00000000000..270a1978fcf --- /dev/null +++ b/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -0,0 +1,242 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 180 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %ShadowObjectCullPS "main" %in_var_TEXCOORD0 %gl_FragCoord %out_var_SV_Target0 + OpExecutionMode %ShadowObjectCullPS 
OriginUpperLeft + OpSource HLSL 600 + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %CulledObjectBoxBounds "CulledObjectBoxBounds" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ShadowTileListGroupSize" + OpName %_Globals "$Globals" + OpName %type_buffer_image "type.buffer.image" + OpName %RWShadowTileNumCulledObjects "RWShadowTileNumCulledObjects" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %out_var_SV_Target0 "out.var.SV_Target0" + OpName %ShadowObjectCullPS "ShadowObjectCullPS" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorate %in_var_TEXCOORD0 Flat + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorateString %gl_FragCoord UserSemantic "SV_POSITION" + OpDecorateString %out_var_SV_Target0 UserSemantic "SV_Target0" + OpDecorate %in_var_TEXCOORD0 Location 0 + OpDecorate %out_var_SV_Target0 Location 0 + OpDecorate %CulledObjectBoxBounds DescriptorSet 0 + OpDecorate %CulledObjectBoxBounds Binding 1 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 2 + OpDecorate %RWShadowTileNumCulledObjects DescriptorSet 0 + OpDecorate %RWShadowTileNumCulledObjects Binding 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %22 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %float_2 = OpConstant %float 2 + %27 = OpConstantComposite 
%v2float %float_2 %float_2 + %float_1 = OpConstant %float 1 + %29 = OpConstantComposite %v2float %float_1 %float_1 +%float_n1000 = OpConstant %float -1000 + %int_2 = OpConstant %int 2 + %float_0_5 = OpConstant %float 0.5 + %33 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 +%float_500000 = OpConstant %float 500000 + %35 = OpConstantComposite %v3float %float_500000 %float_500000 %float_500000 +%float_n500000 = OpConstant %float -500000 + %37 = OpConstantComposite %v3float %float_n500000 %float_n500000 %float_n500000 + %int_3 = OpConstant %int 3 + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %int_8 = OpConstant %int 8 + %44 = OpConstantComposite %v3float %float_1 %float_1 %float_1 + %float_n1 = OpConstant %float -1 + %46 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1 + %uint_5 = OpConstant %uint 5 + %uint_0 = OpConstant %uint 0 + %uint_3 = OpConstant %uint 3 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform %type_StructuredBuffer_v4float + %v2uint = OpTypeVector %uint 2 +%type__Globals = OpTypeStruct %v2uint +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_buffer_image = OpTypeImage %uint Buffer 2 0 0 2 R32ui +%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %58 = OpTypeFunction %void +%_ptr_Function_v3float = OpTypePointer Function %v3float + %uint_8 = OpConstant %uint 8 +%_arr_v3float_uint_8 = OpTypeArray %v3float %uint_8 +%_ptr_Function__arr_v3float_uint_8 = OpTypePointer Function %_arr_v3float_uint_8 +%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint +%_ptr_Uniform_uint = OpTypePointer 
Uniform %uint + %bool = OpTypeBool + %v2bool = OpTypeVector %bool 2 + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Image_uint = OpTypePointer Image %uint +%CulledObjectBoxBounds = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%RWShadowTileNumCulledObjects = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +%in_var_TEXCOORD0 = OpVariable %_ptr_Input_uint Input +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input +%out_var_SV_Target0 = OpVariable %_ptr_Output_v4float Output + %70 = OpUndef %v3float + %71 = OpConstantNull %v3float +%ShadowObjectCullPS = OpFunction %void None %58 + %72 = OpLabel + %73 = OpVariable %_ptr_Function__arr_v3float_uint_8 Function + %74 = OpLoad %uint %in_var_TEXCOORD0 + %75 = OpLoad %v4float %gl_FragCoord + %76 = OpVectorShuffle %v2float %75 %75 0 1 + %77 = OpConvertFToU %v2uint %76 + %78 = OpCompositeExtract %uint %77 1 + %79 = OpAccessChain %_ptr_Uniform_v2uint %_Globals %int_0 + %80 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_0 + %81 = OpLoad %uint %80 + %82 = OpIMul %uint %78 %81 + %83 = OpCompositeExtract %uint %77 0 + %84 = OpIAdd %uint %82 %83 + %85 = OpConvertUToF %float %83 + %86 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 %int_1 + %87 = OpLoad %uint %86 + %88 = OpISub %uint %87 %uint_1 + %89 = OpISub %uint %88 %78 + %90 = OpConvertUToF %float %89 + %91 = OpCompositeConstruct %v2float %85 %90 + %92 = OpLoad %v2uint %79 + %93 = OpConvertUToF %v2float %92 + %94 = OpFDiv %v2float %91 %93 + %95 = OpFMul %v2float %94 %27 + %96 = OpFSub %v2float %95 %29 + %97 = OpFAdd %v2float %91 %29 + %98 = OpFDiv %v2float %97 %93 + %99 = OpFMul %v2float %98 %27 + %100 = OpFSub %v2float %99 %29 + %101 = OpVectorShuffle %v3float %70 %100 3 4 2 + %102 = OpCompositeInsert %v3float %float_1 %101 2 + %103 = OpIMul %uint %74 %uint_5 + %104 = OpAccessChain %_ptr_Uniform_v4float 
%CulledObjectBoxBounds %int_0 %103 + %105 = OpLoad %v4float %104 + %106 = OpVectorShuffle %v3float %105 %105 0 1 2 + %107 = OpIAdd %uint %103 %uint_1 + %108 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %107 + %109 = OpLoad %v4float %108 + %110 = OpVectorShuffle %v3float %109 %109 0 1 2 + %111 = OpVectorShuffle %v2float %109 %71 0 1 + %112 = OpVectorShuffle %v2float %96 %71 0 1 + %113 = OpFOrdGreaterThan %v2bool %111 %112 + %114 = OpAll %bool %113 + %115 = OpFOrdLessThan %v3bool %106 %102 + %116 = OpAll %bool %115 + %117 = OpLogicalAnd %bool %114 %116 + OpSelectionMerge %118 DontFlatten + OpBranchConditional %117 %119 %118 + %119 = OpLabel + %120 = OpFAdd %v3float %106 %110 + %121 = OpFMul %v3float %33 %120 + %122 = OpCompositeExtract %float %96 0 + %123 = OpCompositeExtract %float %96 1 + %124 = OpCompositeConstruct %v3float %122 %123 %float_n1000 + %125 = OpAccessChain %_ptr_Function_v3float %73 %int_0 + OpStore %125 %124 + %126 = OpCompositeExtract %float %100 0 + %127 = OpCompositeConstruct %v3float %126 %123 %float_n1000 + %128 = OpAccessChain %_ptr_Function_v3float %73 %int_1 + OpStore %128 %127 + %129 = OpCompositeExtract %float %100 1 + %130 = OpCompositeConstruct %v3float %122 %129 %float_n1000 + %131 = OpAccessChain %_ptr_Function_v3float %73 %int_2 + OpStore %131 %130 + %132 = OpCompositeConstruct %v3float %126 %129 %float_n1000 + %133 = OpAccessChain %_ptr_Function_v3float %73 %int_3 + OpStore %133 %132 + %134 = OpCompositeConstruct %v3float %122 %123 %float_1 + %135 = OpAccessChain %_ptr_Function_v3float %73 %int_4 + OpStore %135 %134 + %136 = OpCompositeConstruct %v3float %126 %123 %float_1 + %137 = OpAccessChain %_ptr_Function_v3float %73 %int_5 + OpStore %137 %136 + %138 = OpCompositeConstruct %v3float %122 %129 %float_1 + %139 = OpAccessChain %_ptr_Function_v3float %73 %int_6 + OpStore %139 %138 + %140 = OpCompositeConstruct %v3float %126 %129 %float_1 + %141 = OpAccessChain %_ptr_Function_v3float %73 %int_7 + OpStore %141 
%140 + %142 = OpIAdd %uint %103 %uint_2 + %143 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %142 + %144 = OpLoad %v4float %143 + %145 = OpVectorShuffle %v3float %144 %144 0 1 2 + %146 = OpIAdd %uint %103 %uint_3 + %147 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %146 + %148 = OpLoad %v4float %147 + %149 = OpVectorShuffle %v3float %148 %148 0 1 2 + %150 = OpIAdd %uint %103 %uint_4 + %151 = OpAccessChain %_ptr_Uniform_v4float %CulledObjectBoxBounds %int_0 %150 + %152 = OpLoad %v4float %151 + %153 = OpVectorShuffle %v3float %152 %152 0 1 2 + OpBranch %154 + %154 = OpLabel + %155 = OpPhi %v3float %37 %119 %156 %157 + %158 = OpPhi %v3float %35 %119 %159 %157 + %160 = OpPhi %int %int_0 %119 %161 %157 + %162 = OpSLessThan %bool %160 %int_8 + OpLoopMerge %163 %157 Unroll + OpBranchConditional %162 %157 %163 + %157 = OpLabel + %164 = OpAccessChain %_ptr_Function_v3float %73 %160 + %165 = OpLoad %v3float %164 + %166 = OpFSub %v3float %165 %121 + %167 = OpDot %float %166 %145 + %168 = OpDot %float %166 %149 + %169 = OpDot %float %166 %153 + %170 = OpCompositeConstruct %v3float %167 %168 %169 + %159 = OpExtInst %v3float %1 FMin %158 %170 + %156 = OpExtInst %v3float %1 FMax %155 %170 + %161 = OpIAdd %int %160 %int_1 + OpBranch %154 + %163 = OpLabel + %171 = OpFOrdLessThan %v3bool %158 %44 + %172 = OpAll %bool %171 + %173 = OpFOrdGreaterThan %v3bool %155 %46 + %174 = OpAll %bool %173 + %175 = OpLogicalAnd %bool %172 %174 + OpSelectionMerge %176 DontFlatten + OpBranchConditional %175 %177 %176 + %177 = OpLabel + %178 = OpImageTexelPointer %_ptr_Image_uint %RWShadowTileNumCulledObjects %84 %uint_0 + %179 = OpAtomicIAdd %uint %178 %uint_1 %uint_0 %uint_1 + OpBranch %176 + %176 = OpLabel + OpBranch %118 + %118 = OpLabel + OpStore %out_var_SV_Target0 %22 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc b/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc new file mode 100644 
index 00000000000..4c70e14cb72 --- /dev/null +++ b/shaders-ue4/asm/tesc/hs-incorrect-base-type.invalid.asm.tesc @@ -0,0 +1,1158 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 598 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpCapability StorageImageExtendedFormats + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %MainHull "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_COLOR0 %in_var_TEXCOORD0 %in_var_TEXCOORD4 %in_var_PRIMITIVE_ID %in_var_LIGHTMAP_ID %in_var_VS_To_DS_Position %gl_InvocationID %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_COLOR0 %out_var_TEXCOORD0 %out_var_TEXCOORD4 %out_var_PRIMITIVE_ID %out_var_LIGHTMAP_ID %out_var_VS_To_DS_Position %out_var_PN_POSITION %out_var_PN_DisplacementScales %out_var_PN_TessellationMultiplier %out_var_PN_WorldDisplacementMultiplier %gl_TessLevelOuter %gl_TessLevelInner %out_var_PN_POSITION9 + OpExecutionMode %MainHull Triangles + OpExecutionMode %MainHull SpacingFractionalOdd + OpExecutionMode %MainHull VertexOrderCw + OpExecutionMode %MainHull OutputVertices 3 + OpSource HLSL 600 + OpName %FPNTessellationHSToDS "FPNTessellationHSToDS" + OpMemberName %FPNTessellationHSToDS 0 "PassSpecificData" + OpMemberName %FPNTessellationHSToDS 1 "WorldPosition" + OpMemberName %FPNTessellationHSToDS 2 "DisplacementScale" + OpMemberName %FPNTessellationHSToDS 3 "TessellationMultiplier" + OpMemberName %FPNTessellationHSToDS 4 "WorldDisplacementMultiplier" + OpName %FBasePassVSToDS "FBasePassVSToDS" + OpMemberName %FBasePassVSToDS 0 "FactoryInterpolants" + OpMemberName %FBasePassVSToDS 1 "BasePassInterpolants" + OpMemberName %FBasePassVSToDS 2 "Position" + OpName %FVertexFactoryInterpolantsVSToDS "FVertexFactoryInterpolantsVSToDS" + OpMemberName %FVertexFactoryInterpolantsVSToDS 0 "InterpolantsVSToPS" + OpName %FVertexFactoryInterpolantsVSToPS 
"FVertexFactoryInterpolantsVSToPS" + OpMemberName %FVertexFactoryInterpolantsVSToPS 0 "TangentToWorld0" + OpMemberName %FVertexFactoryInterpolantsVSToPS 1 "TangentToWorld2" + OpMemberName %FVertexFactoryInterpolantsVSToPS 2 "Color" + OpMemberName %FVertexFactoryInterpolantsVSToPS 3 "TexCoords" + OpMemberName %FVertexFactoryInterpolantsVSToPS 4 "LightMapCoordinate" + OpMemberName %FVertexFactoryInterpolantsVSToPS 5 "PrimitiveId" + OpMemberName %FVertexFactoryInterpolantsVSToPS 6 "LightmapDataIndex" + OpName %FBasePassInterpolantsVSToDS "FBasePassInterpolantsVSToDS" + OpName %FSharedBasePassInterpolants "FSharedBasePassInterpolants" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 
"View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + 
OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 
"View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName 
%type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 
"View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_StructuredBuffer_v4float "type.StructuredBuffer.v4float" + OpName %View_PrimitiveSceneData "View_PrimitiveSceneData" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_COLOR0 "in.var.COLOR0" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_TEXCOORD4 "in.var.TEXCOORD4" + OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %in_var_LIGHTMAP_ID "in.var.LIGHTMAP_ID" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_COLOR0 "out.var.COLOR0" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_TEXCOORD4 "out.var.TEXCOORD4" + OpName %out_var_PRIMITIVE_ID "out.var.PRIMITIVE_ID" + OpName %out_var_LIGHTMAP_ID "out.var.LIGHTMAP_ID" + OpName %out_var_VS_To_DS_Position "out.var.VS_To_DS_Position" + OpName %out_var_PN_POSITION "out.var.PN_POSITION" + OpName %out_var_PN_DisplacementScales "out.var.PN_DisplacementScales" + OpName %out_var_PN_TessellationMultiplier "out.var.PN_TessellationMultiplier" + OpName %out_var_PN_WorldDisplacementMultiplier "out.var.PN_WorldDisplacementMultiplier" + OpName %out_var_PN_POSITION9 "out.var.PN_POSITION9" + OpName %MainHull 
"MainHull" + OpName %param_var_I "param.var.I" + OpName %temp_var_hullMainRetVal "temp.var.hullMainRetVal" + OpName %if_merge "if.merge" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_TEXCOORD4 UserSemantic "TEXCOORD4" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorateString %in_var_LIGHTMAP_ID UserSemantic "LIGHTMAP_ID" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorateString %gl_InvocationID UserSemantic "SV_OutputControlPointID" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_TEXCOORD4 UserSemantic "TEXCOORD4" + OpDecorateString %out_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorateString %out_var_LIGHTMAP_ID UserSemantic "LIGHTMAP_ID" + OpDecorateString %out_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %out_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %out_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %out_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %out_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner 
UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %out_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %out_var_PN_POSITION9 Patch + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_COLOR0 Location 2 + OpDecorate %in_var_TEXCOORD0 Location 3 + OpDecorate %in_var_TEXCOORD4 Location 4 + OpDecorate %in_var_PRIMITIVE_ID Location 5 + OpDecorate %in_var_LIGHTMAP_ID Location 6 + OpDecorate %in_var_VS_To_DS_Position Location 7 + OpDecorate %out_var_COLOR0 Location 0 + OpDecorate %out_var_LIGHTMAP_ID Location 1 + OpDecorate %out_var_PN_DisplacementScales Location 2 + OpDecorate %out_var_PN_POSITION Location 3 + OpDecorate %out_var_PN_POSITION9 Location 6 + OpDecorate %out_var_PN_TessellationMultiplier Location 7 + OpDecorate %out_var_PN_WorldDisplacementMultiplier Location 8 + OpDecorate %out_var_PRIMITIVE_ID Location 9 + OpDecorate %out_var_TEXCOORD0 Location 10 + OpDecorate %out_var_TEXCOORD10_centroid Location 11 + OpDecorate %out_var_TEXCOORD11_centroid Location 12 + OpDecorate %out_var_TEXCOORD4 Location 13 + OpDecorate %out_var_VS_To_DS_Position Location 14 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 1 + OpDecorate %View_PrimitiveSceneData DescriptorSet 0 + OpDecorate %View_PrimitiveSceneData Binding 0 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + 
OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + 
OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + 
OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate 
%type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + 
OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %type_StructuredBuffer_v4float 0 Offset 0 + OpMemberDecorate %type_StructuredBuffer_v4float 0 NonWritable + OpDecorate %type_StructuredBuffer_v4float BufferBlock + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int 
= OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_2 = OpConstant %float 2 + %62 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %float_0_5 = OpConstant %float 0.5 + %int_3 = OpConstant %int 3 +%float_0_333000004 = OpConstant %float 0.333000004 + %float_1 = OpConstant %float 1 + %67 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_15 = OpConstant %float 15 + %69 = OpConstantComposite %v4float %float_15 %float_15 %float_15 %float_15 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%FVertexFactoryInterpolantsVSToPS = OpTypeStruct %v4float %v4float %v4float %_arr_v4float_uint_1 %v4float %uint %uint +%FVertexFactoryInterpolantsVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToPS +%FSharedBasePassInterpolants = OpTypeStruct +%FBasePassInterpolantsVSToDS = OpTypeStruct %FSharedBasePassInterpolants +%FBasePassVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToDS %FBasePassInterpolantsVSToDS %v4float +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%FPNTessellationHSToDS = OpTypeStruct %FBasePassVSToDS %_arr_v4float_uint_3 %v3float %float %float + %v3int = OpTypeVector %int 3 + %73 = OpConstantComposite %v3int %int_0 %int_0 %int_0 + %74 = OpConstantComposite %v3int %int_3 %int_3 %int_3 + %float_0 = OpConstant %float 0 + %76 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %77 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 + %int_77 = OpConstant %int 77 + %int_6 = OpConstant %int 6 + %int_27 = OpConstant %int 27 + %81 = OpConstantComposite %v3int %int_1 %int_1 %int_1 + %82 = OpConstantComposite %v3int %int_2 %int_2 %int_2 + %uint_26 = OpConstant %uint 26 + 
%uint_12 = OpConstant %uint 12 + %uint_22 = OpConstant %uint 22 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%_runtimearr_v4float = OpTypeRuntimeArray %v4float +%type_StructuredBuffer_v4float = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_type_StructuredBuffer_v4float = OpTypePointer Uniform 
%type_StructuredBuffer_v4float +%_arr_v4float_uint_12 = OpTypeArray %v4float %uint_12 +%_ptr_Input__arr_v4float_uint_12 = OpTypePointer Input %_arr_v4float_uint_12 +%_arr__arr_v4float_uint_1_uint_12 = OpTypeArray %_arr_v4float_uint_1 %uint_12 +%_ptr_Input__arr__arr_v4float_uint_1_uint_12 = OpTypePointer Input %_arr__arr_v4float_uint_1_uint_12 +%_arr_uint_uint_12 = OpTypeArray %uint %uint_12 +%_ptr_Input__arr_uint_uint_12 = OpTypePointer Input %_arr_uint_uint_12 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_arr__arr_v4float_uint_1_uint_3 = OpTypeArray %_arr_v4float_uint_1 %uint_3 +%_ptr_Output__arr__arr_v4float_uint_1_uint_3 = OpTypePointer Output %_arr__arr_v4float_uint_1_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Output__arr_uint_uint_3 = OpTypePointer Output %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Output__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Output %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Output__arr_v3float_uint_3 = OpTypePointer Output %_arr_v3float_uint_3 +%_ptr_Output__arr_float_uint_3 = OpTypePointer Output %_arr_float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %111 = OpTypeFunction %void +%_arr_FBasePassVSToDS_uint_12 = OpTypeArray %FBasePassVSToDS %uint_12 +%_ptr_Function__arr_FBasePassVSToDS_uint_12 = OpTypePointer Function %_arr_FBasePassVSToDS_uint_12 +%_arr_FPNTessellationHSToDS_uint_3 = OpTypeArray %FPNTessellationHSToDS %uint_3 +%_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Workgroup %_arr_FPNTessellationHSToDS_uint_3 
+%_ptr_Output__arr_v4float_uint_1 = OpTypePointer Output %_arr_v4float_uint_1 +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v3float = OpTypePointer Output %v3float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Workgroup_FPNTessellationHSToDS = OpTypePointer Workgroup %FPNTessellationHSToDS + %bool = OpTypeBool +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%mat3v3float = OpTypeMatrix %v3float 3 +%_ptr_Function_FVertexFactoryInterpolantsVSToDS = OpTypePointer Function %FVertexFactoryInterpolantsVSToDS +%_ptr_Function_FVertexFactoryInterpolantsVSToPS = OpTypePointer Function %FVertexFactoryInterpolantsVSToPS +%_ptr_Function_FBasePassVSToDS = OpTypePointer Function %FBasePassVSToDS + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %View = OpVariable %_ptr_Uniform_type_View Uniform +%View_PrimitiveSceneData = OpVariable %_ptr_Uniform_type_StructuredBuffer_v4float Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_COLOR0 = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr__arr_v4float_uint_1_uint_12 Input +%in_var_TEXCOORD4 = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input__arr_uint_uint_12 Input +%in_var_LIGHTMAP_ID = OpVariable %_ptr_Input__arr_uint_uint_12 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%gl_InvocationID = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD10_centroid = 
OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_COLOR0 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD0 = OpVariable %_ptr_Output__arr__arr_v4float_uint_1_uint_3 Output +%out_var_TEXCOORD4 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PRIMITIVE_ID = OpVariable %_ptr_Output__arr_uint_uint_3 Output +%out_var_LIGHTMAP_ID = OpVariable %_ptr_Output__arr_uint_uint_3 Output +%out_var_VS_To_DS_Position = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_POSITION = OpVariable %_ptr_Output__arr__arr_v4float_uint_3_uint_3 Output +%out_var_PN_DisplacementScales = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_TessellationMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output +%out_var_PN_POSITION9 = OpVariable %_ptr_Output_v4float Output + %133 = OpConstantNull %FSharedBasePassInterpolants + %134 = OpConstantComposite %FBasePassInterpolantsVSToDS %133 +%float_0_333333343 = OpConstant %float 0.333333343 + %136 = OpConstantComposite %v4float %float_0_333333343 %float_0_333333343 %float_0_333333343 %float_0_333333343 + %137 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +%float_0_166666672 = OpConstant %float 0.166666672 + %139 = OpConstantComposite %v4float %float_0_166666672 %float_0_166666672 %float_0_166666672 %float_0_166666672 + %140 = OpUndef %v4float + +; XXX: Original asm used Function here, which is wrong. +; This patches the SPIR-V to be correct. 
+%temp_var_hullMainRetVal = OpVariable %_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 Workgroup + + %MainHull = OpFunction %void None %111 + %141 = OpLabel +%param_var_I = OpVariable %_ptr_Function__arr_FBasePassVSToDS_uint_12 Function + %142 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD10_centroid + %143 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD11_centroid + %144 = OpLoad %_arr_v4float_uint_12 %in_var_COLOR0 + %145 = OpLoad %_arr__arr_v4float_uint_1_uint_12 %in_var_TEXCOORD0 + %146 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD4 + %147 = OpLoad %_arr_uint_uint_12 %in_var_PRIMITIVE_ID + %148 = OpLoad %_arr_uint_uint_12 %in_var_LIGHTMAP_ID + %149 = OpCompositeExtract %v4float %142 0 + %150 = OpCompositeExtract %v4float %143 0 + %151 = OpCompositeExtract %v4float %144 0 + %152 = OpCompositeExtract %_arr_v4float_uint_1 %145 0 + %153 = OpCompositeExtract %v4float %146 0 + %154 = OpCompositeExtract %uint %147 0 + %155 = OpCompositeExtract %uint %148 0 + %156 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %149 %150 %151 %152 %153 %154 %155 + %157 = OpCompositeExtract %v4float %142 1 + %158 = OpCompositeExtract %v4float %143 1 + %159 = OpCompositeExtract %v4float %144 1 + %160 = OpCompositeExtract %_arr_v4float_uint_1 %145 1 + %161 = OpCompositeExtract %v4float %146 1 + %162 = OpCompositeExtract %uint %147 1 + %163 = OpCompositeExtract %uint %148 1 + %164 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %157 %158 %159 %160 %161 %162 %163 + %165 = OpCompositeExtract %v4float %142 2 + %166 = OpCompositeExtract %v4float %143 2 + %167 = OpCompositeExtract %v4float %144 2 + %168 = OpCompositeExtract %_arr_v4float_uint_1 %145 2 + %169 = OpCompositeExtract %v4float %146 2 + %170 = OpCompositeExtract %uint %147 2 + %171 = OpCompositeExtract %uint %148 2 + %172 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %165 %166 %167 %168 %169 %170 %171 + %173 = OpCompositeExtract %v4float %142 3 + %174 = OpCompositeExtract %v4float %143 3 + %175 = 
OpCompositeExtract %v4float %144 3 + %176 = OpCompositeExtract %_arr_v4float_uint_1 %145 3 + %177 = OpCompositeExtract %v4float %146 3 + %178 = OpCompositeExtract %uint %147 3 + %179 = OpCompositeExtract %uint %148 3 + %180 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %173 %174 %175 %176 %177 %178 %179 + %181 = OpCompositeExtract %v4float %142 4 + %182 = OpCompositeExtract %v4float %143 4 + %183 = OpCompositeExtract %v4float %144 4 + %184 = OpCompositeExtract %_arr_v4float_uint_1 %145 4 + %185 = OpCompositeExtract %v4float %146 4 + %186 = OpCompositeExtract %uint %147 4 + %187 = OpCompositeExtract %uint %148 4 + %188 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %181 %182 %183 %184 %185 %186 %187 + %189 = OpCompositeExtract %v4float %142 5 + %190 = OpCompositeExtract %v4float %143 5 + %191 = OpCompositeExtract %v4float %144 5 + %192 = OpCompositeExtract %_arr_v4float_uint_1 %145 5 + %193 = OpCompositeExtract %v4float %146 5 + %194 = OpCompositeExtract %uint %147 5 + %195 = OpCompositeExtract %uint %148 5 + %196 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %189 %190 %191 %192 %193 %194 %195 + %197 = OpCompositeExtract %v4float %142 6 + %198 = OpCompositeExtract %v4float %143 6 + %199 = OpCompositeExtract %v4float %144 6 + %200 = OpCompositeExtract %_arr_v4float_uint_1 %145 6 + %201 = OpCompositeExtract %v4float %146 6 + %202 = OpCompositeExtract %uint %147 6 + %203 = OpCompositeExtract %uint %148 6 + %204 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %197 %198 %199 %200 %201 %202 %203 + %205 = OpCompositeExtract %v4float %142 7 + %206 = OpCompositeExtract %v4float %143 7 + %207 = OpCompositeExtract %v4float %144 7 + %208 = OpCompositeExtract %_arr_v4float_uint_1 %145 7 + %209 = OpCompositeExtract %v4float %146 7 + %210 = OpCompositeExtract %uint %147 7 + %211 = OpCompositeExtract %uint %148 7 + %212 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %205 %206 %207 %208 %209 %210 %211 + %213 = OpCompositeExtract 
%v4float %142 8 + %214 = OpCompositeExtract %v4float %143 8 + %215 = OpCompositeExtract %v4float %144 8 + %216 = OpCompositeExtract %_arr_v4float_uint_1 %145 8 + %217 = OpCompositeExtract %v4float %146 8 + %218 = OpCompositeExtract %uint %147 8 + %219 = OpCompositeExtract %uint %148 8 + %220 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %213 %214 %215 %216 %217 %218 %219 + %221 = OpCompositeExtract %v4float %142 9 + %222 = OpCompositeExtract %v4float %143 9 + %223 = OpCompositeExtract %v4float %144 9 + %224 = OpCompositeExtract %_arr_v4float_uint_1 %145 9 + %225 = OpCompositeExtract %v4float %146 9 + %226 = OpCompositeExtract %uint %147 9 + %227 = OpCompositeExtract %uint %148 9 + %228 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %221 %222 %223 %224 %225 %226 %227 + %229 = OpCompositeExtract %v4float %142 10 + %230 = OpCompositeExtract %v4float %143 10 + %231 = OpCompositeExtract %v4float %144 10 + %232 = OpCompositeExtract %_arr_v4float_uint_1 %145 10 + %233 = OpCompositeExtract %v4float %146 10 + %234 = OpCompositeExtract %uint %147 10 + %235 = OpCompositeExtract %uint %148 10 + %236 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %229 %230 %231 %232 %233 %234 %235 + %237 = OpCompositeExtract %v4float %142 11 + %238 = OpCompositeExtract %v4float %143 11 + %239 = OpCompositeExtract %v4float %144 11 + %240 = OpCompositeExtract %_arr_v4float_uint_1 %145 11 + %241 = OpCompositeExtract %v4float %146 11 + %242 = OpCompositeExtract %uint %147 11 + %243 = OpCompositeExtract %uint %148 11 + %244 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %237 %238 %239 %240 %241 %242 %243 + %245 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %156 + %246 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %164 + %247 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %172 + %248 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %180 + %249 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %188 + %250 = 
OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %196 + %251 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %204 + %252 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %212 + %253 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %220 + %254 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %228 + %255 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %236 + %256 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %244 + %257 = OpLoad %_arr_v4float_uint_12 %in_var_VS_To_DS_Position + %258 = OpCompositeExtract %v4float %257 0 + %259 = OpCompositeConstruct %FBasePassVSToDS %245 %134 %258 + %260 = OpCompositeExtract %v4float %257 1 + %261 = OpCompositeConstruct %FBasePassVSToDS %246 %134 %260 + %262 = OpCompositeExtract %v4float %257 2 + %263 = OpCompositeConstruct %FBasePassVSToDS %247 %134 %262 + %264 = OpCompositeExtract %v4float %257 3 + %265 = OpCompositeConstruct %FBasePassVSToDS %248 %134 %264 + %266 = OpCompositeExtract %v4float %257 4 + %267 = OpCompositeConstruct %FBasePassVSToDS %249 %134 %266 + %268 = OpCompositeExtract %v4float %257 5 + %269 = OpCompositeConstruct %FBasePassVSToDS %250 %134 %268 + %270 = OpCompositeExtract %v4float %257 6 + %271 = OpCompositeConstruct %FBasePassVSToDS %251 %134 %270 + %272 = OpCompositeExtract %v4float %257 7 + %273 = OpCompositeConstruct %FBasePassVSToDS %252 %134 %272 + %274 = OpCompositeExtract %v4float %257 8 + %275 = OpCompositeConstruct %FBasePassVSToDS %253 %134 %274 + %276 = OpCompositeExtract %v4float %257 9 + %277 = OpCompositeConstruct %FBasePassVSToDS %254 %134 %276 + %278 = OpCompositeExtract %v4float %257 10 + %279 = OpCompositeConstruct %FBasePassVSToDS %255 %134 %278 + %280 = OpCompositeExtract %v4float %257 11 + %281 = OpCompositeConstruct %FBasePassVSToDS %256 %134 %280 + %282 = OpCompositeConstruct %_arr_FBasePassVSToDS_uint_12 %259 %261 %263 %265 %267 %269 %271 %273 %275 %277 %279 %281 + OpStore %param_var_I %282 + %283 = OpLoad %uint %gl_InvocationID 
+ %284 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %283 %int_0 + %285 = OpLoad %FVertexFactoryInterpolantsVSToDS %284 + %286 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %285 0 + %287 = OpCompositeExtract %v4float %286 0 + %288 = OpCompositeExtract %v4float %286 1 + %289 = OpVectorShuffle %v3float %287 %287 0 1 2 + %290 = OpVectorShuffle %v3float %288 %288 0 1 2 + %291 = OpExtInst %v3float %1 Cross %290 %289 + %292 = OpCompositeExtract %float %288 3 + %293 = OpCompositeConstruct %v3float %292 %292 %292 + %294 = OpFMul %v3float %291 %293 + %295 = OpCompositeConstruct %mat3v3float %289 %294 %290 + %296 = OpCompositeExtract %float %288 0 + %297 = OpCompositeExtract %float %288 1 + %298 = OpCompositeExtract %float %288 2 + %299 = OpCompositeConstruct %v4float %296 %297 %298 %float_0 + %300 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToPS %param_var_I %283 %int_0 %int_0 + %301 = OpLoad %FVertexFactoryInterpolantsVSToPS %300 + %302 = OpCompositeExtract %uint %301 5 + %303 = OpIMul %uint %302 %uint_26 + %304 = OpIAdd %uint %303 %uint_22 + %305 = OpAccessChain %_ptr_Uniform_v4float %View_PrimitiveSceneData %int_0 %304 + %306 = OpLoad %v4float %305 + %307 = OpVectorShuffle %v3float %306 %306 0 1 2 + %308 = OpVectorTimesMatrix %v3float %307 %295 + %309 = OpULessThan %bool %283 %uint_2 + %310 = OpIAdd %uint %283 %uint_1 + %311 = OpSelect %uint %309 %310 %uint_0 + %312 = OpIMul %uint %uint_2 %283 + %313 = OpIAdd %uint %uint_3 %312 + %314 = OpIAdd %uint %312 %uint_4 + %315 = OpAccessChain %_ptr_Function_FBasePassVSToDS %param_var_I %283 + %316 = OpLoad %FBasePassVSToDS %315 + %317 = OpAccessChain %_ptr_Function_v4float %param_var_I %283 %int_2 + %318 = OpLoad %v4float %317 + %319 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %311 %int_0 + %320 = OpLoad %FVertexFactoryInterpolantsVSToDS %319 + %321 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %320 0 + %322 = OpCompositeExtract 
%v4float %321 1 + %323 = OpCompositeExtract %float %322 0 + %324 = OpCompositeExtract %float %322 1 + %325 = OpCompositeExtract %float %322 2 + %326 = OpCompositeConstruct %v4float %323 %324 %325 %float_0 + %327 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %313 %int_0 + %328 = OpLoad %FVertexFactoryInterpolantsVSToDS %327 + %329 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %328 0 + %330 = OpCompositeExtract %v4float %329 1 + %331 = OpCompositeExtract %float %330 0 + %332 = OpCompositeExtract %float %330 1 + %333 = OpCompositeExtract %float %330 2 + %334 = OpCompositeConstruct %v4float %331 %332 %333 %float_0 + %335 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %314 %int_0 + %336 = OpLoad %FVertexFactoryInterpolantsVSToDS %335 + %337 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %336 0 + %338 = OpCompositeExtract %v4float %337 1 + %339 = OpCompositeExtract %float %338 0 + %340 = OpCompositeExtract %float %338 1 + %341 = OpCompositeExtract %float %338 2 + %342 = OpCompositeConstruct %v4float %339 %340 %341 %float_0 + %343 = OpLoad %v4float %317 + %344 = OpAccessChain %_ptr_Function_v4float %param_var_I %311 %int_2 + %345 = OpLoad %v4float %344 + %346 = OpFMul %v4float %62 %343 + %347 = OpFAdd %v4float %346 %345 + %348 = OpFSub %v4float %345 %343 + %349 = OpDot %float %348 %299 + %350 = OpCompositeConstruct %v4float %349 %349 %349 %349 + %351 = OpFMul %v4float %350 %299 + %352 = OpFSub %v4float %347 %351 + %353 = OpFMul %v4float %352 %136 + %354 = OpAccessChain %_ptr_Function_v4float %param_var_I %313 %int_2 + %355 = OpLoad %v4float %354 + %356 = OpAccessChain %_ptr_Function_v4float %param_var_I %314 %int_2 + %357 = OpLoad %v4float %356 + %358 = OpFMul %v4float %62 %355 + %359 = OpFAdd %v4float %358 %357 + %360 = OpFSub %v4float %357 %355 + %361 = OpDot %float %360 %334 + %362 = OpCompositeConstruct %v4float %361 %361 %361 %361 + %363 = OpFMul %v4float %362 %334 + %364 = OpFSub %v4float 
%359 %363 + %365 = OpFMul %v4float %364 %136 + %366 = OpFAdd %v4float %353 %365 + %367 = OpFMul %v4float %366 %137 + %368 = OpLoad %v4float %344 + %369 = OpLoad %v4float %317 + %370 = OpFMul %v4float %62 %368 + %371 = OpFAdd %v4float %370 %369 + %372 = OpFSub %v4float %369 %368 + %373 = OpDot %float %372 %326 + %374 = OpCompositeConstruct %v4float %373 %373 %373 %373 + %375 = OpFMul %v4float %374 %326 + %376 = OpFSub %v4float %371 %375 + %377 = OpFMul %v4float %376 %136 + %378 = OpLoad %v4float %356 + %379 = OpLoad %v4float %354 + %380 = OpFMul %v4float %62 %378 + %381 = OpFAdd %v4float %380 %379 + %382 = OpFSub %v4float %379 %378 + %383 = OpDot %float %382 %342 + %384 = OpCompositeConstruct %v4float %383 %383 %383 %383 + %385 = OpFMul %v4float %384 %342 + %386 = OpFSub %v4float %381 %385 + %387 = OpFMul %v4float %386 %136 + %388 = OpFAdd %v4float %377 %387 + %389 = OpFMul %v4float %388 %137 + %390 = OpCompositeConstruct %_arr_v4float_uint_3 %318 %367 %389 + %391 = OpCompositeConstruct %FPNTessellationHSToDS %316 %390 %308 %float_1 %float_1 + %392 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %316 0 + %393 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %392 0 + %394 = OpCompositeExtract %v4float %393 0 + %395 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD10_centroid %283 + OpStore %395 %394 + %396 = OpCompositeExtract %v4float %393 1 + %397 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD11_centroid %283 + OpStore %397 %396 + %398 = OpCompositeExtract %v4float %393 2 + %399 = OpAccessChain %_ptr_Output_v4float %out_var_COLOR0 %283 + OpStore %399 %398 + %400 = OpCompositeExtract %_arr_v4float_uint_1 %393 3 + %401 = OpAccessChain %_ptr_Output__arr_v4float_uint_1 %out_var_TEXCOORD0 %283 + OpStore %401 %400 + %402 = OpCompositeExtract %v4float %393 4 + %403 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD4 %283 + OpStore %403 %402 + %404 = OpCompositeExtract %uint %393 5 + %405 = OpAccessChain %_ptr_Output_uint %out_var_PRIMITIVE_ID 
%283 + OpStore %405 %404 + %406 = OpCompositeExtract %uint %393 6 + %407 = OpAccessChain %_ptr_Output_uint %out_var_LIGHTMAP_ID %283 + OpStore %407 %406 + %408 = OpCompositeExtract %v4float %316 2 + %409 = OpAccessChain %_ptr_Output_v4float %out_var_VS_To_DS_Position %283 + OpStore %409 %408 + %410 = OpAccessChain %_ptr_Output__arr_v4float_uint_3 %out_var_PN_POSITION %283 + OpStore %410 %390 + %411 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DisplacementScales %283 + OpStore %411 %308 + %412 = OpAccessChain %_ptr_Output_float %out_var_PN_TessellationMultiplier %283 + OpStore %412 %float_1 + %413 = OpAccessChain %_ptr_Output_float %out_var_PN_WorldDisplacementMultiplier %283 + OpStore %413 %float_1 + %414 = OpAccessChain %_ptr_Workgroup_FPNTessellationHSToDS %temp_var_hullMainRetVal %283 + OpStore %414 %391 + OpControlBarrier %uint_2 %uint_4 %uint_0 + %415 = OpIEqual %bool %283 %uint_0 + OpSelectionMerge %if_merge None + OpBranchConditional %415 %416 %if_merge + %416 = OpLabel + %417 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %418 = OpLoad %mat4v4float %417 + %419 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_6 + %420 = OpLoad %mat4v4float %419 + %421 = OpAccessChain %_ptr_Uniform_v3float %View %int_27 + %422 = OpLoad %v3float %421 + %423 = OpAccessChain %_ptr_Uniform_float %View %int_77 + %424 = OpLoad %float %423 + %425 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_0 + %426 = OpLoad %v4float %425 + %427 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_1 + %428 = OpLoad %v4float %427 + %429 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_2 + %430 = OpLoad %v4float %429 + %431 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_0 + %432 = OpLoad %v4float %431 + %433 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_1 + %434 = OpLoad %v4float %433 + %435 = 
OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_2 + %436 = OpLoad %v4float %435 + %437 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_0 + %438 = OpLoad %v4float %437 + %439 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_1 + %440 = OpLoad %v4float %439 + %441 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_2 + %442 = OpLoad %v4float %441 + %443 = OpFAdd %v4float %428 %430 + %444 = OpFAdd %v4float %443 %434 + %445 = OpFAdd %v4float %444 %436 + %446 = OpFAdd %v4float %445 %440 + %447 = OpFAdd %v4float %446 %442 + %448 = OpFMul %v4float %447 %139 + %449 = OpFAdd %v4float %438 %432 + %450 = OpFAdd %v4float %449 %426 + %451 = OpFMul %v4float %450 %136 + %452 = OpFSub %v4float %448 %451 + %453 = OpFMul %v4float %452 %137 + %454 = OpFAdd %v4float %448 %453 + %455 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_1 %int_3 + %456 = OpLoad %float %455 + %457 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_2 %int_3 + %458 = OpLoad %float %457 + %459 = OpFAdd %float %456 %458 + %460 = OpFMul %float %float_0_5 %459 + %461 = OpCompositeInsert %v4float %460 %140 0 + %462 = OpLoad %float %457 + %463 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_0 %int_3 + %464 = OpLoad %float %463 + %465 = OpFAdd %float %462 %464 + %466 = OpFMul %float %float_0_5 %465 + %467 = OpCompositeInsert %v4float %466 %461 1 + %468 = OpLoad %float %463 + %469 = OpLoad %float %455 + %470 = OpFAdd %float %468 %469 + %471 = OpFMul %float %float_0_5 %470 + %472 = OpCompositeInsert %v4float %471 %467 2 + %473 = OpLoad %float %463 + %474 = OpLoad %float %455 + %475 = OpFAdd %float %473 %474 + %476 = OpLoad %float %457 + %477 = OpFAdd %float %475 %476 + %478 = OpFMul %float %float_0_333000004 %477 + %479 = OpCompositeInsert %v4float %478 %472 3 + %480 = OpVectorShuffle %v3float %426 %426 0 1 2 + %481 = 
OpVectorShuffle %v3float %432 %432 0 1 2 + %482 = OpVectorShuffle %v3float %438 %438 0 1 2 + OpBranch %483 + %483 = OpLabel + OpLoopMerge %484 %485 None + OpBranch %486 + %486 = OpLabel + %487 = OpMatrixTimesVector %v4float %420 %76 + %488 = OpCompositeExtract %float %426 0 + %489 = OpCompositeExtract %float %426 1 + %490 = OpCompositeExtract %float %426 2 + %491 = OpCompositeConstruct %v4float %488 %489 %490 %float_1 + %492 = OpMatrixTimesVector %v4float %418 %491 + %493 = OpVectorShuffle %v3float %492 %492 0 1 2 + %494 = OpVectorShuffle %v3float %487 %487 0 1 2 + %495 = OpFSub %v3float %493 %494 + %496 = OpCompositeExtract %float %492 3 + %497 = OpCompositeExtract %float %487 3 + %498 = OpFAdd %float %496 %497 + %499 = OpCompositeConstruct %v3float %498 %498 %498 + %500 = OpFOrdLessThan %v3bool %495 %499 + %501 = OpSelect %v3int %500 %81 %73 + %502 = OpFAdd %v3float %493 %494 + %503 = OpFNegate %float %496 + %504 = OpFSub %float %503 %497 + %505 = OpCompositeConstruct %v3float %504 %504 %504 + %506 = OpFOrdGreaterThan %v3bool %502 %505 + %507 = OpSelect %v3int %506 %81 %73 + %508 = OpIMul %v3int %82 %507 + %509 = OpIAdd %v3int %501 %508 + %510 = OpCompositeExtract %float %432 0 + %511 = OpCompositeExtract %float %432 1 + %512 = OpCompositeExtract %float %432 2 + %513 = OpCompositeConstruct %v4float %510 %511 %512 %float_1 + %514 = OpMatrixTimesVector %v4float %418 %513 + %515 = OpVectorShuffle %v3float %514 %514 0 1 2 + %516 = OpFSub %v3float %515 %494 + %517 = OpCompositeExtract %float %514 3 + %518 = OpFAdd %float %517 %497 + %519 = OpCompositeConstruct %v3float %518 %518 %518 + %520 = OpFOrdLessThan %v3bool %516 %519 + %521 = OpSelect %v3int %520 %81 %73 + %522 = OpFAdd %v3float %515 %494 + %523 = OpFNegate %float %517 + %524 = OpFSub %float %523 %497 + %525 = OpCompositeConstruct %v3float %524 %524 %524 + %526 = OpFOrdGreaterThan %v3bool %522 %525 + %527 = OpSelect %v3int %526 %81 %73 + %528 = OpIMul %v3int %82 %527 + %529 = OpIAdd %v3int %521 %528 + %530 = 
OpBitwiseOr %v3int %509 %529 + %531 = OpCompositeExtract %float %438 0 + %532 = OpCompositeExtract %float %438 1 + %533 = OpCompositeExtract %float %438 2 + %534 = OpCompositeConstruct %v4float %531 %532 %533 %float_1 + %535 = OpMatrixTimesVector %v4float %418 %534 + %536 = OpVectorShuffle %v3float %535 %535 0 1 2 + %537 = OpFSub %v3float %536 %494 + %538 = OpCompositeExtract %float %535 3 + %539 = OpFAdd %float %538 %497 + %540 = OpCompositeConstruct %v3float %539 %539 %539 + %541 = OpFOrdLessThan %v3bool %537 %540 + %542 = OpSelect %v3int %541 %81 %73 + %543 = OpFAdd %v3float %536 %494 + %544 = OpFNegate %float %538 + %545 = OpFSub %float %544 %497 + %546 = OpCompositeConstruct %v3float %545 %545 %545 + %547 = OpFOrdGreaterThan %v3bool %543 %546 + %548 = OpSelect %v3int %547 %81 %73 + %549 = OpIMul %v3int %82 %548 + %550 = OpIAdd %v3int %542 %549 + %551 = OpBitwiseOr %v3int %530 %550 + %552 = OpINotEqual %v3bool %551 %74 + %553 = OpAny %bool %552 + OpSelectionMerge %554 None + OpBranchConditional %553 %555 %554 + %555 = OpLabel + OpBranch %484 + %554 = OpLabel + %556 = OpFSub %v3float %480 %481 + %557 = OpFSub %v3float %481 %482 + %558 = OpFSub %v3float %482 %480 + %559 = OpFAdd %v3float %480 %481 + %560 = OpFMul %v3float %77 %559 + %561 = OpFSub %v3float %560 %422 + %562 = OpFAdd %v3float %481 %482 + %563 = OpFMul %v3float %77 %562 + %564 = OpFSub %v3float %563 %422 + %565 = OpFAdd %v3float %482 %480 + %566 = OpFMul %v3float %77 %565 + %567 = OpFSub %v3float %566 %422 + %568 = OpDot %float %557 %557 + %569 = OpDot %float %564 %564 + %570 = OpFDiv %float %568 %569 + %571 = OpExtInst %float %1 Sqrt %570 + %572 = OpDot %float %558 %558 + %573 = OpDot %float %567 %567 + %574 = OpFDiv %float %572 %573 + %575 = OpExtInst %float %1 Sqrt %574 + %576 = OpDot %float %556 %556 + %577 = OpDot %float %561 %561 + %578 = OpFDiv %float %576 %577 + %579 = OpExtInst %float %1 Sqrt %578 + %580 = OpCompositeConstruct %v4float %571 %575 %579 %float_1 + %581 = OpFAdd %float %571 %575 
+ %582 = OpFAdd %float %581 %579 + %583 = OpFMul %float %float_0_333000004 %582 + %584 = OpCompositeInsert %v4float %583 %580 3 + %585 = OpCompositeConstruct %v4float %424 %424 %424 %424 + %586 = OpFMul %v4float %585 %584 + OpBranch %484 + %485 = OpLabel + OpBranch %483 + %484 = OpLabel + %587 = OpPhi %v4float %76 %555 %586 %554 + %588 = OpFMul %v4float %479 %587 + %589 = OpExtInst %v4float %1 FClamp %588 %67 %69 + %590 = OpCompositeExtract %float %589 0 + %591 = OpCompositeExtract %float %589 1 + %592 = OpCompositeExtract %float %589 2 + %593 = OpCompositeExtract %float %589 3 + %594 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_0 + OpStore %594 %590 + %595 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_1 + OpStore %595 %591 + %596 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_2 + OpStore %596 %592 + %597 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %uint_0 + OpStore %597 %593 + OpStore %out_var_PN_POSITION9 %454 + OpBranch %if_merge + %if_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc b/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc new file mode 100644 index 00000000000..a146896b90b --- /dev/null +++ b/shaders-ue4/asm/tesc/hs-input-array-access.invalid.asm.tesc @@ -0,0 +1,1264 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 607 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %MainHull "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_VS_To_DS_Position %in_var_VS_To_DS_VertexID %gl_InvocationID %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_VS_To_DS_Position %out_var_VS_To_DS_VertexID %out_var_PN_POSITION %out_var_PN_DisplacementScales %out_var_PN_TessellationMultiplier 
%out_var_PN_WorldDisplacementMultiplier %out_var_PN_DominantVertex %out_var_PN_DominantVertex1 %out_var_PN_DominantVertex2 %out_var_PN_DominantEdge %out_var_PN_DominantEdge1 %out_var_PN_DominantEdge2 %out_var_PN_DominantEdge3 %out_var_PN_DominantEdge4 %out_var_PN_DominantEdge5 %gl_TessLevelOuter %gl_TessLevelInner %out_var_PN_POSITION9 + OpExecutionMode %MainHull Triangles + OpExecutionMode %MainHull SpacingFractionalOdd + OpExecutionMode %MainHull VertexOrderCw + OpExecutionMode %MainHull OutputVertices 3 + OpSource HLSL 600 + OpName %FPNTessellationHSToDS "FPNTessellationHSToDS" + OpMemberName %FPNTessellationHSToDS 0 "PassSpecificData" + OpMemberName %FPNTessellationHSToDS 1 "WorldPosition" + OpMemberName %FPNTessellationHSToDS 2 "DisplacementScale" + OpMemberName %FPNTessellationHSToDS 3 "TessellationMultiplier" + OpMemberName %FPNTessellationHSToDS 4 "WorldDisplacementMultiplier" + OpMemberName %FPNTessellationHSToDS 5 "DominantVertex" + OpMemberName %FPNTessellationHSToDS 6 "DominantEdge" + OpName %FHitProxyVSToDS "FHitProxyVSToDS" + OpMemberName %FHitProxyVSToDS 0 "FactoryInterpolants" + OpMemberName %FHitProxyVSToDS 1 "Position" + OpMemberName %FHitProxyVSToDS 2 "VertexID" + OpName %FVertexFactoryInterpolantsVSToDS "FVertexFactoryInterpolantsVSToDS" + OpMemberName %FVertexFactoryInterpolantsVSToDS 0 "InterpolantsVSToPS" + OpName %FVertexFactoryInterpolantsVSToPS "FVertexFactoryInterpolantsVSToPS" + OpMemberName %FVertexFactoryInterpolantsVSToPS 0 "TangentToWorld0" + OpMemberName %FVertexFactoryInterpolantsVSToPS 1 "TangentToWorld2" + OpName %FHullShaderConstantDominantVertexData "FHullShaderConstantDominantVertexData" + OpMemberName %FHullShaderConstantDominantVertexData 0 "UV" + OpMemberName %FHullShaderConstantDominantVertexData 1 "Normal" + OpMemberName %FHullShaderConstantDominantVertexData 2 "Tangent" + OpName %FHullShaderConstantDominantEdgeData "FHullShaderConstantDominantEdgeData" + OpMemberName %FHullShaderConstantDominantEdgeData 0 "UV0" + 
OpMemberName %FHullShaderConstantDominantEdgeData 1 "UV1" + OpMemberName %FHullShaderConstantDominantEdgeData 2 "Normal0" + OpMemberName %FHullShaderConstantDominantEdgeData 3 "Normal1" + OpMemberName %FHullShaderConstantDominantEdgeData 4 "Tangent0" + OpMemberName %FHullShaderConstantDominantEdgeData 5 "Tangent1" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 
"View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName 
%type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 
"View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName 
%type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + 
OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_DrawsVelocity" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + OpMemberName %type_Primitive 16 "Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 
"Primitive_LightingChannelMask" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName %type_Primitive 19 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 20 "Primitive_PreSkinnedLocalBounds" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpMemberName %type_Primitive 22 "Primitive_OutputVelocity" + OpMemberName %type_Primitive 23 "PrePadding_Primitive_420" + OpMemberName %type_Primitive 24 "PrePadding_Primitive_424" + OpMemberName %type_Primitive 25 "PrePadding_Primitive_428" + OpMemberName %type_Primitive 26 "Primitive_CustomPrimitiveData" + OpName %Primitive "Primitive" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %in_var_VS_To_DS_VertexID "in.var.VS_To_DS_VertexID" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_VS_To_DS_Position "out.var.VS_To_DS_Position" + OpName %out_var_VS_To_DS_VertexID "out.var.VS_To_DS_VertexID" + OpName %out_var_PN_POSITION "out.var.PN_POSITION" + OpName %out_var_PN_DisplacementScales "out.var.PN_DisplacementScales" + OpName %out_var_PN_TessellationMultiplier "out.var.PN_TessellationMultiplier" + OpName %out_var_PN_WorldDisplacementMultiplier "out.var.PN_WorldDisplacementMultiplier" + OpName %out_var_PN_DominantVertex "out.var.PN_DominantVertex" + OpName %out_var_PN_DominantVertex1 "out.var.PN_DominantVertex1" + OpName %out_var_PN_DominantVertex2 "out.var.PN_DominantVertex2" + OpName %out_var_PN_DominantEdge "out.var.PN_DominantEdge" + OpName %out_var_PN_DominantEdge1 "out.var.PN_DominantEdge1" + OpName %out_var_PN_DominantEdge2 "out.var.PN_DominantEdge2" + OpName %out_var_PN_DominantEdge3 "out.var.PN_DominantEdge3" + OpName %out_var_PN_DominantEdge4 "out.var.PN_DominantEdge4" + OpName %out_var_PN_DominantEdge5 
"out.var.PN_DominantEdge5" + OpName %out_var_PN_POSITION9 "out.var.PN_POSITION9" + OpName %MainHull "MainHull" + OpName %param_var_I "param.var.I" + OpName %temp_var_hullMainRetVal "temp.var.hullMainRetVal" + OpName %if_merge "if.merge" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %in_var_VS_To_DS_VertexID UserSemantic "VS_To_DS_VertexID" + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorateString %gl_InvocationID UserSemantic "SV_OutputControlPointID" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %out_var_VS_To_DS_VertexID UserSemantic "VS_To_DS_VertexID" + OpDecorateString %out_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %out_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %out_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %out_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorateString %out_var_PN_DominantVertex UserSemantic "PN_DominantVertex" + OpDecorateString %out_var_PN_DominantVertex1 UserSemantic "PN_DominantVertex" + OpDecorateString %out_var_PN_DominantVertex2 UserSemantic "PN_DominantVertex" + OpDecorateString %out_var_PN_DominantEdge UserSemantic "PN_DominantEdge" + OpDecorateString %out_var_PN_DominantEdge1 UserSemantic "PN_DominantEdge" + OpDecorateString %out_var_PN_DominantEdge2 UserSemantic "PN_DominantEdge" + OpDecorateString %out_var_PN_DominantEdge3 UserSemantic "PN_DominantEdge" + OpDecorateString %out_var_PN_DominantEdge4 UserSemantic "PN_DominantEdge" + OpDecorateString 
%out_var_PN_DominantEdge5 UserSemantic "PN_DominantEdge" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %out_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %out_var_PN_POSITION9 Patch + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_VS_To_DS_Position Location 2 + OpDecorate %in_var_VS_To_DS_VertexID Location 3 + OpDecorate %out_var_PN_DisplacementScales Location 0 + OpDecorate %out_var_PN_DominantEdge Location 1 + OpDecorate %out_var_PN_DominantEdge1 Location 2 + OpDecorate %out_var_PN_DominantEdge2 Location 3 + OpDecorate %out_var_PN_DominantEdge3 Location 4 + OpDecorate %out_var_PN_DominantEdge4 Location 5 + OpDecorate %out_var_PN_DominantEdge5 Location 6 + OpDecorate %out_var_PN_DominantVertex Location 7 + OpDecorate %out_var_PN_DominantVertex1 Location 8 + OpDecorate %out_var_PN_DominantVertex2 Location 9 + OpDecorate %out_var_PN_POSITION Location 10 + OpDecorate %out_var_PN_POSITION9 Location 13 + OpDecorate %out_var_PN_TessellationMultiplier Location 14 + OpDecorate %out_var_PN_WorldDisplacementMultiplier Location 15 + OpDecorate %out_var_TEXCOORD10_centroid Location 16 + OpDecorate %out_var_TEXCOORD11_centroid Location 17 + OpDecorate %out_var_VS_To_DS_Position Location 18 + OpDecorate %out_var_VS_To_DS_VertexID Location 19 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 
MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 
928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + 
OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate 
%type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + 
OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate 
%type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate %type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 412 + OpMemberDecorate 
%type_Primitive 22 Offset 416 + OpMemberDecorate %type_Primitive 23 Offset 420 + OpMemberDecorate %type_Primitive 24 Offset 424 + OpMemberDecorate %type_Primitive 25 Offset 428 + OpMemberDecorate %type_Primitive 26 Offset 432 + OpDecorate %type_Primitive Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_2 = OpConstant %float 2 + %63 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %float_0_5 = OpConstant %float 0.5 + %int_3 = OpConstant %int 3 +%float_0_333000004 = OpConstant %float 0.333000004 + %float_1 = OpConstant %float 1 + %68 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_15 = OpConstant %float 15 + %70 = OpConstantComposite %v4float %float_15 %float_15 %float_15 %float_15 +%FVertexFactoryInterpolantsVSToPS = OpTypeStruct %v4float %v4float +%FVertexFactoryInterpolantsVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToPS +%FHitProxyVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToDS %v4float %uint +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%FHullShaderConstantDominantVertexData = OpTypeStruct %v2float %v4float %v3float +%FHullShaderConstantDominantEdgeData = OpTypeStruct %v2float %v2float %v4float %v4float %v3float %v3float +%FPNTessellationHSToDS = OpTypeStruct %FHitProxyVSToDS %_arr_v4float_uint_3 %v3float %float %float %FHullShaderConstantDominantVertexData %FHullShaderConstantDominantEdgeData + %uint_9 = OpConstant %uint 9 + %v3int = OpTypeVector %int 3 + %74 = 
OpConstantComposite %v3int %int_0 %int_0 %int_0 + %75 = OpConstantComposite %v3int %int_3 %int_3 %int_3 + %float_0 = OpConstant %float 0 + %77 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %78 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 + %int_78 = OpConstant %int 78 + %int_15 = OpConstant %int 15 + %int_7 = OpConstant %int 7 + %int_28 = OpConstant %int 28 + %83 = OpConstantComposite %v3int %int_1 %int_1 %int_1 + %84 = OpConstantComposite %v3int %int_2 %int_2 %int_2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float 
%uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %uint %v3float %uint %v3float %int %uint %uint %uint %uint %_arr_v4float_uint_4 +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive + %uint_12 = OpConstant %uint 12 +%_arr_v4float_uint_12 = OpTypeArray %v4float %uint_12 +%_ptr_Input__arr_v4float_uint_12 = OpTypePointer Input %_arr_v4float_uint_12 +%_arr_uint_uint_12 = OpTypeArray %uint %uint_12 +%_ptr_Input__arr_uint_uint_12 = OpTypePointer Input %_arr_uint_uint_12 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Output__arr_uint_uint_3 = OpTypePointer Output %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Output__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Output %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Output__arr_v3float_uint_3 = OpTypePointer Output %_arr_v3float_uint_3 +%_ptr_Output__arr_float_uint_3 = OpTypePointer Output %_arr_float_uint_3 +%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 +%_ptr_Output__arr_v2float_uint_3 = OpTypePointer Output %_arr_v2float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output_v4float = OpTypePointer Output 
%v4float + %void = OpTypeVoid + %109 = OpTypeFunction %void +%_arr_FHitProxyVSToDS_uint_12 = OpTypeArray %FHitProxyVSToDS %uint_12 +%_ptr_Function__arr_FHitProxyVSToDS_uint_12 = OpTypePointer Function %_arr_FHitProxyVSToDS_uint_12 +%_arr_FPNTessellationHSToDS_uint_3 = OpTypeArray %FPNTessellationHSToDS %uint_3 +%_ptr_Function__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Function %_arr_FPNTessellationHSToDS_uint_3 +%_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Workgroup %_arr_FPNTessellationHSToDS_uint_3 +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_v3float = OpTypePointer Output %v3float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Function_FPNTessellationHSToDS = OpTypePointer Function %FPNTessellationHSToDS +%_ptr_Workgroup_FPNTessellationHSToDS = OpTypePointer Workgroup %FPNTessellationHSToDS + %bool = OpTypeBool +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%mat3v3float = OpTypeMatrix %v3float 3 +%_ptr_Function_FVertexFactoryInterpolantsVSToDS = OpTypePointer Function %FVertexFactoryInterpolantsVSToDS +%_ptr_Function_FHitProxyVSToDS = OpTypePointer Function %FHitProxyVSToDS + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %View = OpVariable %_ptr_Uniform_type_View Uniform + %Primitive = OpVariable %_ptr_Uniform_type_Primitive Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_12 
Input +%in_var_VS_To_DS_VertexID = OpVariable %_ptr_Input__arr_uint_uint_12 Input +%gl_InvocationID = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_VS_To_DS_Position = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_VS_To_DS_VertexID = OpVariable %_ptr_Output__arr_uint_uint_3 Output +%out_var_PN_POSITION = OpVariable %_ptr_Output__arr__arr_v4float_uint_3_uint_3 Output +%out_var_PN_DisplacementScales = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_TessellationMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_PN_DominantVertex = OpVariable %_ptr_Output__arr_v2float_uint_3 Output +%out_var_PN_DominantVertex1 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_DominantVertex2 = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_DominantEdge = OpVariable %_ptr_Output__arr_v2float_uint_3 Output +%out_var_PN_DominantEdge1 = OpVariable %_ptr_Output__arr_v2float_uint_3 Output +%out_var_PN_DominantEdge2 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_DominantEdge3 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_DominantEdge4 = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_DominantEdge5 = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output +%out_var_PN_POSITION9 = OpVariable %_ptr_Output_v4float Output + %130 = OpConstantNull %v2float +%float_0_333333343 = OpConstant %float 0.333333343 + %132 = OpConstantComposite %v4float %float_0_333333343 %float_0_333333343 %float_0_333333343 %float_0_333333343 + %133 = OpConstantComposite %v4float %float_0_5 
%float_0_5 %float_0_5 %float_0_5 +%float_0_166666672 = OpConstant %float 0.166666672 + %135 = OpConstantComposite %v4float %float_0_166666672 %float_0_166666672 %float_0_166666672 %float_0_166666672 + %136 = OpUndef %v4float + +; XXX: Original asm used Function here, which is wrong. +; This patches the SPIR-V to be correct. +%temp_var_hullMainRetVal = OpVariable %_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 Workgroup + + %MainHull = OpFunction %void None %109 + %137 = OpLabel +%param_var_I = OpVariable %_ptr_Function__arr_FHitProxyVSToDS_uint_12 Function + %138 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD10_centroid + %139 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD11_centroid + %140 = OpCompositeExtract %v4float %138 0 + %141 = OpCompositeExtract %v4float %139 0 + %142 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %140 %141 + %143 = OpCompositeExtract %v4float %138 1 + %144 = OpCompositeExtract %v4float %139 1 + %145 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %143 %144 + %146 = OpCompositeExtract %v4float %138 2 + %147 = OpCompositeExtract %v4float %139 2 + %148 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %146 %147 + %149 = OpCompositeExtract %v4float %138 3 + %150 = OpCompositeExtract %v4float %139 3 + %151 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %149 %150 + %152 = OpCompositeExtract %v4float %138 4 + %153 = OpCompositeExtract %v4float %139 4 + %154 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %152 %153 + %155 = OpCompositeExtract %v4float %138 5 + %156 = OpCompositeExtract %v4float %139 5 + %157 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %155 %156 + %158 = OpCompositeExtract %v4float %138 6 + %159 = OpCompositeExtract %v4float %139 6 + %160 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %158 %159 + %161 = OpCompositeExtract %v4float %138 7 + %162 = OpCompositeExtract %v4float %139 7 + %163 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %161 %162 + %164 
= OpCompositeExtract %v4float %138 8 + %165 = OpCompositeExtract %v4float %139 8 + %166 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %164 %165 + %167 = OpCompositeExtract %v4float %138 9 + %168 = OpCompositeExtract %v4float %139 9 + %169 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %167 %168 + %170 = OpCompositeExtract %v4float %138 10 + %171 = OpCompositeExtract %v4float %139 10 + %172 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %170 %171 + %173 = OpCompositeExtract %v4float %138 11 + %174 = OpCompositeExtract %v4float %139 11 + %175 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %173 %174 + %176 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %142 + %177 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %145 + %178 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %148 + %179 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %151 + %180 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %154 + %181 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %157 + %182 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %160 + %183 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %163 + %184 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %166 + %185 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %169 + %186 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %172 + %187 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %175 + %188 = OpLoad %_arr_v4float_uint_12 %in_var_VS_To_DS_Position + %189 = OpLoad %_arr_uint_uint_12 %in_var_VS_To_DS_VertexID + %190 = OpCompositeExtract %v4float %188 0 + %191 = OpCompositeExtract %uint %189 0 + %192 = OpCompositeConstruct %FHitProxyVSToDS %176 %190 %191 + %193 = OpCompositeExtract %v4float %188 1 + %194 = OpCompositeExtract %uint %189 1 + %195 = OpCompositeConstruct %FHitProxyVSToDS %177 %193 %194 + %196 = OpCompositeExtract %v4float %188 2 + %197 = OpCompositeExtract %uint %189 2 + %198 = 
OpCompositeConstruct %FHitProxyVSToDS %178 %196 %197 + %199 = OpCompositeExtract %v4float %188 3 + %200 = OpCompositeExtract %uint %189 3 + %201 = OpCompositeConstruct %FHitProxyVSToDS %179 %199 %200 + %202 = OpCompositeExtract %v4float %188 4 + %203 = OpCompositeExtract %uint %189 4 + %204 = OpCompositeConstruct %FHitProxyVSToDS %180 %202 %203 + %205 = OpCompositeExtract %v4float %188 5 + %206 = OpCompositeExtract %uint %189 5 + %207 = OpCompositeConstruct %FHitProxyVSToDS %181 %205 %206 + %208 = OpCompositeExtract %v4float %188 6 + %209 = OpCompositeExtract %uint %189 6 + %210 = OpCompositeConstruct %FHitProxyVSToDS %182 %208 %209 + %211 = OpCompositeExtract %v4float %188 7 + %212 = OpCompositeExtract %uint %189 7 + %213 = OpCompositeConstruct %FHitProxyVSToDS %183 %211 %212 + %214 = OpCompositeExtract %v4float %188 8 + %215 = OpCompositeExtract %uint %189 8 + %216 = OpCompositeConstruct %FHitProxyVSToDS %184 %214 %215 + %217 = OpCompositeExtract %v4float %188 9 + %218 = OpCompositeExtract %uint %189 9 + %219 = OpCompositeConstruct %FHitProxyVSToDS %185 %217 %218 + %220 = OpCompositeExtract %v4float %188 10 + %221 = OpCompositeExtract %uint %189 10 + %222 = OpCompositeConstruct %FHitProxyVSToDS %186 %220 %221 + %223 = OpCompositeExtract %v4float %188 11 + %224 = OpCompositeExtract %uint %189 11 + %225 = OpCompositeConstruct %FHitProxyVSToDS %187 %223 %224 + %226 = OpCompositeConstruct %_arr_FHitProxyVSToDS_uint_12 %192 %195 %198 %201 %204 %207 %210 %213 %216 %219 %222 %225 + OpStore %param_var_I %226 + %227 = OpLoad %uint %gl_InvocationID + %228 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %227 %int_0 + %229 = OpLoad %FVertexFactoryInterpolantsVSToDS %228 + %230 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %229 0 + %231 = OpCompositeExtract %v4float %230 0 + %232 = OpCompositeExtract %v4float %230 1 + %233 = OpVectorShuffle %v3float %231 %231 0 1 2 + %234 = OpVectorShuffle %v3float %232 %232 0 1 2 + %235 = OpExtInst 
%v3float %1 Cross %234 %233 + %236 = OpCompositeExtract %float %232 3 + %237 = OpCompositeConstruct %v3float %236 %236 %236 + %238 = OpFMul %v3float %235 %237 + %239 = OpCompositeConstruct %mat3v3float %233 %238 %234 + %240 = OpCompositeExtract %float %232 0 + %241 = OpCompositeExtract %float %232 1 + %242 = OpCompositeExtract %float %232 2 + %243 = OpCompositeConstruct %v4float %240 %241 %242 %float_0 + %244 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_15 + %245 = OpLoad %v4float %244 + %246 = OpVectorShuffle %v3float %245 %245 0 1 2 + %247 = OpVectorTimesMatrix %v3float %246 %239 + %248 = OpULessThan %bool %227 %uint_2 + %249 = OpIAdd %uint %227 %uint_1 + %250 = OpSelect %uint %248 %249 %uint_0 + %251 = OpIMul %uint %uint_2 %227 + %252 = OpIAdd %uint %uint_3 %251 + %253 = OpIAdd %uint %251 %uint_4 + %254 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %227 + %255 = OpLoad %FHitProxyVSToDS %254 + %256 = OpAccessChain %_ptr_Function_v4float %param_var_I %227 %int_1 + %257 = OpLoad %v4float %256 + %258 = OpULessThan %bool %250 %uint_2 + %259 = OpIAdd %uint %250 %uint_1 + %260 = OpSelect %uint %258 %259 %uint_0 + %261 = OpIMul %uint %uint_2 %250 + %262 = OpIAdd %uint %uint_3 %261 + %263 = OpIAdd %uint %261 %uint_4 + %264 = OpIAdd %uint %uint_9 %227 + %265 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %264 + %266 = OpLoad %FHitProxyVSToDS %265 + %267 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %266 0 + %268 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %267 0 + %269 = OpCompositeExtract %v4float %268 0 + %270 = OpCompositeExtract %v4float %268 1 + %271 = OpVectorShuffle %v3float %269 %269 0 1 2 + %272 = OpCompositeExtract %float %270 0 + %273 = OpCompositeExtract %float %270 1 + %274 = OpCompositeExtract %float %270 2 + %275 = OpCompositeConstruct %v4float %272 %273 %274 %float_0 + %276 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %250 + %277 = OpLoad %FHitProxyVSToDS %276 + %278 = 
OpCompositeExtract %uint %277 2 + %279 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %260 + %280 = OpLoad %FHitProxyVSToDS %279 + %281 = OpCompositeExtract %uint %280 2 + %282 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %262 + %283 = OpLoad %FHitProxyVSToDS %282 + %284 = OpCompositeExtract %uint %283 2 + %285 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %263 + %286 = OpLoad %FHitProxyVSToDS %285 + %287 = OpCompositeExtract %uint %286 2 + %288 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %277 0 + %289 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %288 0 + %290 = OpCompositeExtract %v4float %289 0 + %291 = OpCompositeExtract %v4float %289 1 + %292 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %280 0 + %293 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %292 0 + %294 = OpCompositeExtract %v4float %293 0 + %295 = OpCompositeExtract %v4float %293 1 + %296 = OpULessThan %bool %284 %278 + %297 = OpIEqual %bool %284 %278 + %298 = OpULessThan %bool %287 %281 + %299 = OpLogicalAnd %bool %297 %298 + %300 = OpLogicalOr %bool %296 %299 + OpSelectionMerge %301 None + OpBranchConditional %300 %302 %301 + %302 = OpLabel + %303 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %283 0 + %304 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %303 0 + %305 = OpCompositeExtract %v4float %304 0 + %306 = OpCompositeExtract %v4float %304 1 + %307 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %286 0 + %308 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %307 0 + %309 = OpCompositeExtract %v4float %308 0 + %310 = OpCompositeExtract %v4float %308 1 + OpBranch %301 + %301 = OpLabel + %311 = OpPhi %v4float %294 %137 %309 %302 + %312 = OpPhi %v4float %295 %137 %310 %302 + %313 = OpPhi %v4float %290 %137 %305 %302 + %314 = OpPhi %v4float %291 %137 %306 %302 + %315 = OpVectorShuffle %v3float %313 %313 0 1 2 + %316 = OpVectorShuffle %v3float %311 %311 0 1 2 + %317 = OpCompositeExtract %float 
%314 0 + %318 = OpCompositeExtract %float %314 1 + %319 = OpCompositeExtract %float %314 2 + %320 = OpCompositeConstruct %v4float %317 %318 %319 %float_0 + %321 = OpCompositeExtract %float %312 0 + %322 = OpCompositeExtract %float %312 1 + %323 = OpCompositeExtract %float %312 2 + %324 = OpCompositeConstruct %v4float %321 %322 %323 %float_0 + %325 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %250 %int_0 + %326 = OpLoad %FVertexFactoryInterpolantsVSToDS %325 + %327 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %326 0 + %328 = OpCompositeExtract %v4float %327 1 + %329 = OpCompositeExtract %float %328 0 + %330 = OpCompositeExtract %float %328 1 + %331 = OpCompositeExtract %float %328 2 + %332 = OpCompositeConstruct %v4float %329 %330 %331 %float_0 + %333 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %252 %int_0 + %334 = OpLoad %FVertexFactoryInterpolantsVSToDS %333 + %335 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %334 0 + %336 = OpCompositeExtract %v4float %335 1 + %337 = OpCompositeExtract %float %336 0 + %338 = OpCompositeExtract %float %336 1 + %339 = OpCompositeExtract %float %336 2 + %340 = OpCompositeConstruct %v4float %337 %338 %339 %float_0 + %341 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %253 %int_0 + %342 = OpLoad %FVertexFactoryInterpolantsVSToDS %341 + %343 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %342 0 + %344 = OpCompositeExtract %v4float %343 1 + %345 = OpCompositeExtract %float %344 0 + %346 = OpCompositeExtract %float %344 1 + %347 = OpCompositeExtract %float %344 2 + %348 = OpCompositeConstruct %v4float %345 %346 %347 %float_0 + %349 = OpLoad %v4float %256 + %350 = OpAccessChain %_ptr_Function_v4float %param_var_I %250 %int_1 + %351 = OpLoad %v4float %350 + %352 = OpFMul %v4float %63 %349 + %353 = OpFAdd %v4float %352 %351 + %354 = OpFSub %v4float %351 %349 + %355 = OpDot %float %354 %243 + %356 = OpCompositeConstruct 
%v4float %355 %355 %355 %355 + %357 = OpFMul %v4float %356 %243 + %358 = OpFSub %v4float %353 %357 + %359 = OpFMul %v4float %358 %132 + %360 = OpAccessChain %_ptr_Function_v4float %param_var_I %252 %int_1 + %361 = OpLoad %v4float %360 + %362 = OpAccessChain %_ptr_Function_v4float %param_var_I %253 %int_1 + %363 = OpLoad %v4float %362 + %364 = OpFMul %v4float %63 %361 + %365 = OpFAdd %v4float %364 %363 + %366 = OpFSub %v4float %363 %361 + %367 = OpDot %float %366 %340 + %368 = OpCompositeConstruct %v4float %367 %367 %367 %367 + %369 = OpFMul %v4float %368 %340 + %370 = OpFSub %v4float %365 %369 + %371 = OpFMul %v4float %370 %132 + %372 = OpFAdd %v4float %359 %371 + %373 = OpFMul %v4float %372 %133 + %374 = OpLoad %v4float %350 + %375 = OpLoad %v4float %256 + %376 = OpFMul %v4float %63 %374 + %377 = OpFAdd %v4float %376 %375 + %378 = OpFSub %v4float %375 %374 + %379 = OpDot %float %378 %332 + %380 = OpCompositeConstruct %v4float %379 %379 %379 %379 + %381 = OpFMul %v4float %380 %332 + %382 = OpFSub %v4float %377 %381 + %383 = OpFMul %v4float %382 %132 + %384 = OpLoad %v4float %362 + %385 = OpLoad %v4float %360 + %386 = OpFMul %v4float %63 %384 + %387 = OpFAdd %v4float %386 %385 + %388 = OpFSub %v4float %385 %384 + %389 = OpDot %float %388 %348 + %390 = OpCompositeConstruct %v4float %389 %389 %389 %389 + %391 = OpFMul %v4float %390 %348 + %392 = OpFSub %v4float %387 %391 + %393 = OpFMul %v4float %392 %132 + %394 = OpFAdd %v4float %383 %393 + %395 = OpFMul %v4float %394 %133 + %396 = OpCompositeConstruct %FHullShaderConstantDominantEdgeData %130 %130 %320 %324 %315 %316 + %397 = OpCompositeConstruct %FHullShaderConstantDominantVertexData %130 %275 %271 + %398 = OpCompositeConstruct %_arr_v4float_uint_3 %257 %373 %395 + %399 = OpCompositeConstruct %FPNTessellationHSToDS %255 %398 %247 %float_1 %float_1 %397 %396 + %400 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %255 0 + %401 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %400 0 + %402 = 
OpCompositeExtract %v4float %401 0 + %403 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD10_centroid %227 + OpStore %403 %402 + %404 = OpCompositeExtract %v4float %401 1 + %405 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD11_centroid %227 + OpStore %405 %404 + %406 = OpCompositeExtract %v4float %255 1 + %407 = OpAccessChain %_ptr_Output_v4float %out_var_VS_To_DS_Position %227 + OpStore %407 %406 + %408 = OpCompositeExtract %uint %255 2 + %409 = OpAccessChain %_ptr_Output_uint %out_var_VS_To_DS_VertexID %227 + OpStore %409 %408 + %410 = OpAccessChain %_ptr_Output__arr_v4float_uint_3 %out_var_PN_POSITION %227 + OpStore %410 %398 + %411 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DisplacementScales %227 + OpStore %411 %247 + %412 = OpAccessChain %_ptr_Output_float %out_var_PN_TessellationMultiplier %227 + OpStore %412 %float_1 + %413 = OpAccessChain %_ptr_Output_float %out_var_PN_WorldDisplacementMultiplier %227 + OpStore %413 %float_1 + %414 = OpAccessChain %_ptr_Output_v2float %out_var_PN_DominantVertex %227 + OpStore %414 %130 + %415 = OpAccessChain %_ptr_Output_v4float %out_var_PN_DominantVertex1 %227 + OpStore %415 %275 + %416 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DominantVertex2 %227 + OpStore %416 %271 + %417 = OpAccessChain %_ptr_Output_v2float %out_var_PN_DominantEdge %227 + OpStore %417 %130 + %418 = OpAccessChain %_ptr_Output_v2float %out_var_PN_DominantEdge1 %227 + OpStore %418 %130 + %419 = OpAccessChain %_ptr_Output_v4float %out_var_PN_DominantEdge2 %227 + OpStore %419 %320 + %420 = OpAccessChain %_ptr_Output_v4float %out_var_PN_DominantEdge3 %227 + OpStore %420 %324 + %421 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DominantEdge4 %227 + OpStore %421 %315 + %422 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DominantEdge5 %227 + OpStore %422 %316 + %423 = OpAccessChain %_ptr_Workgroup_FPNTessellationHSToDS %temp_var_hullMainRetVal %227 + OpStore %423 %399 + OpControlBarrier %uint_2 %uint_4 %uint_0 + %424 = OpIEqual 
%bool %227 %uint_0 + OpSelectionMerge %if_merge None + OpBranchConditional %424 %425 %if_merge + %425 = OpLabel + %426 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %427 = OpLoad %mat4v4float %426 + %428 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_7 + %429 = OpLoad %mat4v4float %428 + %430 = OpAccessChain %_ptr_Uniform_v3float %View %int_28 + %431 = OpLoad %v3float %430 + %432 = OpAccessChain %_ptr_Uniform_float %View %int_78 + %433 = OpLoad %float %432 + %434 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_0 + %435 = OpLoad %v4float %434 + %436 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_1 + %437 = OpLoad %v4float %436 + %438 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_2 + %439 = OpLoad %v4float %438 + %440 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_0 + %441 = OpLoad %v4float %440 + %442 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_1 + %443 = OpLoad %v4float %442 + %444 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_2 + %445 = OpLoad %v4float %444 + %446 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_0 + %447 = OpLoad %v4float %446 + %448 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_1 + %449 = OpLoad %v4float %448 + %450 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_2 + %451 = OpLoad %v4float %450 + %452 = OpFAdd %v4float %437 %439 + %453 = OpFAdd %v4float %452 %443 + %454 = OpFAdd %v4float %453 %445 + %455 = OpFAdd %v4float %454 %449 + %456 = OpFAdd %v4float %455 %451 + %457 = OpFMul %v4float %456 %135 + %458 = OpFAdd %v4float %447 %441 + %459 = OpFAdd %v4float %458 %435 + %460 = OpFMul %v4float %459 %132 + %461 = OpFSub %v4float %457 %460 + %462 = OpFMul %v4float %461 %133 + %463 = OpFAdd 
%v4float %457 %462 + %464 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_1 %int_3 + %465 = OpLoad %float %464 + %466 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_2 %int_3 + %467 = OpLoad %float %466 + %468 = OpFAdd %float %465 %467 + %469 = OpFMul %float %float_0_5 %468 + %470 = OpCompositeInsert %v4float %469 %136 0 + %471 = OpLoad %float %466 + %472 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_0 %int_3 + %473 = OpLoad %float %472 + %474 = OpFAdd %float %471 %473 + %475 = OpFMul %float %float_0_5 %474 + %476 = OpCompositeInsert %v4float %475 %470 1 + %477 = OpLoad %float %472 + %478 = OpLoad %float %464 + %479 = OpFAdd %float %477 %478 + %480 = OpFMul %float %float_0_5 %479 + %481 = OpCompositeInsert %v4float %480 %476 2 + %482 = OpLoad %float %472 + %483 = OpLoad %float %464 + %484 = OpFAdd %float %482 %483 + %485 = OpLoad %float %466 + %486 = OpFAdd %float %484 %485 + %487 = OpFMul %float %float_0_333000004 %486 + %488 = OpCompositeInsert %v4float %487 %481 3 + %489 = OpVectorShuffle %v3float %435 %435 0 1 2 + %490 = OpVectorShuffle %v3float %441 %441 0 1 2 + %491 = OpVectorShuffle %v3float %447 %447 0 1 2 + OpBranch %492 + %492 = OpLabel + OpLoopMerge %493 %494 None + OpBranch %495 + %495 = OpLabel + %496 = OpMatrixTimesVector %v4float %429 %77 + %497 = OpCompositeExtract %float %435 0 + %498 = OpCompositeExtract %float %435 1 + %499 = OpCompositeExtract %float %435 2 + %500 = OpCompositeConstruct %v4float %497 %498 %499 %float_1 + %501 = OpMatrixTimesVector %v4float %427 %500 + %502 = OpVectorShuffle %v3float %501 %501 0 1 2 + %503 = OpVectorShuffle %v3float %496 %496 0 1 2 + %504 = OpFSub %v3float %502 %503 + %505 = OpCompositeExtract %float %501 3 + %506 = OpCompositeExtract %float %496 3 + %507 = OpFAdd %float %505 %506 + %508 = OpCompositeConstruct %v3float %507 %507 %507 + %509 = OpFOrdLessThan %v3bool %504 %508 + %510 = OpSelect %v3int %509 %83 %74 + %511 = OpFAdd %v3float %502 %503 + 
%512 = OpFNegate %float %505 + %513 = OpFSub %float %512 %506 + %514 = OpCompositeConstruct %v3float %513 %513 %513 + %515 = OpFOrdGreaterThan %v3bool %511 %514 + %516 = OpSelect %v3int %515 %83 %74 + %517 = OpIMul %v3int %84 %516 + %518 = OpIAdd %v3int %510 %517 + %519 = OpCompositeExtract %float %441 0 + %520 = OpCompositeExtract %float %441 1 + %521 = OpCompositeExtract %float %441 2 + %522 = OpCompositeConstruct %v4float %519 %520 %521 %float_1 + %523 = OpMatrixTimesVector %v4float %427 %522 + %524 = OpVectorShuffle %v3float %523 %523 0 1 2 + %525 = OpFSub %v3float %524 %503 + %526 = OpCompositeExtract %float %523 3 + %527 = OpFAdd %float %526 %506 + %528 = OpCompositeConstruct %v3float %527 %527 %527 + %529 = OpFOrdLessThan %v3bool %525 %528 + %530 = OpSelect %v3int %529 %83 %74 + %531 = OpFAdd %v3float %524 %503 + %532 = OpFNegate %float %526 + %533 = OpFSub %float %532 %506 + %534 = OpCompositeConstruct %v3float %533 %533 %533 + %535 = OpFOrdGreaterThan %v3bool %531 %534 + %536 = OpSelect %v3int %535 %83 %74 + %537 = OpIMul %v3int %84 %536 + %538 = OpIAdd %v3int %530 %537 + %539 = OpBitwiseOr %v3int %518 %538 + %540 = OpCompositeExtract %float %447 0 + %541 = OpCompositeExtract %float %447 1 + %542 = OpCompositeExtract %float %447 2 + %543 = OpCompositeConstruct %v4float %540 %541 %542 %float_1 + %544 = OpMatrixTimesVector %v4float %427 %543 + %545 = OpVectorShuffle %v3float %544 %544 0 1 2 + %546 = OpFSub %v3float %545 %503 + %547 = OpCompositeExtract %float %544 3 + %548 = OpFAdd %float %547 %506 + %549 = OpCompositeConstruct %v3float %548 %548 %548 + %550 = OpFOrdLessThan %v3bool %546 %549 + %551 = OpSelect %v3int %550 %83 %74 + %552 = OpFAdd %v3float %545 %503 + %553 = OpFNegate %float %547 + %554 = OpFSub %float %553 %506 + %555 = OpCompositeConstruct %v3float %554 %554 %554 + %556 = OpFOrdGreaterThan %v3bool %552 %555 + %557 = OpSelect %v3int %556 %83 %74 + %558 = OpIMul %v3int %84 %557 + %559 = OpIAdd %v3int %551 %558 + %560 = OpBitwiseOr %v3int %539 
%559 + %561 = OpINotEqual %v3bool %560 %75 + %562 = OpAny %bool %561 + OpSelectionMerge %563 None + OpBranchConditional %562 %564 %563 + %564 = OpLabel + OpBranch %493 + %563 = OpLabel + %565 = OpFSub %v3float %489 %490 + %566 = OpFSub %v3float %490 %491 + %567 = OpFSub %v3float %491 %489 + %568 = OpFAdd %v3float %489 %490 + %569 = OpFMul %v3float %78 %568 + %570 = OpFSub %v3float %569 %431 + %571 = OpFAdd %v3float %490 %491 + %572 = OpFMul %v3float %78 %571 + %573 = OpFSub %v3float %572 %431 + %574 = OpFAdd %v3float %491 %489 + %575 = OpFMul %v3float %78 %574 + %576 = OpFSub %v3float %575 %431 + %577 = OpDot %float %566 %566 + %578 = OpDot %float %573 %573 + %579 = OpFDiv %float %577 %578 + %580 = OpExtInst %float %1 Sqrt %579 + %581 = OpDot %float %567 %567 + %582 = OpDot %float %576 %576 + %583 = OpFDiv %float %581 %582 + %584 = OpExtInst %float %1 Sqrt %583 + %585 = OpDot %float %565 %565 + %586 = OpDot %float %570 %570 + %587 = OpFDiv %float %585 %586 + %588 = OpExtInst %float %1 Sqrt %587 + %589 = OpCompositeConstruct %v4float %580 %584 %588 %float_1 + %590 = OpFAdd %float %580 %584 + %591 = OpFAdd %float %590 %588 + %592 = OpFMul %float %float_0_333000004 %591 + %593 = OpCompositeInsert %v4float %592 %589 3 + %594 = OpCompositeConstruct %v4float %433 %433 %433 %433 + %595 = OpFMul %v4float %594 %593 + OpBranch %493 + %494 = OpLabel + OpBranch %492 + %493 = OpLabel + %596 = OpPhi %v4float %77 %564 %595 %563 + %597 = OpFMul %v4float %488 %596 + %598 = OpExtInst %v4float %1 FClamp %597 %68 %70 + %599 = OpCompositeExtract %float %598 0 + %600 = OpCompositeExtract %float %598 1 + %601 = OpCompositeExtract %float %598 2 + %602 = OpCompositeExtract %float %598 3 + %603 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_0 + OpStore %603 %599 + %604 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_1 + OpStore %604 %600 + %605 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_2 + OpStore %605 %601 + %606 = OpAccessChain %_ptr_Output_float 
%gl_TessLevelInner %uint_0 + OpStore %606 %602 + OpStore %out_var_PN_POSITION9 %463 + OpBranch %if_merge + %if_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc b/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc new file mode 100644 index 00000000000..1a9b95e085a --- /dev/null +++ b/shaders-ue4/asm/tesc/hs-texcoord-array.invalid.asm.tesc @@ -0,0 +1,1144 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 531 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %MainHull "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_COLOR0 %in_var_TEXCOORD0 %in_var_VS_To_DS_Position %gl_InvocationID %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_COLOR0 %out_var_TEXCOORD0 %out_var_VS_To_DS_Position %out_var_PN_POSITION %out_var_PN_DisplacementScales %out_var_PN_TessellationMultiplier %out_var_PN_WorldDisplacementMultiplier %gl_TessLevelOuter %gl_TessLevelInner %out_var_PN_POSITION9 + OpExecutionMode %MainHull Triangles + OpExecutionMode %MainHull SpacingFractionalOdd + OpExecutionMode %MainHull VertexOrderCw + OpExecutionMode %MainHull OutputVertices 3 + OpSource HLSL 600 + OpName %FPNTessellationHSToDS "FPNTessellationHSToDS" + OpMemberName %FPNTessellationHSToDS 0 "PassSpecificData" + OpMemberName %FPNTessellationHSToDS 1 "WorldPosition" + OpMemberName %FPNTessellationHSToDS 2 "DisplacementScale" + OpMemberName %FPNTessellationHSToDS 3 "TessellationMultiplier" + OpMemberName %FPNTessellationHSToDS 4 "WorldDisplacementMultiplier" + OpName %FHitProxyVSToDS "FHitProxyVSToDS" + OpMemberName %FHitProxyVSToDS 0 "FactoryInterpolants" + OpMemberName %FHitProxyVSToDS 1 "Position" + OpName %FVertexFactoryInterpolantsVSToDS "FVertexFactoryInterpolantsVSToDS" + OpMemberName 
%FVertexFactoryInterpolantsVSToDS 0 "InterpolantsVSToPS" + OpName %FVertexFactoryInterpolantsVSToPS "FVertexFactoryInterpolantsVSToPS" + OpMemberName %FVertexFactoryInterpolantsVSToPS 0 "TangentToWorld0" + OpMemberName %FVertexFactoryInterpolantsVSToPS 1 "TangentToWorld2" + OpMemberName %FVertexFactoryInterpolantsVSToPS 2 "Color" + OpMemberName %FVertexFactoryInterpolantsVSToPS 3 "TexCoords" + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName 
%type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + 
OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName 
%type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" 
+ OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" 
+ OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_DrawsVelocity" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + OpMemberName %type_Primitive 16 
"Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 "Primitive_LightingChannelMask" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName %type_Primitive 19 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 20 "Primitive_PreSkinnedLocalBounds" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpMemberName %type_Primitive 22 "Primitive_OutputVelocity" + OpMemberName %type_Primitive 23 "PrePadding_Primitive_420" + OpMemberName %type_Primitive 24 "PrePadding_Primitive_424" + OpMemberName %type_Primitive 25 "PrePadding_Primitive_428" + OpMemberName %type_Primitive 26 "Primitive_CustomPrimitiveData" + OpName %Primitive "Primitive" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_COLOR0 "in.var.COLOR0" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_COLOR0 "out.var.COLOR0" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_VS_To_DS_Position "out.var.VS_To_DS_Position" + OpName %out_var_PN_POSITION "out.var.PN_POSITION" + OpName %out_var_PN_DisplacementScales "out.var.PN_DisplacementScales" + OpName %out_var_PN_TessellationMultiplier "out.var.PN_TessellationMultiplier" + OpName %out_var_PN_WorldDisplacementMultiplier "out.var.PN_WorldDisplacementMultiplier" + OpName %out_var_PN_POSITION9 "out.var.PN_POSITION9" + OpName %MainHull "MainHull" + OpName %param_var_I "param.var.I" + OpName %temp_var_hullMainRetVal "temp.var.hullMainRetVal" + OpName %if_merge "if.merge" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_COLOR0 UserSemantic "COLOR0" 
+ OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorateString %gl_InvocationID UserSemantic "SV_OutputControlPointID" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %out_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %out_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %out_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %out_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %out_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %out_var_PN_POSITION9 Patch + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_COLOR0 Location 2 + OpDecorate %in_var_TEXCOORD0 Location 3 + OpDecorate %in_var_VS_To_DS_Position Location 5 + OpDecorate %out_var_COLOR0 Location 0 + OpDecorate %out_var_PN_DisplacementScales Location 1 + OpDecorate %out_var_PN_POSITION Location 2 + OpDecorate %out_var_PN_POSITION9 Location 5 + OpDecorate %out_var_PN_TessellationMultiplier Location 6 + OpDecorate %out_var_PN_WorldDisplacementMultiplier Location 7 + OpDecorate %out_var_TEXCOORD0 
Location 8 + OpDecorate %out_var_TEXCOORD10_centroid Location 10 + OpDecorate %out_var_TEXCOORD11_centroid Location 11 + OpDecorate %out_var_VS_To_DS_Position Location 12 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 
16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate 
%type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 
Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 
Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + 
OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate %type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 
Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 412 + OpMemberDecorate %type_Primitive 22 Offset 416 + OpMemberDecorate %type_Primitive 23 Offset 420 + OpMemberDecorate %type_Primitive 24 Offset 424 + OpMemberDecorate %type_Primitive 25 Offset 428 + OpMemberDecorate %type_Primitive 26 Offset 432 + OpDecorate %type_Primitive Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_2 = OpConstant %float 2 + %54 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 + %float_0_5 = OpConstant %float 0.5 + %int_3 = OpConstant %int 3 +%float_0_333000004 = OpConstant %float 0.333000004 + %float_1 = OpConstant %float 1 + %59 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_15 = OpConstant %float 15 + %61 = OpConstantComposite %v4float %float_15 %float_15 %float_15 %float_15 +%_arr_v2float_uint_2 = OpTypeArray %v2float %uint_2 +%FVertexFactoryInterpolantsVSToPS = OpTypeStruct %v4float %v4float %v4float %_arr_v2float_uint_2 +%FVertexFactoryInterpolantsVSToDS = OpTypeStruct 
%FVertexFactoryInterpolantsVSToPS +%FHitProxyVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToDS %v4float +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%FPNTessellationHSToDS = OpTypeStruct %FHitProxyVSToDS %_arr_v4float_uint_3 %v3float %float %float + %v3int = OpTypeVector %int 3 + %65 = OpConstantComposite %v3int %int_0 %int_0 %int_0 + %66 = OpConstantComposite %v3int %int_3 %int_3 %int_3 + %float_0 = OpConstant %float 0 + %68 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %69 = OpConstantComposite %v3float %float_0_5 %float_0_5 %float_0_5 + %int_78 = OpConstant %int 78 + %int_15 = OpConstant %int 15 + %int_7 = OpConstant %int 7 + %int_28 = OpConstant %int 28 + %74 = OpConstantComposite %v3int %int_1 %int_1 %int_1 + %75 = OpConstantComposite %v3int %int_2 %int_2 %int_2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float 
%float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %uint %v3float %uint %v3float %int %uint %uint %uint %uint %_arr_v4float_uint_4 +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive + %uint_12 = OpConstant %uint 12 +%_arr_v4float_uint_12 = OpTypeArray %v4float %uint_12 +%_ptr_Input__arr_v4float_uint_12 = OpTypePointer Input %_arr_v4float_uint_12 +%_arr__arr_v2float_uint_2_uint_12 = OpTypeArray %_arr_v2float_uint_2 %uint_12 +%_ptr_Input__arr__arr_v2float_uint_2_uint_12 = OpTypePointer Input %_arr__arr_v2float_uint_2_uint_12 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output__arr_v4float_uint_3 = OpTypePointer Output %_arr_v4float_uint_3 +%_arr__arr_v2float_uint_2_uint_3 = OpTypeArray %_arr_v2float_uint_2 %uint_3 +%_ptr_Output__arr__arr_v2float_uint_2_uint_3 = OpTypePointer Output %_arr__arr_v2float_uint_2_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Output__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Output %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Output__arr_v3float_uint_3 = OpTypePointer Output %_arr_v3float_uint_3 +%_ptr_Output__arr_float_uint_3 = OpTypePointer Output 
%_arr_float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %98 = OpTypeFunction %void +%_arr_FHitProxyVSToDS_uint_12 = OpTypeArray %FHitProxyVSToDS %uint_12 +%_ptr_Function__arr_FHitProxyVSToDS_uint_12 = OpTypePointer Function %_arr_FHitProxyVSToDS_uint_12 +%_arr_FPNTessellationHSToDS_uint_3 = OpTypeArray %FPNTessellationHSToDS %uint_3 +%_ptr_Function__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Function %_arr_FPNTessellationHSToDS_uint_3 +%_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 = OpTypePointer Workgroup %_arr_FPNTessellationHSToDS_uint_3 +%_ptr_Output__arr_v2float_uint_2 = OpTypePointer Output %_arr_v2float_uint_2 +%_ptr_Output_v3float = OpTypePointer Output %v3float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Function_FPNTessellationHSToDS = OpTypePointer Function %FPNTessellationHSToDS +%_ptr_Workgroup_FPNTessellationHSToDS = OpTypePointer Workgroup %FPNTessellationHSToDS + %bool = OpTypeBool +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Workgroup_v4float = OpTypePointer Workgroup %v4float +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%mat3v3float = OpTypeMatrix %v3float 3 +%_ptr_Function_FVertexFactoryInterpolantsVSToDS = OpTypePointer Function %FVertexFactoryInterpolantsVSToDS +%_ptr_Function_FHitProxyVSToDS = OpTypePointer Function %FHitProxyVSToDS + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %View = OpVariable %_ptr_Uniform_type_View Uniform + %Primitive = 
OpVariable %_ptr_Uniform_type_Primitive Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_COLOR0 = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr__arr_v2float_uint_2_uint_12 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_12 Input +%gl_InvocationID = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_COLOR0 = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_TEXCOORD0 = OpVariable %_ptr_Output__arr__arr_v2float_uint_2_uint_3 Output +%out_var_VS_To_DS_Position = OpVariable %_ptr_Output__arr_v4float_uint_3 Output +%out_var_PN_POSITION = OpVariable %_ptr_Output__arr__arr_v4float_uint_3_uint_3 Output +%out_var_PN_DisplacementScales = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_PN_TessellationMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output +%out_var_PN_POSITION9 = OpVariable %_ptr_Output_v4float Output +%float_0_333333343 = OpConstant %float 0.333333343 + %119 = OpConstantComposite %v4float %float_0_333333343 %float_0_333333343 %float_0_333333343 %float_0_333333343 + %120 = OpConstantComposite %v4float %float_0_5 %float_0_5 %float_0_5 %float_0_5 +%float_0_166666672 = OpConstant %float 0.166666672 + %122 = OpConstantComposite %v4float %float_0_166666672 %float_0_166666672 %float_0_166666672 %float_0_166666672 + %123 = OpUndef %v4float + +; XXX: Original asm used Function here, which is wrong. +; This patches the SPIR-V to be correct. 
+%temp_var_hullMainRetVal = OpVariable %_ptr_Workgroup__arr_FPNTessellationHSToDS_uint_3 Workgroup + + %MainHull = OpFunction %void None %98 + %124 = OpLabel +%param_var_I = OpVariable %_ptr_Function__arr_FHitProxyVSToDS_uint_12 Function + %125 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD10_centroid + %126 = OpLoad %_arr_v4float_uint_12 %in_var_TEXCOORD11_centroid + %127 = OpLoad %_arr_v4float_uint_12 %in_var_COLOR0 + %128 = OpLoad %_arr__arr_v2float_uint_2_uint_12 %in_var_TEXCOORD0 + %129 = OpCompositeExtract %v4float %125 0 + %130 = OpCompositeExtract %v4float %126 0 + %131 = OpCompositeExtract %v4float %127 0 + %132 = OpCompositeExtract %_arr_v2float_uint_2 %128 0 + %133 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %129 %130 %131 %132 + %134 = OpCompositeExtract %v4float %125 1 + %135 = OpCompositeExtract %v4float %126 1 + %136 = OpCompositeExtract %v4float %127 1 + %137 = OpCompositeExtract %_arr_v2float_uint_2 %128 1 + %138 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %134 %135 %136 %137 + %139 = OpCompositeExtract %v4float %125 2 + %140 = OpCompositeExtract %v4float %126 2 + %141 = OpCompositeExtract %v4float %127 2 + %142 = OpCompositeExtract %_arr_v2float_uint_2 %128 2 + %143 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %139 %140 %141 %142 + %144 = OpCompositeExtract %v4float %125 3 + %145 = OpCompositeExtract %v4float %126 3 + %146 = OpCompositeExtract %v4float %127 3 + %147 = OpCompositeExtract %_arr_v2float_uint_2 %128 3 + %148 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %144 %145 %146 %147 + %149 = OpCompositeExtract %v4float %125 4 + %150 = OpCompositeExtract %v4float %126 4 + %151 = OpCompositeExtract %v4float %127 4 + %152 = OpCompositeExtract %_arr_v2float_uint_2 %128 4 + %153 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %149 %150 %151 %152 + %154 = OpCompositeExtract %v4float %125 5 + %155 = OpCompositeExtract %v4float %126 5 + %156 = OpCompositeExtract %v4float %127 5 + %157 = 
OpCompositeExtract %_arr_v2float_uint_2 %128 5 + %158 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %154 %155 %156 %157 + %159 = OpCompositeExtract %v4float %125 6 + %160 = OpCompositeExtract %v4float %126 6 + %161 = OpCompositeExtract %v4float %127 6 + %162 = OpCompositeExtract %_arr_v2float_uint_2 %128 6 + %163 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %159 %160 %161 %162 + %164 = OpCompositeExtract %v4float %125 7 + %165 = OpCompositeExtract %v4float %126 7 + %166 = OpCompositeExtract %v4float %127 7 + %167 = OpCompositeExtract %_arr_v2float_uint_2 %128 7 + %168 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %164 %165 %166 %167 + %169 = OpCompositeExtract %v4float %125 8 + %170 = OpCompositeExtract %v4float %126 8 + %171 = OpCompositeExtract %v4float %127 8 + %172 = OpCompositeExtract %_arr_v2float_uint_2 %128 8 + %173 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %169 %170 %171 %172 + %174 = OpCompositeExtract %v4float %125 9 + %175 = OpCompositeExtract %v4float %126 9 + %176 = OpCompositeExtract %v4float %127 9 + %177 = OpCompositeExtract %_arr_v2float_uint_2 %128 9 + %178 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %174 %175 %176 %177 + %179 = OpCompositeExtract %v4float %125 10 + %180 = OpCompositeExtract %v4float %126 10 + %181 = OpCompositeExtract %v4float %127 10 + %182 = OpCompositeExtract %_arr_v2float_uint_2 %128 10 + %183 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %179 %180 %181 %182 + %184 = OpCompositeExtract %v4float %125 11 + %185 = OpCompositeExtract %v4float %126 11 + %186 = OpCompositeExtract %v4float %127 11 + %187 = OpCompositeExtract %_arr_v2float_uint_2 %128 11 + %188 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %184 %185 %186 %187 + %189 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %133 + %190 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %138 + %191 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %143 + %192 = 
OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %148 + %193 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %153 + %194 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %158 + %195 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %163 + %196 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %168 + %197 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %173 + %198 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %178 + %199 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %183 + %200 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %188 + %201 = OpLoad %_arr_v4float_uint_12 %in_var_VS_To_DS_Position + %202 = OpCompositeExtract %v4float %201 0 + %203 = OpCompositeConstruct %FHitProxyVSToDS %189 %202 + %204 = OpCompositeExtract %v4float %201 1 + %205 = OpCompositeConstruct %FHitProxyVSToDS %190 %204 + %206 = OpCompositeExtract %v4float %201 2 + %207 = OpCompositeConstruct %FHitProxyVSToDS %191 %206 + %208 = OpCompositeExtract %v4float %201 3 + %209 = OpCompositeConstruct %FHitProxyVSToDS %192 %208 + %210 = OpCompositeExtract %v4float %201 4 + %211 = OpCompositeConstruct %FHitProxyVSToDS %193 %210 + %212 = OpCompositeExtract %v4float %201 5 + %213 = OpCompositeConstruct %FHitProxyVSToDS %194 %212 + %214 = OpCompositeExtract %v4float %201 6 + %215 = OpCompositeConstruct %FHitProxyVSToDS %195 %214 + %216 = OpCompositeExtract %v4float %201 7 + %217 = OpCompositeConstruct %FHitProxyVSToDS %196 %216 + %218 = OpCompositeExtract %v4float %201 8 + %219 = OpCompositeConstruct %FHitProxyVSToDS %197 %218 + %220 = OpCompositeExtract %v4float %201 9 + %221 = OpCompositeConstruct %FHitProxyVSToDS %198 %220 + %222 = OpCompositeExtract %v4float %201 10 + %223 = OpCompositeConstruct %FHitProxyVSToDS %199 %222 + %224 = OpCompositeExtract %v4float %201 11 + %225 = OpCompositeConstruct %FHitProxyVSToDS %200 %224 + %226 = OpCompositeConstruct %_arr_FHitProxyVSToDS_uint_12 %203 %205 %207 %209 %211 %213 %215 %217 %219 %221 
%223 %225 + OpStore %param_var_I %226 + %227 = OpLoad %uint %gl_InvocationID + %228 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %227 %int_0 + %229 = OpLoad %FVertexFactoryInterpolantsVSToDS %228 + %230 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %229 0 + %231 = OpCompositeExtract %v4float %230 0 + %232 = OpCompositeExtract %v4float %230 1 + %233 = OpVectorShuffle %v3float %231 %231 0 1 2 + %234 = OpVectorShuffle %v3float %232 %232 0 1 2 + %235 = OpExtInst %v3float %1 Cross %234 %233 + %236 = OpCompositeExtract %float %232 3 + %237 = OpCompositeConstruct %v3float %236 %236 %236 + %238 = OpFMul %v3float %235 %237 + %239 = OpCompositeConstruct %mat3v3float %233 %238 %234 + %240 = OpCompositeExtract %float %232 0 + %241 = OpCompositeExtract %float %232 1 + %242 = OpCompositeExtract %float %232 2 + %243 = OpCompositeConstruct %v4float %240 %241 %242 %float_0 + %244 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_15 + %245 = OpLoad %v4float %244 + %246 = OpVectorShuffle %v3float %245 %245 0 1 2 + %247 = OpVectorTimesMatrix %v3float %246 %239 + %248 = OpULessThan %bool %227 %uint_2 + %249 = OpIAdd %uint %227 %uint_1 + %250 = OpSelect %uint %248 %249 %uint_0 + %251 = OpIMul %uint %uint_2 %227 + %252 = OpIAdd %uint %uint_3 %251 + %253 = OpIAdd %uint %251 %uint_4 + %254 = OpAccessChain %_ptr_Function_FHitProxyVSToDS %param_var_I %227 + %255 = OpLoad %FHitProxyVSToDS %254 + %256 = OpAccessChain %_ptr_Function_v4float %param_var_I %227 %int_1 + %257 = OpLoad %v4float %256 + %258 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %250 %int_0 + %259 = OpLoad %FVertexFactoryInterpolantsVSToDS %258 + %260 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %259 0 + %261 = OpCompositeExtract %v4float %260 1 + %262 = OpCompositeExtract %float %261 0 + %263 = OpCompositeExtract %float %261 1 + %264 = OpCompositeExtract %float %261 2 + %265 = OpCompositeConstruct %v4float %262 %263 %264 %float_0 + %266 = 
OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %252 %int_0 + %267 = OpLoad %FVertexFactoryInterpolantsVSToDS %266 + %268 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %267 0 + %269 = OpCompositeExtract %v4float %268 1 + %270 = OpCompositeExtract %float %269 0 + %271 = OpCompositeExtract %float %269 1 + %272 = OpCompositeExtract %float %269 2 + %273 = OpCompositeConstruct %v4float %270 %271 %272 %float_0 + %274 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %253 %int_0 + %275 = OpLoad %FVertexFactoryInterpolantsVSToDS %274 + %276 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %275 0 + %277 = OpCompositeExtract %v4float %276 1 + %278 = OpCompositeExtract %float %277 0 + %279 = OpCompositeExtract %float %277 1 + %280 = OpCompositeExtract %float %277 2 + %281 = OpCompositeConstruct %v4float %278 %279 %280 %float_0 + %282 = OpLoad %v4float %256 + %283 = OpAccessChain %_ptr_Function_v4float %param_var_I %250 %int_1 + %284 = OpLoad %v4float %283 + %285 = OpFMul %v4float %54 %282 + %286 = OpFAdd %v4float %285 %284 + %287 = OpFSub %v4float %284 %282 + %288 = OpDot %float %287 %243 + %289 = OpCompositeConstruct %v4float %288 %288 %288 %288 + %290 = OpFMul %v4float %289 %243 + %291 = OpFSub %v4float %286 %290 + %292 = OpFMul %v4float %291 %119 + %293 = OpAccessChain %_ptr_Function_v4float %param_var_I %252 %int_1 + %294 = OpLoad %v4float %293 + %295 = OpAccessChain %_ptr_Function_v4float %param_var_I %253 %int_1 + %296 = OpLoad %v4float %295 + %297 = OpFMul %v4float %54 %294 + %298 = OpFAdd %v4float %297 %296 + %299 = OpFSub %v4float %296 %294 + %300 = OpDot %float %299 %273 + %301 = OpCompositeConstruct %v4float %300 %300 %300 %300 + %302 = OpFMul %v4float %301 %273 + %303 = OpFSub %v4float %298 %302 + %304 = OpFMul %v4float %303 %119 + %305 = OpFAdd %v4float %292 %304 + %306 = OpFMul %v4float %305 %120 + %307 = OpLoad %v4float %283 + %308 = OpLoad %v4float %256 + %309 = OpFMul %v4float %54 %307 + 
%310 = OpFAdd %v4float %309 %308 + %311 = OpFSub %v4float %308 %307 + %312 = OpDot %float %311 %265 + %313 = OpCompositeConstruct %v4float %312 %312 %312 %312 + %314 = OpFMul %v4float %313 %265 + %315 = OpFSub %v4float %310 %314 + %316 = OpFMul %v4float %315 %119 + %317 = OpLoad %v4float %295 + %318 = OpLoad %v4float %293 + %319 = OpFMul %v4float %54 %317 + %320 = OpFAdd %v4float %319 %318 + %321 = OpFSub %v4float %318 %317 + %322 = OpDot %float %321 %281 + %323 = OpCompositeConstruct %v4float %322 %322 %322 %322 + %324 = OpFMul %v4float %323 %281 + %325 = OpFSub %v4float %320 %324 + %326 = OpFMul %v4float %325 %119 + %327 = OpFAdd %v4float %316 %326 + %328 = OpFMul %v4float %327 %120 + %329 = OpCompositeConstruct %_arr_v4float_uint_3 %257 %306 %328 + %330 = OpCompositeConstruct %FPNTessellationHSToDS %255 %329 %247 %float_1 %float_1 + %331 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %255 0 + %332 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %331 0 + %333 = OpCompositeExtract %v4float %332 0 + %334 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD10_centroid %227 + OpStore %334 %333 + %335 = OpCompositeExtract %v4float %332 1 + %336 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD11_centroid %227 + OpStore %336 %335 + %337 = OpCompositeExtract %v4float %332 2 + %338 = OpAccessChain %_ptr_Output_v4float %out_var_COLOR0 %227 + OpStore %338 %337 + %339 = OpCompositeExtract %_arr_v2float_uint_2 %332 3 + %340 = OpAccessChain %_ptr_Output__arr_v2float_uint_2 %out_var_TEXCOORD0 %227 + OpStore %340 %339 + %341 = OpCompositeExtract %v4float %255 1 + %342 = OpAccessChain %_ptr_Output_v4float %out_var_VS_To_DS_Position %227 + OpStore %342 %341 + %343 = OpAccessChain %_ptr_Output__arr_v4float_uint_3 %out_var_PN_POSITION %227 + OpStore %343 %329 + %344 = OpAccessChain %_ptr_Output_v3float %out_var_PN_DisplacementScales %227 + OpStore %344 %247 + %345 = OpAccessChain %_ptr_Output_float %out_var_PN_TessellationMultiplier %227 + OpStore %345 
%float_1 + %346 = OpAccessChain %_ptr_Output_float %out_var_PN_WorldDisplacementMultiplier %227 + OpStore %346 %float_1 + %347 = OpAccessChain %_ptr_Workgroup_FPNTessellationHSToDS %temp_var_hullMainRetVal %227 + OpStore %347 %330 + OpControlBarrier %uint_2 %uint_4 %uint_0 + %348 = OpIEqual %bool %227 %uint_0 + OpSelectionMerge %if_merge None + OpBranchConditional %348 %349 %if_merge + %349 = OpLabel + %350 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %351 = OpLoad %mat4v4float %350 + %352 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_7 + %353 = OpLoad %mat4v4float %352 + %354 = OpAccessChain %_ptr_Uniform_v3float %View %int_28 + %355 = OpLoad %v3float %354 + %356 = OpAccessChain %_ptr_Uniform_float %View %int_78 + %357 = OpLoad %float %356 + %358 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_0 + %359 = OpLoad %v4float %358 + %360 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_1 + %361 = OpLoad %v4float %360 + %362 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_0 %int_1 %int_2 + %363 = OpLoad %v4float %362 + %364 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_0 + %365 = OpLoad %v4float %364 + %366 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_1 + %367 = OpLoad %v4float %366 + %368 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_1 %int_1 %int_2 + %369 = OpLoad %v4float %368 + %370 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_0 + %371 = OpLoad %v4float %370 + %372 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_1 + %373 = OpLoad %v4float %372 + %374 = OpAccessChain %_ptr_Workgroup_v4float %temp_var_hullMainRetVal %uint_2 %int_1 %int_2 + %375 = OpLoad %v4float %374 + %376 = OpFAdd %v4float %361 %363 + %377 = OpFAdd %v4float %376 %367 + %378 = OpFAdd %v4float %377 %369 + %379 = 
OpFAdd %v4float %378 %373 + %380 = OpFAdd %v4float %379 %375 + %381 = OpFMul %v4float %380 %122 + %382 = OpFAdd %v4float %371 %365 + %383 = OpFAdd %v4float %382 %359 + %384 = OpFMul %v4float %383 %119 + %385 = OpFSub %v4float %381 %384 + %386 = OpFMul %v4float %385 %120 + %387 = OpFAdd %v4float %381 %386 + %388 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_1 %int_3 + %389 = OpLoad %float %388 + %390 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_2 %int_3 + %391 = OpLoad %float %390 + %392 = OpFAdd %float %389 %391 + %393 = OpFMul %float %float_0_5 %392 + %394 = OpCompositeInsert %v4float %393 %123 0 + %395 = OpLoad %float %390 + %396 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_0 %int_3 + %397 = OpLoad %float %396 + %398 = OpFAdd %float %395 %397 + %399 = OpFMul %float %float_0_5 %398 + %400 = OpCompositeInsert %v4float %399 %394 1 + %401 = OpLoad %float %396 + %402 = OpLoad %float %388 + %403 = OpFAdd %float %401 %402 + %404 = OpFMul %float %float_0_5 %403 + %405 = OpCompositeInsert %v4float %404 %400 2 + %406 = OpLoad %float %396 + %407 = OpLoad %float %388 + %408 = OpFAdd %float %406 %407 + %409 = OpLoad %float %390 + %410 = OpFAdd %float %408 %409 + %411 = OpFMul %float %float_0_333000004 %410 + %412 = OpCompositeInsert %v4float %411 %405 3 + %413 = OpVectorShuffle %v3float %359 %359 0 1 2 + %414 = OpVectorShuffle %v3float %365 %365 0 1 2 + %415 = OpVectorShuffle %v3float %371 %371 0 1 2 + OpBranch %416 + %416 = OpLabel + OpLoopMerge %417 %418 None + OpBranch %419 + %419 = OpLabel + %420 = OpMatrixTimesVector %v4float %353 %68 + %421 = OpCompositeExtract %float %359 0 + %422 = OpCompositeExtract %float %359 1 + %423 = OpCompositeExtract %float %359 2 + %424 = OpCompositeConstruct %v4float %421 %422 %423 %float_1 + %425 = OpMatrixTimesVector %v4float %351 %424 + %426 = OpVectorShuffle %v3float %425 %425 0 1 2 + %427 = OpVectorShuffle %v3float %420 %420 0 1 2 + %428 = OpFSub %v3float %426 %427 + 
%429 = OpCompositeExtract %float %425 3 + %430 = OpCompositeExtract %float %420 3 + %431 = OpFAdd %float %429 %430 + %432 = OpCompositeConstruct %v3float %431 %431 %431 + %433 = OpFOrdLessThan %v3bool %428 %432 + %434 = OpSelect %v3int %433 %74 %65 + %435 = OpFAdd %v3float %426 %427 + %436 = OpFNegate %float %429 + %437 = OpFSub %float %436 %430 + %438 = OpCompositeConstruct %v3float %437 %437 %437 + %439 = OpFOrdGreaterThan %v3bool %435 %438 + %440 = OpSelect %v3int %439 %74 %65 + %441 = OpIMul %v3int %75 %440 + %442 = OpIAdd %v3int %434 %441 + %443 = OpCompositeExtract %float %365 0 + %444 = OpCompositeExtract %float %365 1 + %445 = OpCompositeExtract %float %365 2 + %446 = OpCompositeConstruct %v4float %443 %444 %445 %float_1 + %447 = OpMatrixTimesVector %v4float %351 %446 + %448 = OpVectorShuffle %v3float %447 %447 0 1 2 + %449 = OpFSub %v3float %448 %427 + %450 = OpCompositeExtract %float %447 3 + %451 = OpFAdd %float %450 %430 + %452 = OpCompositeConstruct %v3float %451 %451 %451 + %453 = OpFOrdLessThan %v3bool %449 %452 + %454 = OpSelect %v3int %453 %74 %65 + %455 = OpFAdd %v3float %448 %427 + %456 = OpFNegate %float %450 + %457 = OpFSub %float %456 %430 + %458 = OpCompositeConstruct %v3float %457 %457 %457 + %459 = OpFOrdGreaterThan %v3bool %455 %458 + %460 = OpSelect %v3int %459 %74 %65 + %461 = OpIMul %v3int %75 %460 + %462 = OpIAdd %v3int %454 %461 + %463 = OpBitwiseOr %v3int %442 %462 + %464 = OpCompositeExtract %float %371 0 + %465 = OpCompositeExtract %float %371 1 + %466 = OpCompositeExtract %float %371 2 + %467 = OpCompositeConstruct %v4float %464 %465 %466 %float_1 + %468 = OpMatrixTimesVector %v4float %351 %467 + %469 = OpVectorShuffle %v3float %468 %468 0 1 2 + %470 = OpFSub %v3float %469 %427 + %471 = OpCompositeExtract %float %468 3 + %472 = OpFAdd %float %471 %430 + %473 = OpCompositeConstruct %v3float %472 %472 %472 + %474 = OpFOrdLessThan %v3bool %470 %473 + %475 = OpSelect %v3int %474 %74 %65 + %476 = OpFAdd %v3float %469 %427 + %477 = 
OpFNegate %float %471 + %478 = OpFSub %float %477 %430 + %479 = OpCompositeConstruct %v3float %478 %478 %478 + %480 = OpFOrdGreaterThan %v3bool %476 %479 + %481 = OpSelect %v3int %480 %74 %65 + %482 = OpIMul %v3int %75 %481 + %483 = OpIAdd %v3int %475 %482 + %484 = OpBitwiseOr %v3int %463 %483 + %485 = OpINotEqual %v3bool %484 %66 + %486 = OpAny %bool %485 + OpSelectionMerge %487 None + OpBranchConditional %486 %488 %487 + %488 = OpLabel + OpBranch %417 + %487 = OpLabel + %489 = OpFSub %v3float %413 %414 + %490 = OpFSub %v3float %414 %415 + %491 = OpFSub %v3float %415 %413 + %492 = OpFAdd %v3float %413 %414 + %493 = OpFMul %v3float %69 %492 + %494 = OpFSub %v3float %493 %355 + %495 = OpFAdd %v3float %414 %415 + %496 = OpFMul %v3float %69 %495 + %497 = OpFSub %v3float %496 %355 + %498 = OpFAdd %v3float %415 %413 + %499 = OpFMul %v3float %69 %498 + %500 = OpFSub %v3float %499 %355 + %501 = OpDot %float %490 %490 + %502 = OpDot %float %497 %497 + %503 = OpFDiv %float %501 %502 + %504 = OpExtInst %float %1 Sqrt %503 + %505 = OpDot %float %491 %491 + %506 = OpDot %float %500 %500 + %507 = OpFDiv %float %505 %506 + %508 = OpExtInst %float %1 Sqrt %507 + %509 = OpDot %float %489 %489 + %510 = OpDot %float %494 %494 + %511 = OpFDiv %float %509 %510 + %512 = OpExtInst %float %1 Sqrt %511 + %513 = OpCompositeConstruct %v4float %504 %508 %512 %float_1 + %514 = OpFAdd %float %504 %508 + %515 = OpFAdd %float %514 %512 + %516 = OpFMul %float %float_0_333000004 %515 + %517 = OpCompositeInsert %v4float %516 %513 3 + %518 = OpCompositeConstruct %v4float %357 %357 %357 %357 + %519 = OpFMul %v4float %518 %517 + OpBranch %417 + %418 = OpLabel + OpBranch %416 + %417 = OpLabel + %520 = OpPhi %v4float %68 %488 %519 %487 + %521 = OpFMul %v4float %412 %520 + %522 = OpExtInst %v4float %1 FClamp %521 %59 %61 + %523 = OpCompositeExtract %float %522 0 + %524 = OpCompositeExtract %float %522 1 + %525 = OpCompositeExtract %float %522 2 + %526 = OpCompositeExtract %float %522 3 + %527 = 
OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_0 + OpStore %527 %523 + %528 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_1 + OpStore %528 %524 + %529 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_2 + OpStore %529 %525 + %530 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %uint_0 + OpStore %530 %526 + OpStore %out_var_PN_POSITION9 %387 + OpBranch %if_merge + %if_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc b/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc new file mode 100644 index 00000000000..98216e79243 --- /dev/null +++ b/shaders-ue4/asm/tesc/tess-factor-must-be-threadgroup.invalid.asm.tesc @@ -0,0 +1,352 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 179 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationControl %MainHull "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_VS_To_DS_Position %gl_InvocationID %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_VS_To_DS_Position %out_var_Flat_DisplacementScales %out_var_Flat_TessellationMultiplier %out_var_Flat_WorldDisplacementMultiplier %gl_TessLevelOuter %gl_TessLevelInner + OpExecutionMode %MainHull Triangles + OpExecutionMode %MainHull SpacingFractionalOdd + OpExecutionMode %MainHull VertexOrderCw + OpExecutionMode %MainHull OutputVertices 3 + OpSource HLSL 600 + OpName %FFlatTessellationHSToDS "FFlatTessellationHSToDS" + OpMemberName %FFlatTessellationHSToDS 0 "PassSpecificData" + OpMemberName %FFlatTessellationHSToDS 1 "DisplacementScale" + OpMemberName %FFlatTessellationHSToDS 2 "TessellationMultiplier" + OpMemberName %FFlatTessellationHSToDS 3 "WorldDisplacementMultiplier" + OpName %FBasePassVSToDS "FBasePassVSToDS" + OpMemberName 
%FBasePassVSToDS 0 "FactoryInterpolants" + OpMemberName %FBasePassVSToDS 1 "BasePassInterpolants" + OpMemberName %FBasePassVSToDS 2 "Position" + OpName %FVertexFactoryInterpolantsVSToDS "FVertexFactoryInterpolantsVSToDS" + OpMemberName %FVertexFactoryInterpolantsVSToDS 0 "InterpolantsVSToPS" + OpName %FVertexFactoryInterpolantsVSToPS "FVertexFactoryInterpolantsVSToPS" + OpMemberName %FVertexFactoryInterpolantsVSToPS 0 "TangentToWorld0" + OpMemberName %FVertexFactoryInterpolantsVSToPS 1 "TangentToWorld2" + OpName %FBasePassInterpolantsVSToDS "FBasePassInterpolantsVSToDS" + OpName %FSharedBasePassInterpolants "FSharedBasePassInterpolants" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_DrawsVelocity" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + OpMemberName %type_Primitive 16 "Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 "Primitive_LightingChannelMask" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName 
%type_Primitive 19 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 20 "Primitive_PreSkinnedLocalBounds" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpMemberName %type_Primitive 22 "Primitive_OutputVelocity" + OpMemberName %type_Primitive 23 "PrePadding_Primitive_420" + OpMemberName %type_Primitive 24 "PrePadding_Primitive_424" + OpMemberName %type_Primitive 25 "PrePadding_Primitive_428" + OpMemberName %type_Primitive 26 "Primitive_CustomPrimitiveData" + OpName %Primitive "Primitive" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_VS_To_DS_Position "out.var.VS_To_DS_Position" + OpName %out_var_Flat_DisplacementScales "out.var.Flat_DisplacementScales" + OpName %out_var_Flat_TessellationMultiplier "out.var.Flat_TessellationMultiplier" + OpName %out_var_Flat_WorldDisplacementMultiplier "out.var.Flat_WorldDisplacementMultiplier" + OpName %MainHull "MainHull" + OpName %param_var_I "param.var.I" + OpName %temp_var_hullMainRetVal "temp.var.hullMainRetVal" + OpName %if_merge "if.merge" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorate %gl_InvocationID BuiltIn InvocationId + OpDecorateString %gl_InvocationID UserSemantic "SV_OutputControlPointID" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString 
%out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %out_var_Flat_DisplacementScales UserSemantic "Flat_DisplacementScales" + OpDecorateString %out_var_Flat_TessellationMultiplier UserSemantic "Flat_TessellationMultiplier" + OpDecorateString %out_var_Flat_WorldDisplacementMultiplier UserSemantic "Flat_WorldDisplacementMultiplier" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorate %in_var_TEXCOORD10_centroid Location 0 + OpDecorate %in_var_TEXCOORD11_centroid Location 1 + OpDecorate %in_var_VS_To_DS_Position Location 2 + OpDecorate %out_var_Flat_DisplacementScales Location 0 + OpDecorate %out_var_Flat_TessellationMultiplier Location 1 + OpDecorate %out_var_Flat_WorldDisplacementMultiplier Location 2 + OpDecorate %out_var_TEXCOORD10_centroid Location 3 + OpDecorate %out_var_TEXCOORD11_centroid Location 4 + OpDecorate %out_var_VS_To_DS_Position Location 5 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 0 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 1 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + 
OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate %type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 412 + OpMemberDecorate %type_Primitive 22 Offset 416 + OpMemberDecorate %type_Primitive 23 Offset 420 + OpMemberDecorate %type_Primitive 24 Offset 424 + OpMemberDecorate %type_Primitive 25 Offset 428 + OpMemberDecorate %type_Primitive 26 Offset 432 + OpDecorate %type_Primitive Block + OpDecorate %_arr_v4float_uint_3 ArrayStride 16 + OpDecorate %_arr_v4float_uint_1 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 48 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %int_0 = OpConstant %int 0 + %int_2 = OpConstant %int 2 + %float_0_5 = OpConstant %float 0.5 + %int_1 = OpConstant %int 1 +%float_0_333000004 = OpConstant %float 0.333000004 + 
%float_1 = OpConstant %float 1 + %49 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %float_15 = OpConstant %float 15 + %51 = OpConstantComposite %v4float %float_15 %float_15 %float_15 %float_15 +%FVertexFactoryInterpolantsVSToPS = OpTypeStruct %v4float %v4float +%FVertexFactoryInterpolantsVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToPS +%FSharedBasePassInterpolants = OpTypeStruct +%FBasePassInterpolantsVSToDS = OpTypeStruct %FSharedBasePassInterpolants +%FBasePassVSToDS = OpTypeStruct %FVertexFactoryInterpolantsVSToDS %FBasePassInterpolantsVSToDS %v4float +%FFlatTessellationHSToDS = OpTypeStruct %FBasePassVSToDS %v3float %float %float + %int_15 = OpConstant %int 15 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %uint %v3float %uint %v3float %int %uint %uint %uint %uint %_arr_v4float_uint_4 +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%type_Material = OpTypeStruct %_arr_v4float_uint_3 %_arr_v4float_uint_1 +%_ptr_Uniform_type_Material = OpTypePointer Uniform %type_Material +%_arr_v4float_uint_3_0 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3_0 = OpTypePointer Input %_arr_v4float_uint_3_0 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Output__arr_v4float_uint_3_0 = OpTypePointer Output %_arr_v4float_uint_3_0 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Output__arr_v3float_uint_3 = OpTypePointer Output %_arr_v3float_uint_3 +%_ptr_Output__arr_float_uint_3 = OpTypePointer Output %_arr_float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 
+%_ptr_Output__arr_float_uint_2 = OpTypePointer Output %_arr_float_uint_2 + %void = OpTypeVoid + %67 = OpTypeFunction %void +%_arr_FBasePassVSToDS_uint_3 = OpTypeArray %FBasePassVSToDS %uint_3 +%_ptr_Function__arr_FBasePassVSToDS_uint_3 = OpTypePointer Function %_arr_FBasePassVSToDS_uint_3 +%_arr_FFlatTessellationHSToDS_uint_3 = OpTypeArray %FFlatTessellationHSToDS %uint_3 +%_ptr_Function__arr_FFlatTessellationHSToDS_uint_3 = OpTypePointer Function %_arr_FFlatTessellationHSToDS_uint_3 +%_ptr_Workgroup__arr_FFlatTessellationHSToDS_uint_3 = OpTypePointer Workgroup %_arr_FFlatTessellationHSToDS_uint_3 +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output_v3float = OpTypePointer Output %v3float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Function_FFlatTessellationHSToDS = OpTypePointer Function %FFlatTessellationHSToDS +%_ptr_Workgroup_FFlatTessellationHSToDS = OpTypePointer Workgroup %FFlatTessellationHSToDS + %bool = OpTypeBool +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%mat3v3float = OpTypeMatrix %v3float 3 +%_ptr_Function_FVertexFactoryInterpolantsVSToDS = OpTypePointer Function %FVertexFactoryInterpolantsVSToDS +%_ptr_Function_FBasePassVSToDS = OpTypePointer Function %FBasePassVSToDS +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %Primitive = OpVariable %_ptr_Uniform_type_Primitive Uniform + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3_0 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3_0 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_3_0 Input +%gl_InvocationID = OpVariable %_ptr_Input_uint Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3_0 Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output__arr_v4float_uint_3_0 Output 
+%out_var_VS_To_DS_Position = OpVariable %_ptr_Output__arr_v4float_uint_3_0 Output +%out_var_Flat_DisplacementScales = OpVariable %_ptr_Output__arr_v3float_uint_3 Output +%out_var_Flat_TessellationMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%out_var_Flat_WorldDisplacementMultiplier = OpVariable %_ptr_Output__arr_float_uint_3 Output +%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output +%gl_TessLevelInner = OpVariable %_ptr_Output__arr_float_uint_2 Output + %83 = OpConstantNull %FSharedBasePassInterpolants + %84 = OpConstantComposite %FBasePassInterpolantsVSToDS %83 + %85 = OpUndef %v4float + +; XXX: Original asm used Function here, which is wrong. +; This patches the SPIR-V to be correct. +%temp_var_hullMainRetVal = OpVariable %_ptr_Workgroup__arr_FFlatTessellationHSToDS_uint_3 Workgroup + + %MainHull = OpFunction %void None %67 + %86 = OpLabel +%param_var_I = OpVariable %_ptr_Function__arr_FBasePassVSToDS_uint_3 Function + %87 = OpLoad %_arr_v4float_uint_3_0 %in_var_TEXCOORD10_centroid + %88 = OpLoad %_arr_v4float_uint_3_0 %in_var_TEXCOORD11_centroid + %89 = OpCompositeExtract %v4float %87 0 + %90 = OpCompositeExtract %v4float %88 0 + %91 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %89 %90 + %92 = OpCompositeExtract %v4float %87 1 + %93 = OpCompositeExtract %v4float %88 1 + %94 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %92 %93 + %95 = OpCompositeExtract %v4float %87 2 + %96 = OpCompositeExtract %v4float %88 2 + %97 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToPS %95 %96 + %98 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %91 + %99 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %94 + %100 = OpCompositeConstruct %FVertexFactoryInterpolantsVSToDS %97 + %101 = OpLoad %_arr_v4float_uint_3_0 %in_var_VS_To_DS_Position + %102 = OpCompositeExtract %v4float %101 0 + %103 = OpCompositeConstruct %FBasePassVSToDS %98 %84 %102 + %104 = OpCompositeExtract %v4float %101 1 + %105 = 
OpCompositeConstruct %FBasePassVSToDS %99 %84 %104 + %106 = OpCompositeExtract %v4float %101 2 + %107 = OpCompositeConstruct %FBasePassVSToDS %100 %84 %106 + %108 = OpCompositeConstruct %_arr_FBasePassVSToDS_uint_3 %103 %105 %107 + OpStore %param_var_I %108 + %109 = OpLoad %uint %gl_InvocationID + %110 = OpAccessChain %_ptr_Function_FVertexFactoryInterpolantsVSToDS %param_var_I %109 %int_0 + %111 = OpLoad %FVertexFactoryInterpolantsVSToDS %110 + %112 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %111 0 + %113 = OpCompositeExtract %v4float %112 0 + %114 = OpCompositeExtract %v4float %112 1 + %115 = OpVectorShuffle %v3float %113 %113 0 1 2 + %116 = OpVectorShuffle %v3float %114 %114 0 1 2 + %117 = OpExtInst %v3float %1 Cross %116 %115 + %118 = OpCompositeExtract %float %114 3 + %119 = OpCompositeConstruct %v3float %118 %118 %118 + %120 = OpFMul %v3float %117 %119 + %121 = OpCompositeConstruct %mat3v3float %115 %120 %116 + %122 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_15 + %123 = OpLoad %v4float %122 + %124 = OpVectorShuffle %v3float %123 %123 0 1 2 + %125 = OpVectorTimesMatrix %v3float %124 %121 + %126 = OpAccessChain %_ptr_Function_FBasePassVSToDS %param_var_I %109 + %127 = OpLoad %FBasePassVSToDS %126 + %128 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_0 + %129 = OpLoad %float %128 + %130 = OpCompositeConstruct %FFlatTessellationHSToDS %127 %125 %129 %float_1 + %131 = OpCompositeExtract %FVertexFactoryInterpolantsVSToDS %127 0 + %132 = OpCompositeExtract %FVertexFactoryInterpolantsVSToPS %131 0 + %133 = OpCompositeExtract %v4float %132 0 + %134 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD10_centroid %109 + OpStore %134 %133 + %135 = OpCompositeExtract %v4float %132 1 + %136 = OpAccessChain %_ptr_Output_v4float %out_var_TEXCOORD11_centroid %109 + OpStore %136 %135 + %137 = OpCompositeExtract %v4float %127 2 + %138 = OpAccessChain %_ptr_Output_v4float %out_var_VS_To_DS_Position %109 + OpStore %138 %137 + %139 = 
OpAccessChain %_ptr_Output_v3float %out_var_Flat_DisplacementScales %109 + OpStore %139 %125 + %140 = OpAccessChain %_ptr_Output_float %out_var_Flat_TessellationMultiplier %109 + OpStore %140 %129 + %141 = OpAccessChain %_ptr_Output_float %out_var_Flat_WorldDisplacementMultiplier %109 + OpStore %141 %float_1 + %142 = OpAccessChain %_ptr_Workgroup_FFlatTessellationHSToDS %temp_var_hullMainRetVal %109 + OpStore %142 %130 + OpControlBarrier %uint_2 %uint_4 %uint_0 + %143 = OpIEqual %bool %109 %uint_0 + OpSelectionMerge %if_merge None + OpBranchConditional %143 %144 %if_merge + %144 = OpLabel + %145 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_1 %int_2 + %146 = OpLoad %float %145 + %147 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_2 %int_2 + %148 = OpLoad %float %147 + %149 = OpFAdd %float %146 %148 + %150 = OpFMul %float %float_0_5 %149 + %151 = OpCompositeInsert %v4float %150 %85 0 + %152 = OpLoad %float %147 + %153 = OpAccessChain %_ptr_Workgroup_float %temp_var_hullMainRetVal %uint_0 %int_2 + %154 = OpLoad %float %153 + %155 = OpFAdd %float %152 %154 + %156 = OpFMul %float %float_0_5 %155 + %157 = OpCompositeInsert %v4float %156 %151 1 + %158 = OpLoad %float %153 + %159 = OpLoad %float %145 + %160 = OpFAdd %float %158 %159 + %161 = OpFMul %float %float_0_5 %160 + %162 = OpCompositeInsert %v4float %161 %157 2 + %163 = OpLoad %float %153 + %164 = OpLoad %float %145 + %165 = OpFAdd %float %163 %164 + %166 = OpLoad %float %147 + %167 = OpFAdd %float %165 %166 + %168 = OpFMul %float %float_0_333000004 %167 + %169 = OpCompositeInsert %v4float %168 %162 3 + %170 = OpExtInst %v4float %1 FClamp %169 %49 %51 + %171 = OpCompositeExtract %float %170 0 + %172 = OpCompositeExtract %float %170 1 + %173 = OpCompositeExtract %float %170 2 + %174 = OpCompositeExtract %float %170 3 + %175 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_0 + OpStore %175 %171 + %176 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_1 
+ OpStore %176 %172 + %177 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_2 + OpStore %177 %173 + %178 = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %uint_0 + OpStore %178 %174 + OpBranch %if_merge + %if_merge = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese b/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese new file mode 100644 index 00000000000..dc543d1f861 --- /dev/null +++ b/shaders-ue4/asm/tese/ds-double-gl-in-deref.asm.tese @@ -0,0 +1,1046 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 310 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpCapability StorageImageExtendedFormats + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %MainDomain "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_COLOR0 %in_var_TEXCOORD0 %in_var_PRIMITIVE_ID %in_var_VS_to_DS_Position %in_var_PN_POSITION %in_var_PN_DisplacementScales %in_var_PN_TessellationMultiplier %in_var_PN_WorldDisplacementMultiplier %gl_TessLevelOuter %gl_TessLevelInner %in_var_PN_POSITION9 %gl_TessCoord %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_COLOR0 %out_var_TEXCOORD0 %out_var_PRIMITIVE_ID %out_var_TEXCOORD6 %out_var_TEXCOORD8 %out_var_TEXCOORD7 %gl_Position + OpExecutionMode %MainDomain Triangles + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName 
%type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 
"PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + 
OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName 
%type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 "View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 
148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_sampler "type.sampler" + OpName %type_2d_image "type.2d.image" + OpName 
%type_ShadowDepthPass "type.ShadowDepthPass" + OpMemberName %type_ShadowDepthPass 0 "PrePadding_ShadowDepthPass_LPV_0" + OpMemberName %type_ShadowDepthPass 1 "PrePadding_ShadowDepthPass_LPV_4" + OpMemberName %type_ShadowDepthPass 2 "PrePadding_ShadowDepthPass_LPV_8" + OpMemberName %type_ShadowDepthPass 3 "PrePadding_ShadowDepthPass_LPV_12" + OpMemberName %type_ShadowDepthPass 4 "PrePadding_ShadowDepthPass_LPV_16" + OpMemberName %type_ShadowDepthPass 5 "PrePadding_ShadowDepthPass_LPV_20" + OpMemberName %type_ShadowDepthPass 6 "PrePadding_ShadowDepthPass_LPV_24" + OpMemberName %type_ShadowDepthPass 7 "PrePadding_ShadowDepthPass_LPV_28" + OpMemberName %type_ShadowDepthPass 8 "PrePadding_ShadowDepthPass_LPV_32" + OpMemberName %type_ShadowDepthPass 9 "PrePadding_ShadowDepthPass_LPV_36" + OpMemberName %type_ShadowDepthPass 10 "PrePadding_ShadowDepthPass_LPV_40" + OpMemberName %type_ShadowDepthPass 11 "PrePadding_ShadowDepthPass_LPV_44" + OpMemberName %type_ShadowDepthPass 12 "PrePadding_ShadowDepthPass_LPV_48" + OpMemberName %type_ShadowDepthPass 13 "PrePadding_ShadowDepthPass_LPV_52" + OpMemberName %type_ShadowDepthPass 14 "PrePadding_ShadowDepthPass_LPV_56" + OpMemberName %type_ShadowDepthPass 15 "PrePadding_ShadowDepthPass_LPV_60" + OpMemberName %type_ShadowDepthPass 16 "PrePadding_ShadowDepthPass_LPV_64" + OpMemberName %type_ShadowDepthPass 17 "PrePadding_ShadowDepthPass_LPV_68" + OpMemberName %type_ShadowDepthPass 18 "PrePadding_ShadowDepthPass_LPV_72" + OpMemberName %type_ShadowDepthPass 19 "PrePadding_ShadowDepthPass_LPV_76" + OpMemberName %type_ShadowDepthPass 20 "PrePadding_ShadowDepthPass_LPV_80" + OpMemberName %type_ShadowDepthPass 21 "PrePadding_ShadowDepthPass_LPV_84" + OpMemberName %type_ShadowDepthPass 22 "PrePadding_ShadowDepthPass_LPV_88" + OpMemberName %type_ShadowDepthPass 23 "PrePadding_ShadowDepthPass_LPV_92" + OpMemberName %type_ShadowDepthPass 24 "PrePadding_ShadowDepthPass_LPV_96" + OpMemberName %type_ShadowDepthPass 25 
"PrePadding_ShadowDepthPass_LPV_100" + OpMemberName %type_ShadowDepthPass 26 "PrePadding_ShadowDepthPass_LPV_104" + OpMemberName %type_ShadowDepthPass 27 "PrePadding_ShadowDepthPass_LPV_108" + OpMemberName %type_ShadowDepthPass 28 "PrePadding_ShadowDepthPass_LPV_112" + OpMemberName %type_ShadowDepthPass 29 "PrePadding_ShadowDepthPass_LPV_116" + OpMemberName %type_ShadowDepthPass 30 "PrePadding_ShadowDepthPass_LPV_120" + OpMemberName %type_ShadowDepthPass 31 "PrePadding_ShadowDepthPass_LPV_124" + OpMemberName %type_ShadowDepthPass 32 "PrePadding_ShadowDepthPass_LPV_128" + OpMemberName %type_ShadowDepthPass 33 "PrePadding_ShadowDepthPass_LPV_132" + OpMemberName %type_ShadowDepthPass 34 "PrePadding_ShadowDepthPass_LPV_136" + OpMemberName %type_ShadowDepthPass 35 "PrePadding_ShadowDepthPass_LPV_140" + OpMemberName %type_ShadowDepthPass 36 "PrePadding_ShadowDepthPass_LPV_144" + OpMemberName %type_ShadowDepthPass 37 "PrePadding_ShadowDepthPass_LPV_148" + OpMemberName %type_ShadowDepthPass 38 "PrePadding_ShadowDepthPass_LPV_152" + OpMemberName %type_ShadowDepthPass 39 "PrePadding_ShadowDepthPass_LPV_156" + OpMemberName %type_ShadowDepthPass 40 "PrePadding_ShadowDepthPass_LPV_160" + OpMemberName %type_ShadowDepthPass 41 "PrePadding_ShadowDepthPass_LPV_164" + OpMemberName %type_ShadowDepthPass 42 "PrePadding_ShadowDepthPass_LPV_168" + OpMemberName %type_ShadowDepthPass 43 "PrePadding_ShadowDepthPass_LPV_172" + OpMemberName %type_ShadowDepthPass 44 "PrePadding_ShadowDepthPass_LPV_176" + OpMemberName %type_ShadowDepthPass 45 "PrePadding_ShadowDepthPass_LPV_180" + OpMemberName %type_ShadowDepthPass 46 "PrePadding_ShadowDepthPass_LPV_184" + OpMemberName %type_ShadowDepthPass 47 "PrePadding_ShadowDepthPass_LPV_188" + OpMemberName %type_ShadowDepthPass 48 "PrePadding_ShadowDepthPass_LPV_192" + OpMemberName %type_ShadowDepthPass 49 "PrePadding_ShadowDepthPass_LPV_196" + OpMemberName %type_ShadowDepthPass 50 "PrePadding_ShadowDepthPass_LPV_200" + OpMemberName %type_ShadowDepthPass 
51 "PrePadding_ShadowDepthPass_LPV_204" + OpMemberName %type_ShadowDepthPass 52 "PrePadding_ShadowDepthPass_LPV_208" + OpMemberName %type_ShadowDepthPass 53 "PrePadding_ShadowDepthPass_LPV_212" + OpMemberName %type_ShadowDepthPass 54 "PrePadding_ShadowDepthPass_LPV_216" + OpMemberName %type_ShadowDepthPass 55 "PrePadding_ShadowDepthPass_LPV_220" + OpMemberName %type_ShadowDepthPass 56 "PrePadding_ShadowDepthPass_LPV_224" + OpMemberName %type_ShadowDepthPass 57 "PrePadding_ShadowDepthPass_LPV_228" + OpMemberName %type_ShadowDepthPass 58 "PrePadding_ShadowDepthPass_LPV_232" + OpMemberName %type_ShadowDepthPass 59 "PrePadding_ShadowDepthPass_LPV_236" + OpMemberName %type_ShadowDepthPass 60 "PrePadding_ShadowDepthPass_LPV_240" + OpMemberName %type_ShadowDepthPass 61 "PrePadding_ShadowDepthPass_LPV_244" + OpMemberName %type_ShadowDepthPass 62 "PrePadding_ShadowDepthPass_LPV_248" + OpMemberName %type_ShadowDepthPass 63 "PrePadding_ShadowDepthPass_LPV_252" + OpMemberName %type_ShadowDepthPass 64 "PrePadding_ShadowDepthPass_LPV_256" + OpMemberName %type_ShadowDepthPass 65 "PrePadding_ShadowDepthPass_LPV_260" + OpMemberName %type_ShadowDepthPass 66 "PrePadding_ShadowDepthPass_LPV_264" + OpMemberName %type_ShadowDepthPass 67 "PrePadding_ShadowDepthPass_LPV_268" + OpMemberName %type_ShadowDepthPass 68 "ShadowDepthPass_LPV_mRsmToWorld" + OpMemberName %type_ShadowDepthPass 69 "ShadowDepthPass_LPV_mLightColour" + OpMemberName %type_ShadowDepthPass 70 "ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection" + OpMemberName %type_ShadowDepthPass 71 "ShadowDepthPass_LPV_mEyePos" + OpMemberName %type_ShadowDepthPass 72 "ShadowDepthPass_LPV_mOldGridOffset" + OpMemberName %type_ShadowDepthPass 73 "PrePadding_ShadowDepthPass_LPV_396" + OpMemberName %type_ShadowDepthPass 74 "ShadowDepthPass_LPV_mLpvGridOffset" + OpMemberName %type_ShadowDepthPass 75 "ShadowDepthPass_LPV_ClearMultiplier" + OpMemberName %type_ShadowDepthPass 76 "ShadowDepthPass_LPV_LpvScale" + OpMemberName 
%type_ShadowDepthPass 77 "ShadowDepthPass_LPV_OneOverLpvScale" + OpMemberName %type_ShadowDepthPass 78 "ShadowDepthPass_LPV_DirectionalOcclusionIntensity" + OpMemberName %type_ShadowDepthPass 79 "ShadowDepthPass_LPV_DirectionalOcclusionRadius" + OpMemberName %type_ShadowDepthPass 80 "ShadowDepthPass_LPV_RsmAreaIntensityMultiplier" + OpMemberName %type_ShadowDepthPass 81 "ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier" + OpMemberName %type_ShadowDepthPass 82 "ShadowDepthPass_LPV_SecondaryOcclusionStrength" + OpMemberName %type_ShadowDepthPass 83 "ShadowDepthPass_LPV_SecondaryBounceStrength" + OpMemberName %type_ShadowDepthPass 84 "ShadowDepthPass_LPV_VplInjectionBias" + OpMemberName %type_ShadowDepthPass 85 "ShadowDepthPass_LPV_GeometryVolumeInjectionBias" + OpMemberName %type_ShadowDepthPass 86 "ShadowDepthPass_LPV_EmissiveInjectionMultiplier" + OpMemberName %type_ShadowDepthPass 87 "ShadowDepthPass_LPV_PropagationIndex" + OpMemberName %type_ShadowDepthPass 88 "ShadowDepthPass_ProjectionMatrix" + OpMemberName %type_ShadowDepthPass 89 "ShadowDepthPass_ViewMatrix" + OpMemberName %type_ShadowDepthPass 90 "ShadowDepthPass_ShadowParams" + OpMemberName %type_ShadowDepthPass 91 "ShadowDepthPass_bClampToNearPlane" + OpMemberName %type_ShadowDepthPass 92 "PrePadding_ShadowDepthPass_612" + OpMemberName %type_ShadowDepthPass 93 "PrePadding_ShadowDepthPass_616" + OpMemberName %type_ShadowDepthPass 94 "PrePadding_ShadowDepthPass_620" + OpMemberName %type_ShadowDepthPass 95 "ShadowDepthPass_ShadowViewProjectionMatrices" + OpMemberName %type_ShadowDepthPass 96 "ShadowDepthPass_ShadowViewMatrices" + OpName %ShadowDepthPass "ShadowDepthPass" + OpName %Material_Texture2D_3 "Material_Texture2D_3" + OpName %Material_Texture2D_3Sampler "Material_Texture2D_3Sampler" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_COLOR0 "in.var.COLOR0" + OpName %in_var_TEXCOORD0 "in.var.TEXCOORD0" + 
OpName %in_var_PRIMITIVE_ID "in.var.PRIMITIVE_ID" + OpName %in_var_VS_to_DS_Position "in.var.VS_to_DS_Position" + OpName %in_var_PN_POSITION "in.var.PN_POSITION" + OpName %in_var_PN_DisplacementScales "in.var.PN_DisplacementScales" + OpName %in_var_PN_TessellationMultiplier "in.var.PN_TessellationMultiplier" + OpName %in_var_PN_WorldDisplacementMultiplier "in.var.PN_WorldDisplacementMultiplier" + OpName %in_var_PN_POSITION9 "in.var.PN_POSITION9" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_COLOR0 "out.var.COLOR0" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_PRIMITIVE_ID "out.var.PRIMITIVE_ID" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %out_var_TEXCOORD8 "out.var.TEXCOORD8" + OpName %out_var_TEXCOORD7 "out.var.TEXCOORD7" + OpName %MainDomain "MainDomain" + OpName %type_sampled_image "type.sampled.image" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %in_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %in_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorateString %in_var_VS_to_DS_Position UserSemantic "VS_to_DS_Position" + OpDecorateString %in_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %in_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %in_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %in_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner 
UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %in_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %in_var_PN_POSITION9 Patch + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" + OpDecorate %gl_TessCoord Patch + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_COLOR0 UserSemantic "COLOR0" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString %out_var_PRIMITIVE_ID UserSemantic "PRIMITIVE_ID" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %out_var_TEXCOORD8 UserSemantic "TEXCOORD8" + OpDecorateString %out_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %in_var_COLOR0 Location 0 + OpDecorate %in_var_PN_DisplacementScales Location 1 + OpDecorate %in_var_PN_POSITION Location 2 + OpDecorate %in_var_PN_POSITION9 Location 5 + OpDecorate %in_var_PN_TessellationMultiplier Location 6 + OpDecorate %in_var_PN_WorldDisplacementMultiplier Location 7 + OpDecorate %in_var_PRIMITIVE_ID Location 8 + OpDecorate %in_var_TEXCOORD0 Location 9 + OpDecorate %in_var_TEXCOORD10_centroid Location 10 + OpDecorate %in_var_TEXCOORD11_centroid Location 11 + OpDecorate %in_var_VS_to_DS_Position Location 12 + OpDecorate %out_var_TEXCOORD10_centroid Location 0 + OpDecorate %out_var_TEXCOORD11_centroid Location 1 + OpDecorate %out_var_COLOR0 Location 2 + OpDecorate %out_var_TEXCOORD0 Location 3 + OpDecorate %out_var_PRIMITIVE_ID Location 4 + OpDecorate %out_var_TEXCOORD6 Location 5 + OpDecorate %out_var_TEXCOORD8 Location 6 + OpDecorate %out_var_TEXCOORD7 Location 7 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %ShadowDepthPass DescriptorSet 0 + 
OpDecorate %ShadowDepthPass Binding 1 + OpDecorate %Material_Texture2D_3 DescriptorSet 0 + OpDecorate %Material_Texture2D_3 Binding 0 + OpDecorate %Material_Texture2D_3Sampler DescriptorSet 0 + OpDecorate %Material_Texture2D_3Sampler Binding 0 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate 
%type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 
+ OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + 
OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate %type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + 
OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate 
%type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpDecorate %type_View Block + OpDecorate %_arr_mat4v4float_uint_6 ArrayStride 64 + OpMemberDecorate %type_ShadowDepthPass 0 Offset 0 + OpMemberDecorate %type_ShadowDepthPass 1 Offset 4 + OpMemberDecorate %type_ShadowDepthPass 2 Offset 8 + OpMemberDecorate %type_ShadowDepthPass 3 Offset 12 + OpMemberDecorate %type_ShadowDepthPass 4 Offset 16 + OpMemberDecorate %type_ShadowDepthPass 5 Offset 20 + OpMemberDecorate %type_ShadowDepthPass 6 Offset 24 + OpMemberDecorate %type_ShadowDepthPass 7 Offset 28 + OpMemberDecorate %type_ShadowDepthPass 8 Offset 32 + OpMemberDecorate %type_ShadowDepthPass 9 Offset 36 + OpMemberDecorate %type_ShadowDepthPass 10 Offset 40 + OpMemberDecorate %type_ShadowDepthPass 11 Offset 44 + OpMemberDecorate %type_ShadowDepthPass 12 Offset 48 + OpMemberDecorate %type_ShadowDepthPass 13 Offset 52 + OpMemberDecorate %type_ShadowDepthPass 14 Offset 56 + OpMemberDecorate %type_ShadowDepthPass 15 Offset 60 + OpMemberDecorate %type_ShadowDepthPass 16 Offset 64 + OpMemberDecorate %type_ShadowDepthPass 17 Offset 68 + 
OpMemberDecorate %type_ShadowDepthPass 18 Offset 72 + OpMemberDecorate %type_ShadowDepthPass 19 Offset 76 + OpMemberDecorate %type_ShadowDepthPass 20 Offset 80 + OpMemberDecorate %type_ShadowDepthPass 21 Offset 84 + OpMemberDecorate %type_ShadowDepthPass 22 Offset 88 + OpMemberDecorate %type_ShadowDepthPass 23 Offset 92 + OpMemberDecorate %type_ShadowDepthPass 24 Offset 96 + OpMemberDecorate %type_ShadowDepthPass 25 Offset 100 + OpMemberDecorate %type_ShadowDepthPass 26 Offset 104 + OpMemberDecorate %type_ShadowDepthPass 27 Offset 108 + OpMemberDecorate %type_ShadowDepthPass 28 Offset 112 + OpMemberDecorate %type_ShadowDepthPass 29 Offset 116 + OpMemberDecorate %type_ShadowDepthPass 30 Offset 120 + OpMemberDecorate %type_ShadowDepthPass 31 Offset 124 + OpMemberDecorate %type_ShadowDepthPass 32 Offset 128 + OpMemberDecorate %type_ShadowDepthPass 33 Offset 132 + OpMemberDecorate %type_ShadowDepthPass 34 Offset 136 + OpMemberDecorate %type_ShadowDepthPass 35 Offset 140 + OpMemberDecorate %type_ShadowDepthPass 36 Offset 144 + OpMemberDecorate %type_ShadowDepthPass 37 Offset 148 + OpMemberDecorate %type_ShadowDepthPass 38 Offset 152 + OpMemberDecorate %type_ShadowDepthPass 39 Offset 156 + OpMemberDecorate %type_ShadowDepthPass 40 Offset 160 + OpMemberDecorate %type_ShadowDepthPass 41 Offset 164 + OpMemberDecorate %type_ShadowDepthPass 42 Offset 168 + OpMemberDecorate %type_ShadowDepthPass 43 Offset 172 + OpMemberDecorate %type_ShadowDepthPass 44 Offset 176 + OpMemberDecorate %type_ShadowDepthPass 45 Offset 180 + OpMemberDecorate %type_ShadowDepthPass 46 Offset 184 + OpMemberDecorate %type_ShadowDepthPass 47 Offset 188 + OpMemberDecorate %type_ShadowDepthPass 48 Offset 192 + OpMemberDecorate %type_ShadowDepthPass 49 Offset 196 + OpMemberDecorate %type_ShadowDepthPass 50 Offset 200 + OpMemberDecorate %type_ShadowDepthPass 51 Offset 204 + OpMemberDecorate %type_ShadowDepthPass 52 Offset 208 + OpMemberDecorate %type_ShadowDepthPass 53 Offset 212 + OpMemberDecorate 
%type_ShadowDepthPass 54 Offset 216 + OpMemberDecorate %type_ShadowDepthPass 55 Offset 220 + OpMemberDecorate %type_ShadowDepthPass 56 Offset 224 + OpMemberDecorate %type_ShadowDepthPass 57 Offset 228 + OpMemberDecorate %type_ShadowDepthPass 58 Offset 232 + OpMemberDecorate %type_ShadowDepthPass 59 Offset 236 + OpMemberDecorate %type_ShadowDepthPass 60 Offset 240 + OpMemberDecorate %type_ShadowDepthPass 61 Offset 244 + OpMemberDecorate %type_ShadowDepthPass 62 Offset 248 + OpMemberDecorate %type_ShadowDepthPass 63 Offset 252 + OpMemberDecorate %type_ShadowDepthPass 64 Offset 256 + OpMemberDecorate %type_ShadowDepthPass 65 Offset 260 + OpMemberDecorate %type_ShadowDepthPass 66 Offset 264 + OpMemberDecorate %type_ShadowDepthPass 67 Offset 268 + OpMemberDecorate %type_ShadowDepthPass 68 Offset 272 + OpMemberDecorate %type_ShadowDepthPass 68 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 68 ColMajor + OpMemberDecorate %type_ShadowDepthPass 69 Offset 336 + OpMemberDecorate %type_ShadowDepthPass 70 Offset 352 + OpMemberDecorate %type_ShadowDepthPass 71 Offset 368 + OpMemberDecorate %type_ShadowDepthPass 72 Offset 384 + OpMemberDecorate %type_ShadowDepthPass 73 Offset 396 + OpMemberDecorate %type_ShadowDepthPass 74 Offset 400 + OpMemberDecorate %type_ShadowDepthPass 75 Offset 412 + OpMemberDecorate %type_ShadowDepthPass 76 Offset 416 + OpMemberDecorate %type_ShadowDepthPass 77 Offset 420 + OpMemberDecorate %type_ShadowDepthPass 78 Offset 424 + OpMemberDecorate %type_ShadowDepthPass 79 Offset 428 + OpMemberDecorate %type_ShadowDepthPass 80 Offset 432 + OpMemberDecorate %type_ShadowDepthPass 81 Offset 436 + OpMemberDecorate %type_ShadowDepthPass 82 Offset 440 + OpMemberDecorate %type_ShadowDepthPass 83 Offset 444 + OpMemberDecorate %type_ShadowDepthPass 84 Offset 448 + OpMemberDecorate %type_ShadowDepthPass 85 Offset 452 + OpMemberDecorate %type_ShadowDepthPass 86 Offset 456 + OpMemberDecorate %type_ShadowDepthPass 87 Offset 460 + OpMemberDecorate 
%type_ShadowDepthPass 88 Offset 464 + OpMemberDecorate %type_ShadowDepthPass 88 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 88 ColMajor + OpMemberDecorate %type_ShadowDepthPass 89 Offset 528 + OpMemberDecorate %type_ShadowDepthPass 89 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 89 ColMajor + OpMemberDecorate %type_ShadowDepthPass 90 Offset 592 + OpMemberDecorate %type_ShadowDepthPass 91 Offset 608 + OpMemberDecorate %type_ShadowDepthPass 92 Offset 612 + OpMemberDecorate %type_ShadowDepthPass 93 Offset 616 + OpMemberDecorate %type_ShadowDepthPass 94 Offset 620 + OpMemberDecorate %type_ShadowDepthPass 95 Offset 624 + OpMemberDecorate %type_ShadowDepthPass 95 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 95 ColMajor + OpMemberDecorate %type_ShadowDepthPass 96 Offset 1008 + OpMemberDecorate %type_ShadowDepthPass 96 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 96 ColMajor + OpDecorate %type_ShadowDepthPass Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %float_3 = OpConstant %float 3 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %float_6 = OpConstant %float 6 + %57 = OpConstantComposite %v4float %float_6 %float_6 %float_6 %float_6 + %float_1 = OpConstant %float 1 + %int_79 = OpConstant %int 79 +%float_0_200000003 = OpConstant %float 0.200000003 +%float_n0_699999988 = OpConstant %float -0.699999988 + %float_2 = OpConstant %float 2 + %63 = OpConstantComposite %v2float %float_1 %float_2 + %float_n1 = OpConstant %float -1 + %float_10 = OpConstant %float 10 + %float_0_5 = OpConstant %float 0.5 + %67 = OpConstantComposite 
%v3float %float_0_5 %float_0_5 %float_0_5 + %int_88 = OpConstant %int 88 + %int_89 = OpConstant %int 89 + %int_90 = OpConstant %int 90 + %int_91 = OpConstant %int 91 + %float_0 = OpConstant %float 0 +%float_9_99999997en07 = OpConstant %float 9.99999997e-07 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float 
%float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image + %uint_6 = OpConstant %uint 6 +%_arr_mat4v4float_uint_6 = OpTypeArray %mat4v4float %uint_6 + %v3int = OpTypeVector %int 3 +%type_ShadowDepthPass = OpTypeStruct %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %mat4v4float %v4float %v4float %v4float %v3int %int %v3int %float %float %float %float %float %float %float %float %float %float %float %float %int %mat4v4float %mat4v4float %v4float %float %float %float %float %_arr_mat4v4float_uint_6 %_arr_mat4v4float_uint_6 +%_ptr_Uniform_type_ShadowDepthPass = OpTypePointer Uniform %type_ShadowDepthPass + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_arr__arr_v4float_uint_1_uint_3 = OpTypeArray %_arr_v4float_uint_1 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_1_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_1_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Input__arr_uint_uint_3 = OpTypePointer Input %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_3_uint_3 
+%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Input__arr_v3float_uint_3 = OpTypePointer Input %_arr_v3float_uint_3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_ptr_Output__arr_v4float_uint_1 = OpTypePointer Output %_arr_v4float_uint_1 +%_ptr_Output_uint = OpTypePointer Output %uint +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %106 = OpTypeFunction %void +%_ptr_Function_float = OpTypePointer Function %float + %bool = OpTypeBool +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Function_mat4v4float = OpTypePointer Function %mat4v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%ShadowDepthPass = OpVariable %_ptr_Uniform_type_ShadowDepthPass Uniform +%Material_Texture2D_3 = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%Material_Texture2D_3Sampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_COLOR0 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD0 = OpVariable %_ptr_Input__arr__arr_v4float_uint_1_uint_3 Input +%in_var_PRIMITIVE_ID = OpVariable %_ptr_Input__arr_uint_uint_3 Input +%in_var_VS_to_DS_Position = OpVariable 
%_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_POSITION = OpVariable %_ptr_Input__arr__arr_v4float_uint_3_uint_3 Input +%in_var_PN_DisplacementScales = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_TessellationMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_PN_POSITION9 = OpVariable %_ptr_Input_v4float Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_COLOR0 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD0 = OpVariable %_ptr_Output__arr_v4float_uint_1 Output +%out_var_PRIMITIVE_ID = OpVariable %_ptr_Output_uint Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_float Output +%out_var_TEXCOORD8 = OpVariable %_ptr_Output_float Output +%out_var_TEXCOORD7 = OpVariable %_ptr_Output_v3float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output + %112 = OpConstantNull %v4float + %113 = OpUndef %v4float +%_ptr_Input_uint = OpTypePointer Input %uint + %MainDomain = OpFunction %void None %106 + %115 = OpLabel + %116 = OpVariable %_ptr_Function_mat4v4float Function + %117 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD10_centroid + %118 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD11_centroid + %119 = OpLoad %_arr_v4float_uint_3 %in_var_COLOR0 + %120 = OpLoad %_arr__arr_v4float_uint_1_uint_3 %in_var_TEXCOORD0 + %121 = OpAccessChain %_ptr_Input_uint %in_var_PRIMITIVE_ID %uint_0 + %122 = OpLoad %uint %121 + %123 = OpCompositeExtract %v4float %117 0 + %124 = OpCompositeExtract %v4float %118 0 + %125 = OpCompositeExtract %v4float %119 0 + %126 = OpCompositeExtract %_arr_v4float_uint_1 %120 0 + %127 = OpCompositeExtract %v4float %117 1 + 
%128 = OpCompositeExtract %v4float %118 1 + %129 = OpCompositeExtract %v4float %119 1 + %130 = OpCompositeExtract %_arr_v4float_uint_1 %120 1 + %131 = OpCompositeExtract %v4float %117 2 + %132 = OpCompositeExtract %v4float %118 2 + %133 = OpCompositeExtract %v4float %119 2 + %134 = OpCompositeExtract %_arr_v4float_uint_1 %120 2 + %135 = OpLoad %_arr__arr_v4float_uint_3_uint_3 %in_var_PN_POSITION + %136 = OpLoad %_arr_float_uint_3 %in_var_PN_WorldDisplacementMultiplier + %137 = OpCompositeExtract %_arr_v4float_uint_3 %135 0 + %138 = OpCompositeExtract %float %136 0 + %139 = OpCompositeExtract %_arr_v4float_uint_3 %135 1 + %140 = OpCompositeExtract %float %136 1 + %141 = OpCompositeExtract %_arr_v4float_uint_3 %135 2 + %142 = OpCompositeExtract %float %136 2 + %143 = OpCompositeExtract %v4float %137 0 + %144 = OpCompositeExtract %v4float %137 1 + %145 = OpCompositeExtract %v4float %137 2 + %146 = OpCompositeExtract %v4float %139 0 + %147 = OpCompositeExtract %v4float %139 1 + %148 = OpCompositeExtract %v4float %139 2 + %149 = OpCompositeExtract %v4float %141 0 + %150 = OpCompositeExtract %v4float %141 1 + %151 = OpCompositeExtract %v4float %141 2 + %152 = OpLoad %v4float %in_var_PN_POSITION9 + %153 = OpLoad %v3float %gl_TessCoord + %154 = OpCompositeExtract %float %153 0 + %155 = OpCompositeExtract %float %153 1 + %156 = OpCompositeExtract %float %153 2 + %157 = OpFMul %float %154 %154 + %158 = OpFMul %float %155 %155 + %159 = OpFMul %float %156 %156 + %160 = OpFMul %float %157 %float_3 + %161 = OpFMul %float %158 %float_3 + %162 = OpFMul %float %159 %float_3 + %163 = OpCompositeConstruct %v4float %157 %157 %157 %157 + %164 = OpFMul %v4float %143 %163 + %165 = OpCompositeConstruct %v4float %154 %154 %154 %154 + %166 = OpFMul %v4float %164 %165 + %167 = OpCompositeConstruct %v4float %158 %158 %158 %158 + %168 = OpFMul %v4float %146 %167 + %169 = OpCompositeConstruct %v4float %155 %155 %155 %155 + %170 = OpFMul %v4float %168 %169 + %171 = OpFAdd %v4float %166 %170 + 
%172 = OpCompositeConstruct %v4float %159 %159 %159 %159 + %173 = OpFMul %v4float %149 %172 + %174 = OpCompositeConstruct %v4float %156 %156 %156 %156 + %175 = OpFMul %v4float %173 %174 + %176 = OpFAdd %v4float %171 %175 + %177 = OpCompositeConstruct %v4float %160 %160 %160 %160 + %178 = OpFMul %v4float %144 %177 + %179 = OpFMul %v4float %178 %169 + %180 = OpFAdd %v4float %176 %179 + %181 = OpCompositeConstruct %v4float %161 %161 %161 %161 + %182 = OpFMul %v4float %145 %181 + %183 = OpFMul %v4float %182 %165 + %184 = OpFAdd %v4float %180 %183 + %185 = OpFMul %v4float %147 %181 + %186 = OpFMul %v4float %185 %174 + %187 = OpFAdd %v4float %184 %186 + %188 = OpCompositeConstruct %v4float %162 %162 %162 %162 + %189 = OpFMul %v4float %148 %188 + %190 = OpFMul %v4float %189 %169 + %191 = OpFAdd %v4float %187 %190 + %192 = OpFMul %v4float %150 %188 + %193 = OpFMul %v4float %192 %165 + %194 = OpFAdd %v4float %191 %193 + %195 = OpFMul %v4float %151 %177 + %196 = OpFMul %v4float %195 %174 + %197 = OpFAdd %v4float %194 %196 + %198 = OpFMul %v4float %152 %57 + %199 = OpFMul %v4float %198 %174 + %200 = OpFMul %v4float %199 %165 + %201 = OpFMul %v4float %200 %169 + %202 = OpFAdd %v4float %197 %201 + %203 = OpCompositeExtract %v4float %126 0 + %204 = OpCompositeExtract %v4float %130 0 + %205 = OpVectorShuffle %v3float %123 %123 0 1 2 + %206 = OpCompositeConstruct %v3float %154 %154 %154 + %207 = OpFMul %v3float %205 %206 + %208 = OpVectorShuffle %v3float %127 %127 0 1 2 + %209 = OpCompositeConstruct %v3float %155 %155 %155 + %210 = OpFMul %v3float %208 %209 + %211 = OpFAdd %v3float %207 %210 + %212 = OpFMul %v4float %124 %165 + %213 = OpFMul %v4float %128 %169 + %214 = OpFAdd %v4float %212 %213 + %215 = OpFMul %v4float %125 %165 + %216 = OpFMul %v4float %129 %169 + %217 = OpFAdd %v4float %215 %216 + %218 = OpFMul %v4float %203 %165 + %219 = OpFMul %v4float %204 %169 + %220 = OpFAdd %v4float %218 %219 + %221 = OpCompositeExtract %v4float %134 0 + %222 = OpVectorShuffle %v3float 
%211 %112 0 1 2 + %223 = OpVectorShuffle %v3float %131 %131 0 1 2 + %224 = OpCompositeConstruct %v3float %156 %156 %156 + %225 = OpFMul %v3float %223 %224 + %226 = OpFAdd %v3float %222 %225 + %227 = OpVectorShuffle %v4float %113 %226 4 5 6 3 + %228 = OpFMul %v4float %132 %174 + %229 = OpFAdd %v4float %214 %228 + %230 = OpFMul %v4float %133 %174 + %231 = OpFAdd %v4float %217 %230 + %232 = OpFMul %v4float %221 %174 + %233 = OpFAdd %v4float %220 %232 + %234 = OpCompositeConstruct %_arr_v4float_uint_1 %233 + %235 = OpVectorShuffle %v2float %233 %233 2 3 + %236 = OpVectorShuffle %v3float %229 %229 0 1 2 + %237 = OpAccessChain %_ptr_Uniform_float %View %int_79 + %238 = OpLoad %float %237 + %239 = OpFMul %float %238 %float_0_200000003 + %240 = OpFMul %float %238 %float_n0_699999988 + %241 = OpFMul %v2float %235 %63 + %242 = OpCompositeConstruct %v2float %239 %240 + %243 = OpFAdd %v2float %242 %241 + %244 = OpLoad %type_2d_image %Material_Texture2D_3 + %245 = OpLoad %type_sampler %Material_Texture2D_3Sampler + %246 = OpSampledImage %type_sampled_image %244 %245 + %247 = OpImageSampleExplicitLod %v4float %246 %243 Lod %float_n1 + %248 = OpCompositeExtract %float %247 0 + %249 = OpFMul %float %248 %float_10 + %250 = OpCompositeExtract %float %231 0 + %251 = OpFSub %float %float_1 %250 + %252 = OpFMul %float %249 %251 + %253 = OpCompositeConstruct %v3float %252 %252 %252 + %254 = OpFMul %v3float %253 %236 + %255 = OpFMul %v3float %254 %67 + %256 = OpFMul %float %138 %154 + %257 = OpFMul %float %140 %155 + %258 = OpFAdd %float %256 %257 + %259 = OpFMul %float %142 %156 + %260 = OpFAdd %float %258 %259 + %261 = OpCompositeConstruct %v3float %260 %260 %260 + %262 = OpFMul %v3float %255 %261 + %263 = OpVectorShuffle %v3float %202 %202 0 1 2 + %264 = OpFAdd %v3float %263 %262 + %265 = OpVectorShuffle %v4float %202 %264 4 5 6 3 + %266 = OpAccessChain %_ptr_Uniform_mat4v4float %ShadowDepthPass %int_88 + %267 = OpLoad %mat4v4float %266 + %268 = OpAccessChain %_ptr_Uniform_mat4v4float 
%ShadowDepthPass %int_89 + %269 = OpLoad %mat4v4float %268 + OpStore %116 %269 + %270 = OpMatrixTimesVector %v4float %267 %265 + %271 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_91 + %272 = OpLoad %float %271 + %273 = OpFOrdGreaterThan %bool %272 %float_0 + %274 = OpCompositeExtract %float %270 2 + %275 = OpFOrdLessThan %bool %274 %float_0 + %276 = OpLogicalAnd %bool %273 %275 + OpSelectionMerge %277 None + OpBranchConditional %276 %278 %277 + %278 = OpLabel + %279 = OpCompositeInsert %v4float %float_9_99999997en07 %270 2 + %280 = OpCompositeInsert %v4float %float_1 %279 3 + OpBranch %277 + %277 = OpLabel + %281 = OpPhi %v4float %270 %115 %280 %278 + %282 = OpAccessChain %_ptr_Function_float %116 %uint_0 %int_2 + %283 = OpLoad %float %282 + %284 = OpAccessChain %_ptr_Function_float %116 %uint_1 %int_2 + %285 = OpLoad %float %284 + %286 = OpAccessChain %_ptr_Function_float %116 %uint_2 %int_2 + %287 = OpLoad %float %286 + %288 = OpCompositeConstruct %v3float %283 %285 %287 + %289 = OpDot %float %288 %236 + %290 = OpExtInst %float %1 FAbs %289 + %291 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_2 + %292 = OpLoad %float %291 + %293 = OpExtInst %float %1 FAbs %290 + %294 = OpFOrdGreaterThan %bool %293 %float_0 + %295 = OpFMul %float %290 %290 + %296 = OpFSub %float %float_1 %295 + %297 = OpExtInst %float %1 FClamp %296 %float_0 %float_1 + %298 = OpExtInst %float %1 Sqrt %297 + %299 = OpFDiv %float %298 %290 + %300 = OpSelect %float %294 %299 %292 + %301 = OpExtInst %float %1 FClamp %300 %float_0 %292 + %302 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_1 + %303 = OpLoad %float %302 + %304 = OpFMul %float %303 %301 + %305 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_0 + %306 = OpLoad %float %305 + %307 = OpFAdd %float %304 %306 + %308 = OpCompositeExtract %float %281 2 + %309 = OpVectorShuffle %v3float %264 %112 0 1 2 + OpStore %out_var_TEXCOORD10_centroid %227 + OpStore 
%out_var_TEXCOORD11_centroid %229 + OpStore %out_var_COLOR0 %231 + OpStore %out_var_TEXCOORD0 %234 + OpStore %out_var_PRIMITIVE_ID %122 + OpStore %out_var_TEXCOORD6 %308 + OpStore %out_var_TEXCOORD8 %307 + OpStore %out_var_TEXCOORD7 %309 + OpStore %gl_Position %281 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese b/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese new file mode 100644 index 00000000000..cb55bb42503 --- /dev/null +++ b/shaders-ue4/asm/tese/ds-patch-input-fixes.asm.tese @@ -0,0 +1,1175 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 581 +; Schema: 0 + OpCapability Tessellation + OpCapability ClipDistance + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %MainDomain "main" %gl_ClipDistance %in_var_TEXCOORD6 %in_var_TEXCOORD8 %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_VS_To_DS_Position %in_var_VS_To_DS_VertexID %in_var_PN_POSITION %in_var_PN_DisplacementScales %in_var_PN_TessellationMultiplier %in_var_PN_WorldDisplacementMultiplier %in_var_PN_DominantVertex %in_var_PN_DominantVertex1 %in_var_PN_DominantVertex2 %in_var_PN_DominantEdge %in_var_PN_DominantEdge1 %in_var_PN_DominantEdge2 %in_var_PN_DominantEdge3 %in_var_PN_DominantEdge4 %in_var_PN_DominantEdge5 %gl_TessLevelOuter %gl_TessLevelInner %in_var_PN_POSITION9 %gl_TessCoord %gl_Position %out_var_TEXCOORD6 %out_var_TEXCOORD7 %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid + OpExecutionMode %MainDomain Triangles + OpExecutionMode %MainDomain SpacingFractionalOdd + OpExecutionMode %MainDomain VertexOrderCw + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_ClipToWorld" + OpMemberName %type_View 3 "View_TranslatedWorldToView" + 
OpMemberName %type_View 4 "View_ViewToTranslatedWorld" + OpMemberName %type_View 5 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 6 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 7 "View_ViewToClip" + OpMemberName %type_View 8 "View_ViewToClipNoAA" + OpMemberName %type_View 9 "View_ClipToView" + OpMemberName %type_View 10 "View_ClipToTranslatedWorld" + OpMemberName %type_View 11 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 12 "View_ScreenToWorld" + OpMemberName %type_View 13 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 14 "View_ViewForward" + OpMemberName %type_View 15 "PrePadding_View_908" + OpMemberName %type_View 16 "View_ViewUp" + OpMemberName %type_View 17 "PrePadding_View_924" + OpMemberName %type_View 18 "View_ViewRight" + OpMemberName %type_View 19 "PrePadding_View_940" + OpMemberName %type_View 20 "View_HMDViewNoRollUp" + OpMemberName %type_View 21 "PrePadding_View_956" + OpMemberName %type_View 22 "View_HMDViewNoRollRight" + OpMemberName %type_View 23 "PrePadding_View_972" + OpMemberName %type_View 24 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 25 "View_ScreenPositionScaleBias" + OpMemberName %type_View 26 "View_WorldCameraOrigin" + OpMemberName %type_View 27 "PrePadding_View_1020" + OpMemberName %type_View 28 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 29 "PrePadding_View_1036" + OpMemberName %type_View 30 "View_WorldViewOrigin" + OpMemberName %type_View 31 "PrePadding_View_1052" + OpMemberName %type_View 32 "View_PreViewTranslation" + OpMemberName %type_View 33 "PrePadding_View_1068" + OpMemberName %type_View 34 "View_PrevProjection" + OpMemberName %type_View 35 "View_PrevViewProj" + OpMemberName %type_View 36 "View_PrevViewRotationProj" + OpMemberName %type_View 37 "View_PrevViewToClip" + OpMemberName %type_View 38 "View_PrevClipToView" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 40 "View_PrevTranslatedWorldToView" + 
OpMemberName %type_View 41 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 42 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 43 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 44 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 45 "PrePadding_View_1724" + OpMemberName %type_View 46 "View_PrevWorldViewOrigin" + OpMemberName %type_View 47 "PrePadding_View_1740" + OpMemberName %type_View 48 "View_PrevPreViewTranslation" + OpMemberName %type_View 49 "PrePadding_View_1756" + OpMemberName %type_View 50 "View_PrevInvViewProj" + OpMemberName %type_View 51 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 52 "View_ClipToPrevClip" + OpMemberName %type_View 53 "View_TemporalAAJitter" + OpMemberName %type_View 54 "View_GlobalClippingPlane" + OpMemberName %type_View 55 "View_FieldOfViewWideAngles" + OpMemberName %type_View 56 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 57 "View_ViewRectMin" + OpMemberName %type_View 58 "View_ViewSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferSizeAndInvSize" + OpMemberName %type_View 60 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 61 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 62 "View_PreExposure" + OpMemberName %type_View 63 "View_OneOverPreExposure" + OpMemberName %type_View 64 "PrePadding_View_2076" + OpMemberName %type_View 65 "View_DiffuseOverrideParameter" + OpMemberName %type_View 66 "View_SpecularOverrideParameter" + OpMemberName %type_View 67 "View_NormalOverrideParameter" + OpMemberName %type_View 68 "View_RoughnessOverrideParameter" + OpMemberName %type_View 69 "View_PrevFrameGameTime" + OpMemberName %type_View 70 "View_PrevFrameRealTime" + OpMemberName %type_View 71 "View_OutOfBoundsMask" + OpMemberName %type_View 72 "PrePadding_View_2148" + OpMemberName %type_View 73 "PrePadding_View_2152" + OpMemberName %type_View 74 "PrePadding_View_2156" + OpMemberName %type_View 75 "View_WorldCameraMovementSinceLastFrame" + 
OpMemberName %type_View 76 "View_CullingSign" + OpMemberName %type_View 77 "View_NearPlane" + OpMemberName %type_View 78 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 79 "View_GameTime" + OpMemberName %type_View 80 "View_RealTime" + OpMemberName %type_View 81 "View_DeltaTime" + OpMemberName %type_View 82 "View_MaterialTextureMipBias" + OpMemberName %type_View 83 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 84 "View_Random" + OpMemberName %type_View 85 "View_FrameNumber" + OpMemberName %type_View 86 "View_StateFrameIndexMod8" + OpMemberName %type_View 87 "View_StateFrameIndex" + OpMemberName %type_View 88 "View_CameraCut" + OpMemberName %type_View 89 "View_UnlitViewmodeMask" + OpMemberName %type_View 90 "PrePadding_View_2228" + OpMemberName %type_View 91 "PrePadding_View_2232" + OpMemberName %type_View 92 "PrePadding_View_2236" + OpMemberName %type_View 93 "View_DirectionalLightColor" + OpMemberName %type_View 94 "View_DirectionalLightDirection" + OpMemberName %type_View 95 "PrePadding_View_2268" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 97 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 98 "View_TemporalAAParams" + OpMemberName %type_View 99 "View_CircleDOFParams" + OpMemberName %type_View 100 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 101 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 102 "View_DepthOfFieldScale" + OpMemberName %type_View 103 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 104 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 105 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 106 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 107 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 108 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 109 "View_GeneralPurposeTweak" + OpMemberName %type_View 110 "View_DemosaicVposOffset" + OpMemberName %type_View 
111 "PrePadding_View_2412" + OpMemberName %type_View 112 "View_IndirectLightingColorScale" + OpMemberName %type_View 113 "View_HDR32bppEncodingMode" + OpMemberName %type_View 114 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 115 "View_AtmosphericFogSunPower" + OpMemberName %type_View 116 "View_AtmosphericFogPower" + OpMemberName %type_View 117 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 118 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 119 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 120 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 121 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 122 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 123 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 124 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 125 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 126 "View_AtmosphericFogSunDiscHalfApexAngleRadian" + OpMemberName %type_View 127 "PrePadding_View_2492" + OpMemberName %type_View 128 "View_AtmosphericFogSunDiscLuminance" + OpMemberName %type_View 129 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 130 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 131 "PrePadding_View_2520" + OpMemberName %type_View 132 "PrePadding_View_2524" + OpMemberName %type_View 133 "View_AtmosphericFogSunColor" + OpMemberName %type_View 134 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 135 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 136 "View_AmbientCubemapTint" + OpMemberName %type_View 137 "View_AmbientCubemapIntensity" + OpMemberName %type_View 138 "View_SkyLightParameters" + OpMemberName %type_View 139 "PrePadding_View_2584" + OpMemberName %type_View 140 "PrePadding_View_2588" + OpMemberName %type_View 141 "View_SkyLightColor" + OpMemberName %type_View 142 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 143 
"View_MobilePreviewMode" + OpMemberName %type_View 144 "View_HMDEyePaddingOffset" + OpMemberName %type_View 145 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 146 "View_ShowDecalsMask" + OpMemberName %type_View 147 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 148 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 149 "PrePadding_View_2744" + OpMemberName %type_View 150 "PrePadding_View_2748" + OpMemberName %type_View 151 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 152 "View_StereoPassIndex" + OpMemberName %type_View 153 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 154 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 155 "View_GlobalVolumeDimension" + OpMemberName %type_View 156 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 157 "View_MaxGlobalDistance" + OpMemberName %type_View 158 "PrePadding_View_2908" + OpMemberName %type_View 159 "View_CursorPosition" + OpMemberName %type_View 160 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 161 "PrePadding_View_2924" + OpMemberName %type_View 162 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 163 "PrePadding_View_2940" + OpMemberName %type_View 164 "View_VolumetricFogGridZParams" + OpMemberName %type_View 165 "PrePadding_View_2956" + OpMemberName %type_View 166 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 167 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 168 "PrePadding_View_2972" + OpMemberName %type_View 169 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 170 "PrePadding_View_2988" + OpMemberName %type_View 171 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 172 "PrePadding_View_3004" + OpMemberName %type_View 173 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 174 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 175 
"View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 176 "View_StereoIPD" + OpMemberName %type_View 177 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 178 "View_EyeToPixelSpreadAngle" + OpMemberName %type_View 179 "PrePadding_View_3048" + OpMemberName %type_View 180 "PrePadding_View_3052" + OpMemberName %type_View 181 "View_WorldToVirtualTexture" + OpMemberName %type_View 182 "View_VirtualTextureParams" + OpMemberName %type_View 183 "View_XRPassthroughCameraUVs" + OpName %View "View" + OpName %type_sampler "type.sampler" + OpName %type_3d_image "type.3d.image" + OpName %View_GlobalDistanceFieldTexture0 "View_GlobalDistanceFieldTexture0" + OpName %View_GlobalDistanceFieldSampler0 "View_GlobalDistanceFieldSampler0" + OpName %View_GlobalDistanceFieldTexture1 "View_GlobalDistanceFieldTexture1" + OpName %View_GlobalDistanceFieldTexture2 "View_GlobalDistanceFieldTexture2" + OpName %View_GlobalDistanceFieldTexture3 "View_GlobalDistanceFieldTexture3" + OpName %type_Material "type.Material" + OpMemberName %type_Material 0 "Material_VectorExpressions" + OpMemberName %type_Material 1 "Material_ScalarExpressions" + OpName %Material "Material" + OpName %in_var_TEXCOORD6 "in.var.TEXCOORD6" + OpName %in_var_TEXCOORD8 "in.var.TEXCOORD8" + OpName %in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_VS_To_DS_Position "in.var.VS_To_DS_Position" + OpName %in_var_VS_To_DS_VertexID "in.var.VS_To_DS_VertexID" + OpName %in_var_PN_POSITION "in.var.PN_POSITION" + OpName %in_var_PN_DisplacementScales "in.var.PN_DisplacementScales" + OpName %in_var_PN_TessellationMultiplier "in.var.PN_TessellationMultiplier" + OpName %in_var_PN_WorldDisplacementMultiplier "in.var.PN_WorldDisplacementMultiplier" + OpName %in_var_PN_DominantVertex "in.var.PN_DominantVertex" + OpName %in_var_PN_DominantVertex1 "in.var.PN_DominantVertex1" + OpName %in_var_PN_DominantVertex2 
"in.var.PN_DominantVertex2" + OpName %in_var_PN_DominantEdge "in.var.PN_DominantEdge" + OpName %in_var_PN_DominantEdge1 "in.var.PN_DominantEdge1" + OpName %in_var_PN_DominantEdge2 "in.var.PN_DominantEdge2" + OpName %in_var_PN_DominantEdge3 "in.var.PN_DominantEdge3" + OpName %in_var_PN_DominantEdge4 "in.var.PN_DominantEdge4" + OpName %in_var_PN_DominantEdge5 "in.var.PN_DominantEdge5" + OpName %in_var_PN_POSITION9 "in.var.PN_POSITION9" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %out_var_TEXCOORD7 "out.var.TEXCOORD7" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %MainDomain "MainDomain" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_ClipDistance BuiltIn ClipDistance + OpDecorateString %gl_ClipDistance UserSemantic "SV_ClipDistance" + OpDecorateString %in_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %in_var_TEXCOORD8 UserSemantic "TEXCOORD8" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_VS_To_DS_Position UserSemantic "VS_To_DS_Position" + OpDecorateString %in_var_VS_To_DS_VertexID UserSemantic "VS_To_DS_VertexID" + OpDecorateString %in_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %in_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %in_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString %in_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorateString %in_var_PN_DominantVertex UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantVertex1 UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantVertex2 UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantEdge UserSemantic "PN_DominantEdge" + OpDecorateString 
%in_var_PN_DominantEdge1 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge2 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge3 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge4 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge5 UserSemantic "PN_DominantEdge" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %in_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %in_var_PN_POSITION9 Patch + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" + OpDecorate %gl_TessCoord Patch + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %out_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorate %in_var_PN_DisplacementScales Location 0 + OpDecorate %in_var_PN_DominantEdge Location 1 + OpDecorate %in_var_PN_DominantEdge1 Location 2 + OpDecorate %in_var_PN_DominantEdge2 Location 3 + OpDecorate %in_var_PN_DominantEdge3 Location 4 + OpDecorate %in_var_PN_DominantEdge4 Location 5 + OpDecorate %in_var_PN_DominantEdge5 Location 6 + OpDecorate %in_var_PN_DominantVertex Location 7 + OpDecorate %in_var_PN_DominantVertex1 Location 8 + OpDecorate %in_var_PN_DominantVertex2 Location 9 + OpDecorate %in_var_PN_POSITION Location 10 + OpDecorate %in_var_PN_POSITION9 Location 13 + OpDecorate %in_var_PN_TessellationMultiplier Location 14 + OpDecorate 
%in_var_PN_WorldDisplacementMultiplier Location 15 + OpDecorate %in_var_TEXCOORD10_centroid Location 16 + OpDecorate %in_var_TEXCOORD11_centroid Location 17 + OpDecorate %in_var_TEXCOORD6 Location 18 + OpDecorate %in_var_TEXCOORD8 Location 19 + OpDecorate %in_var_VS_To_DS_Position Location 20 + OpDecorate %in_var_VS_To_DS_VertexID Location 21 + OpDecorate %out_var_TEXCOORD6 Location 0 + OpDecorate %out_var_TEXCOORD7 Location 1 + OpDecorate %out_var_TEXCOORD10_centroid Location 2 + OpDecorate %out_var_TEXCOORD11_centroid Location 3 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %View_GlobalDistanceFieldTexture0 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldTexture0 Binding 0 + OpDecorate %View_GlobalDistanceFieldSampler0 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldSampler0 Binding 0 + OpDecorate %View_GlobalDistanceFieldTexture1 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldTexture1 Binding 1 + OpDecorate %View_GlobalDistanceFieldTexture2 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldTexture2 Binding 2 + OpDecorate %View_GlobalDistanceFieldTexture3 DescriptorSet 0 + OpDecorate %View_GlobalDistanceFieldTexture3 Binding 3 + OpDecorate %Material DescriptorSet 0 + OpDecorate %Material Binding 1 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + 
OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 13 MatrixStride 16 + OpMemberDecorate %type_View 13 ColMajor + OpMemberDecorate %type_View 14 Offset 896 + OpMemberDecorate %type_View 15 Offset 908 + OpMemberDecorate %type_View 16 Offset 912 + OpMemberDecorate %type_View 17 Offset 924 + OpMemberDecorate %type_View 18 Offset 928 + OpMemberDecorate %type_View 19 Offset 940 + OpMemberDecorate %type_View 20 Offset 944 + OpMemberDecorate %type_View 21 Offset 956 + OpMemberDecorate %type_View 22 Offset 960 + OpMemberDecorate %type_View 23 Offset 972 + OpMemberDecorate %type_View 24 Offset 976 + OpMemberDecorate %type_View 25 Offset 992 + OpMemberDecorate %type_View 26 Offset 1008 + OpMemberDecorate %type_View 27 Offset 1020 + OpMemberDecorate %type_View 28 Offset 1024 + OpMemberDecorate %type_View 29 Offset 1036 + 
OpMemberDecorate %type_View 30 Offset 1040 + OpMemberDecorate %type_View 31 Offset 1052 + OpMemberDecorate %type_View 32 Offset 1056 + OpMemberDecorate %type_View 33 Offset 1068 + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 43 MatrixStride 16 + OpMemberDecorate %type_View 43 ColMajor + OpMemberDecorate %type_View 44 Offset 1712 + OpMemberDecorate %type_View 45 Offset 1724 + OpMemberDecorate %type_View 46 Offset 1728 + OpMemberDecorate %type_View 47 Offset 1740 + OpMemberDecorate %type_View 48 Offset 1744 + OpMemberDecorate %type_View 49 Offset 1756 + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + 
OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 52 MatrixStride 16 + OpMemberDecorate %type_View 52 ColMajor + OpMemberDecorate %type_View 53 Offset 1952 + OpMemberDecorate %type_View 54 Offset 1968 + OpMemberDecorate %type_View 55 Offset 1984 + OpMemberDecorate %type_View 56 Offset 1992 + OpMemberDecorate %type_View 57 Offset 2000 + OpMemberDecorate %type_View 58 Offset 2016 + OpMemberDecorate %type_View 59 Offset 2032 + OpMemberDecorate %type_View 60 Offset 2048 + OpMemberDecorate %type_View 61 Offset 2064 + OpMemberDecorate %type_View 62 Offset 2068 + OpMemberDecorate %type_View 63 Offset 2072 + OpMemberDecorate %type_View 64 Offset 2076 + OpMemberDecorate %type_View 65 Offset 2080 + OpMemberDecorate %type_View 66 Offset 2096 + OpMemberDecorate %type_View 67 Offset 2112 + OpMemberDecorate %type_View 68 Offset 2128 + OpMemberDecorate %type_View 69 Offset 2136 + OpMemberDecorate %type_View 70 Offset 2140 + OpMemberDecorate %type_View 71 Offset 2144 + OpMemberDecorate %type_View 72 Offset 2148 + OpMemberDecorate %type_View 73 Offset 2152 + OpMemberDecorate %type_View 74 Offset 2156 + OpMemberDecorate %type_View 75 Offset 2160 + OpMemberDecorate %type_View 76 Offset 2172 + OpMemberDecorate %type_View 77 Offset 2176 + OpMemberDecorate %type_View 78 Offset 2180 + OpMemberDecorate %type_View 79 Offset 2184 + OpMemberDecorate %type_View 80 Offset 2188 + OpMemberDecorate %type_View 81 Offset 2192 + OpMemberDecorate %type_View 82 Offset 2196 + OpMemberDecorate %type_View 83 Offset 2200 + OpMemberDecorate %type_View 84 Offset 2204 + OpMemberDecorate %type_View 85 Offset 2208 + OpMemberDecorate %type_View 86 Offset 2212 + OpMemberDecorate %type_View 87 Offset 2216 + OpMemberDecorate %type_View 88 Offset 2220 + OpMemberDecorate %type_View 89 Offset 2224 + OpMemberDecorate %type_View 90 Offset 2228 + OpMemberDecorate %type_View 91 Offset 2232 + OpMemberDecorate 
%type_View 92 Offset 2236 + OpMemberDecorate %type_View 93 Offset 2240 + OpMemberDecorate %type_View 94 Offset 2256 + OpMemberDecorate %type_View 95 Offset 2268 + OpMemberDecorate %type_View 96 Offset 2272 + OpMemberDecorate %type_View 97 Offset 2304 + OpMemberDecorate %type_View 98 Offset 2336 + OpMemberDecorate %type_View 99 Offset 2352 + OpMemberDecorate %type_View 100 Offset 2368 + OpMemberDecorate %type_View 101 Offset 2372 + OpMemberDecorate %type_View 102 Offset 2376 + OpMemberDecorate %type_View 103 Offset 2380 + OpMemberDecorate %type_View 104 Offset 2384 + OpMemberDecorate %type_View 105 Offset 2388 + OpMemberDecorate %type_View 106 Offset 2392 + OpMemberDecorate %type_View 107 Offset 2396 + OpMemberDecorate %type_View 108 Offset 2400 + OpMemberDecorate %type_View 109 Offset 2404 + OpMemberDecorate %type_View 110 Offset 2408 + OpMemberDecorate %type_View 111 Offset 2412 + OpMemberDecorate %type_View 112 Offset 2416 + OpMemberDecorate %type_View 113 Offset 2428 + OpMemberDecorate %type_View 114 Offset 2432 + OpMemberDecorate %type_View 115 Offset 2444 + OpMemberDecorate %type_View 116 Offset 2448 + OpMemberDecorate %type_View 117 Offset 2452 + OpMemberDecorate %type_View 118 Offset 2456 + OpMemberDecorate %type_View 119 Offset 2460 + OpMemberDecorate %type_View 120 Offset 2464 + OpMemberDecorate %type_View 121 Offset 2468 + OpMemberDecorate %type_View 122 Offset 2472 + OpMemberDecorate %type_View 123 Offset 2476 + OpMemberDecorate %type_View 124 Offset 2480 + OpMemberDecorate %type_View 125 Offset 2484 + OpMemberDecorate %type_View 126 Offset 2488 + OpMemberDecorate %type_View 127 Offset 2492 + OpMemberDecorate %type_View 128 Offset 2496 + OpMemberDecorate %type_View 129 Offset 2512 + OpMemberDecorate %type_View 130 Offset 2516 + OpMemberDecorate %type_View 131 Offset 2520 + OpMemberDecorate %type_View 132 Offset 2524 + OpMemberDecorate %type_View 133 Offset 2528 + OpMemberDecorate %type_View 134 Offset 2544 + OpMemberDecorate %type_View 135 Offset 2556 + 
OpMemberDecorate %type_View 136 Offset 2560 + OpMemberDecorate %type_View 137 Offset 2576 + OpMemberDecorate %type_View 138 Offset 2580 + OpMemberDecorate %type_View 139 Offset 2584 + OpMemberDecorate %type_View 140 Offset 2588 + OpMemberDecorate %type_View 141 Offset 2592 + OpMemberDecorate %type_View 142 Offset 2608 + OpMemberDecorate %type_View 143 Offset 2720 + OpMemberDecorate %type_View 144 Offset 2724 + OpMemberDecorate %type_View 145 Offset 2728 + OpMemberDecorate %type_View 146 Offset 2732 + OpMemberDecorate %type_View 147 Offset 2736 + OpMemberDecorate %type_View 148 Offset 2740 + OpMemberDecorate %type_View 149 Offset 2744 + OpMemberDecorate %type_View 150 Offset 2748 + OpMemberDecorate %type_View 151 Offset 2752 + OpMemberDecorate %type_View 152 Offset 2764 + OpMemberDecorate %type_View 153 Offset 2768 + OpMemberDecorate %type_View 154 Offset 2832 + OpMemberDecorate %type_View 155 Offset 2896 + OpMemberDecorate %type_View 156 Offset 2900 + OpMemberDecorate %type_View 157 Offset 2904 + OpMemberDecorate %type_View 158 Offset 2908 + OpMemberDecorate %type_View 159 Offset 2912 + OpMemberDecorate %type_View 160 Offset 2920 + OpMemberDecorate %type_View 161 Offset 2924 + OpMemberDecorate %type_View 162 Offset 2928 + OpMemberDecorate %type_View 163 Offset 2940 + OpMemberDecorate %type_View 164 Offset 2944 + OpMemberDecorate %type_View 165 Offset 2956 + OpMemberDecorate %type_View 166 Offset 2960 + OpMemberDecorate %type_View 167 Offset 2968 + OpMemberDecorate %type_View 168 Offset 2972 + OpMemberDecorate %type_View 169 Offset 2976 + OpMemberDecorate %type_View 170 Offset 2988 + OpMemberDecorate %type_View 171 Offset 2992 + OpMemberDecorate %type_View 172 Offset 3004 + OpMemberDecorate %type_View 173 Offset 3008 + OpMemberDecorate %type_View 174 Offset 3020 + OpMemberDecorate %type_View 175 Offset 3024 + OpMemberDecorate %type_View 176 Offset 3036 + OpMemberDecorate %type_View 177 Offset 3040 + OpMemberDecorate %type_View 178 Offset 3044 + OpMemberDecorate 
%type_View 179 Offset 3048 + OpMemberDecorate %type_View 180 Offset 3052 + OpMemberDecorate %type_View 181 Offset 3056 + OpMemberDecorate %type_View 181 MatrixStride 16 + OpMemberDecorate %type_View 181 ColMajor + OpMemberDecorate %type_View 182 Offset 3120 + OpMemberDecorate %type_View 183 Offset 3136 + OpDecorate %type_View Block + OpDecorate %_arr_v4float_uint_5 ArrayStride 16 + OpMemberDecorate %type_Material 0 Offset 0 + OpMemberDecorate %type_Material 1 Offset 80 + OpDecorate %type_Material Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %v2int = OpTypeVector %int 2 +%float_0_00100000005 = OpConstant %float 0.00100000005 + %uint_0 = OpConstant %uint 0 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %float_3 = OpConstant %float 3 + %uint_1 = OpConstant %uint 1 + %float_6 = OpConstant %float 6 + %67 = OpConstantComposite %v4float %float_6 %float_6 %float_6 %float_6 + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %int_3 = OpConstant %int 3 + %float_2 = OpConstant %float 2 + %int_26 = OpConstant %int 26 + %int_32 = OpConstant %int 32 + %int_54 = OpConstant %int 54 + %int_153 = OpConstant %int 153 + %int_154 = OpConstant %int 154 + %int_156 = OpConstant %int 156 + %int_157 = OpConstant %int 157 + %float_10 = OpConstant %float 10 + %uint_3 = OpConstant %uint 3 + %81 = OpConstantComposite %v3float %float_0 %float_0 %float_0 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float 
%mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %float %float %v4float %uint %uint %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v2int %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float %float %float %mat4v4float %v4float %_arr_v4float_uint_2 +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%type_3d_image = OpTypeImage %float 3D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image + %uint_5 = OpConstant %uint 5 +%_arr_v4float_uint_5 = OpTypeArray %v4float %uint_5 +%type_Material = OpTypeStruct %_arr_v4float_uint_5 %_arr_v4float_uint_2 +%_ptr_Uniform_type_Material = 
OpTypePointer Uniform %type_Material +%_arr_float_uint_1 = OpTypeArray %float %uint_1 +%_ptr_Output__arr_float_uint_1 = OpTypePointer Output %_arr_float_uint_1 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Input__arr_uint_uint_3 = OpTypePointer Input %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Input__arr_v3float_uint_3 = OpTypePointer Input %_arr_v3float_uint_3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 +%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 +%_ptr_Input__arr_v2float_uint_3 = OpTypePointer Input %_arr_v2float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %109 = OpTypeFunction %void +%_ptr_Output_float = OpTypePointer Output %float +%mat3v3float = OpTypeMatrix %v3float 3 + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%type_sampled_image = OpTypeSampledImage %type_3d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform +%View_GlobalDistanceFieldTexture0 = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant +%View_GlobalDistanceFieldSampler0 = 
OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%View_GlobalDistanceFieldTexture1 = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant +%View_GlobalDistanceFieldTexture2 = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant +%View_GlobalDistanceFieldTexture3 = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant + %Material = OpVariable %_ptr_Uniform_type_Material Uniform +%gl_ClipDistance = OpVariable %_ptr_Output__arr_float_uint_1 Output +%in_var_TEXCOORD6 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD8 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_To_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_To_DS_VertexID = OpVariable %_ptr_Input__arr_uint_uint_3 Input +%in_var_PN_POSITION = OpVariable %_ptr_Input__arr__arr_v4float_uint_3_uint_3 Input +%in_var_PN_DisplacementScales = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_TessellationMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_DominantVertex = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantVertex1 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantVertex2 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_DominantEdge = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantEdge1 = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantEdge2 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantEdge3 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantEdge4 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_DominantEdge5 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input 
+%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_PN_POSITION9 = OpVariable %_ptr_Input_v4float Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%gl_Position = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD7 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output_v4float Output + %117 = OpConstantNull %v4float + %118 = OpUndef %v4float + %MainDomain = OpFunction %void None %109 + %119 = OpLabel + %120 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD6 + %121 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD8 + %122 = OpCompositeExtract %v4float %120 0 + %123 = OpCompositeExtract %v4float %121 0 + %124 = OpCompositeExtract %v4float %120 1 + %125 = OpCompositeExtract %v4float %121 1 + %126 = OpCompositeExtract %v4float %120 2 + %127 = OpCompositeExtract %v4float %121 2 + %128 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD10_centroid + %129 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD11_centroid + %130 = OpCompositeExtract %v4float %128 0 + %131 = OpCompositeExtract %v4float %129 0 + %132 = OpCompositeExtract %v4float %128 1 + %133 = OpCompositeExtract %v4float %129 1 + %134 = OpCompositeExtract %v4float %128 2 + %135 = OpCompositeExtract %v4float %129 2 + %136 = OpLoad %_arr__arr_v4float_uint_3_uint_3 %in_var_PN_POSITION + %137 = OpLoad %_arr_float_uint_3 %in_var_PN_WorldDisplacementMultiplier + %138 = OpLoad %_arr_v4float_uint_3 %in_var_PN_DominantVertex1 + %139 = OpLoad %_arr_v3float_uint_3 %in_var_PN_DominantVertex2 + %140 = OpCompositeExtract %v4float %138 0 + %141 = OpCompositeExtract %v3float %139 0 + %142 = OpCompositeExtract %v4float %138 1 + %143 = OpCompositeExtract %v3float %139 1 + %144 = OpCompositeExtract %v4float %138 2 + %145 = OpCompositeExtract %v3float %139 2 + 
%146 = OpLoad %_arr_v4float_uint_3 %in_var_PN_DominantEdge2 + %147 = OpLoad %_arr_v4float_uint_3 %in_var_PN_DominantEdge3 + %148 = OpLoad %_arr_v3float_uint_3 %in_var_PN_DominantEdge4 + %149 = OpLoad %_arr_v3float_uint_3 %in_var_PN_DominantEdge5 + %150 = OpCompositeExtract %v4float %146 0 + %151 = OpCompositeExtract %v4float %147 0 + %152 = OpCompositeExtract %v3float %148 0 + %153 = OpCompositeExtract %v3float %149 0 + %154 = OpCompositeExtract %v4float %146 1 + %155 = OpCompositeExtract %v4float %147 1 + %156 = OpCompositeExtract %v3float %148 1 + %157 = OpCompositeExtract %v3float %149 1 + %158 = OpCompositeExtract %v4float %146 2 + %159 = OpCompositeExtract %v4float %147 2 + %160 = OpCompositeExtract %v3float %148 2 + %161 = OpCompositeExtract %v3float %149 2 + %162 = OpCompositeExtract %_arr_v4float_uint_3 %136 0 + %163 = OpCompositeExtract %float %137 0 + %164 = OpCompositeExtract %_arr_v4float_uint_3 %136 1 + %165 = OpCompositeExtract %float %137 1 + %166 = OpCompositeExtract %_arr_v4float_uint_3 %136 2 + %167 = OpCompositeExtract %float %137 2 + %168 = OpCompositeExtract %v4float %162 0 + %169 = OpCompositeExtract %v4float %162 1 + %170 = OpCompositeExtract %v4float %162 2 + %171 = OpCompositeExtract %v4float %164 0 + %172 = OpCompositeExtract %v4float %164 1 + %173 = OpCompositeExtract %v4float %164 2 + %174 = OpCompositeExtract %v4float %166 0 + %175 = OpCompositeExtract %v4float %166 1 + %176 = OpCompositeExtract %v4float %166 2 + %177 = OpLoad %v4float %in_var_PN_POSITION9 + %178 = OpLoad %v3float %gl_TessCoord + %179 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %180 = OpLoad %mat4v4float %179 + %181 = OpAccessChain %_ptr_Uniform_v3float %View %int_26 + %182 = OpLoad %v3float %181 + %183 = OpAccessChain %_ptr_Uniform_v3float %View %int_32 + %184 = OpLoad %v3float %183 + %185 = OpAccessChain %_ptr_Uniform_v4float %View %int_54 + %186 = OpLoad %v4float %185 + %187 = OpCompositeExtract %float %178 0 + %188 = OpCompositeExtract %float %178 1 + 
%189 = OpCompositeExtract %float %178 2 + %190 = OpFMul %float %187 %187 + %191 = OpFMul %float %188 %188 + %192 = OpFMul %float %189 %189 + %193 = OpFMul %float %190 %float_3 + %194 = OpFMul %float %191 %float_3 + %195 = OpFMul %float %192 %float_3 + %196 = OpCompositeConstruct %v4float %190 %190 %190 %190 + %197 = OpFMul %v4float %168 %196 + %198 = OpCompositeConstruct %v4float %187 %187 %187 %187 + %199 = OpFMul %v4float %197 %198 + %200 = OpCompositeConstruct %v4float %191 %191 %191 %191 + %201 = OpFMul %v4float %171 %200 + %202 = OpCompositeConstruct %v4float %188 %188 %188 %188 + %203 = OpFMul %v4float %201 %202 + %204 = OpFAdd %v4float %199 %203 + %205 = OpCompositeConstruct %v4float %192 %192 %192 %192 + %206 = OpFMul %v4float %174 %205 + %207 = OpCompositeConstruct %v4float %189 %189 %189 %189 + %208 = OpFMul %v4float %206 %207 + %209 = OpFAdd %v4float %204 %208 + %210 = OpCompositeConstruct %v4float %193 %193 %193 %193 + %211 = OpFMul %v4float %169 %210 + %212 = OpFMul %v4float %211 %202 + %213 = OpFAdd %v4float %209 %212 + %214 = OpCompositeConstruct %v4float %194 %194 %194 %194 + %215 = OpFMul %v4float %170 %214 + %216 = OpFMul %v4float %215 %198 + %217 = OpFAdd %v4float %213 %216 + %218 = OpFMul %v4float %172 %214 + %219 = OpFMul %v4float %218 %207 + %220 = OpFAdd %v4float %217 %219 + %221 = OpCompositeConstruct %v4float %195 %195 %195 %195 + %222 = OpFMul %v4float %173 %221 + %223 = OpFMul %v4float %222 %202 + %224 = OpFAdd %v4float %220 %223 + %225 = OpFMul %v4float %175 %221 + %226 = OpFMul %v4float %225 %198 + %227 = OpFAdd %v4float %224 %226 + %228 = OpFMul %v4float %176 %210 + %229 = OpFMul %v4float %228 %207 + %230 = OpFAdd %v4float %227 %229 + %231 = OpFMul %v4float %177 %67 + %232 = OpFMul %v4float %231 %207 + %233 = OpFMul %v4float %232 %198 + %234 = OpFMul %v4float %233 %202 + %235 = OpFAdd %v4float %230 %234 + %236 = OpVectorShuffle %v3float %130 %130 0 1 2 + %237 = OpCompositeConstruct %v3float %187 %187 %187 + %238 = OpFMul %v3float %236 
%237 + %239 = OpVectorShuffle %v3float %132 %132 0 1 2 + %240 = OpCompositeConstruct %v3float %188 %188 %188 + %241 = OpFMul %v3float %239 %240 + %242 = OpFAdd %v3float %238 %241 + %243 = OpFMul %v4float %131 %198 + %244 = OpFMul %v4float %133 %202 + %245 = OpFAdd %v4float %243 %244 + %246 = OpFMul %v4float %122 %198 + %247 = OpFMul %v4float %124 %202 + %248 = OpFAdd %v4float %246 %247 + %249 = OpFMul %v4float %123 %198 + %250 = OpFMul %v4float %125 %202 + %251 = OpFAdd %v4float %249 %250 + %252 = OpVectorShuffle %v3float %242 %117 0 1 2 + %253 = OpVectorShuffle %v3float %134 %134 0 1 2 + %254 = OpCompositeConstruct %v3float %189 %189 %189 + %255 = OpFMul %v3float %253 %254 + %256 = OpFAdd %v3float %252 %255 + %257 = OpVectorShuffle %v4float %118 %256 4 5 6 3 + %258 = OpFMul %v4float %135 %207 + %259 = OpFAdd %v4float %245 %258 + %260 = OpFMul %v4float %126 %207 + %261 = OpFAdd %v4float %248 %260 + %262 = OpFMul %v4float %127 %207 + %263 = OpFAdd %v4float %251 %262 + %264 = OpVectorShuffle %v3float %235 %235 0 1 2 + %265 = OpVectorShuffle %v3float %256 %117 0 1 2 + %266 = OpVectorShuffle %v3float %259 %259 0 1 2 + %267 = OpExtInst %v3float %1 Cross %266 %265 + %268 = OpCompositeExtract %float %259 3 + %269 = OpCompositeConstruct %v3float %268 %268 %268 + %270 = OpFMul %v3float %267 %269 + %271 = OpCompositeConstruct %mat3v3float %265 %270 %266 + %272 = OpFAdd %v3float %264 %182 + %273 = OpCompositeExtract %float %259 0 + %274 = OpCompositeExtract %float %259 1 + %275 = OpCompositeExtract %float %259 2 + %276 = OpCompositeConstruct %v4float %273 %274 %275 %float_0 + %277 = OpFOrdEqual %bool %187 %float_0 + %278 = OpSelect %int %277 %int_1 %int_0 + %279 = OpConvertSToF %float %278 + %280 = OpFOrdEqual %bool %188 %float_0 + %281 = OpSelect %int %280 %int_1 %int_0 + %282 = OpConvertSToF %float %281 + %283 = OpFOrdEqual %bool %189 %float_0 + %284 = OpSelect %int %283 %int_1 %int_0 + %285 = OpConvertSToF %float %284 + %286 = OpFAdd %float %279 %282 + %287 = OpFAdd %float 
%286 %285 + %288 = OpFOrdEqual %bool %287 %float_2 + %289 = OpSelect %int %288 %int_1 %int_0 + %290 = OpConvertSToF %float %289 + %291 = OpFOrdEqual %bool %287 %float_1 + %292 = OpSelect %int %291 %int_1 %int_0 + %293 = OpConvertSToF %float %292 + %294 = OpFOrdEqual %bool %287 %float_0 + %295 = OpSelect %int %294 %int_1 %int_0 + %296 = OpConvertSToF %float %295 + %297 = OpFOrdEqual %bool %290 %float_1 + OpSelectionMerge %298 None + OpBranchConditional %297 %299 %300 + %300 = OpLabel + %301 = OpFOrdNotEqual %bool %293 %float_0 + OpSelectionMerge %302 None + OpBranchConditional %301 %303 %302 + %303 = OpLabel + %304 = OpCompositeConstruct %v4float %279 %279 %279 %279 + %305 = OpFMul %v4float %304 %150 + %306 = OpCompositeConstruct %v4float %282 %282 %282 %282 + %307 = OpFMul %v4float %306 %154 + %308 = OpFAdd %v4float %305 %307 + %309 = OpCompositeConstruct %v4float %285 %285 %285 %285 + %310 = OpFMul %v4float %309 %158 + %311 = OpFAdd %v4float %308 %310 + %312 = OpFMul %v4float %304 %151 + %313 = OpFMul %v4float %306 %155 + %314 = OpFAdd %v4float %312 %313 + %315 = OpFMul %v4float %309 %159 + %316 = OpFAdd %v4float %314 %315 + %317 = OpFMul %v4float %202 %311 + %318 = OpFMul %v4float %207 %316 + %319 = OpFAdd %v4float %317 %318 + %320 = OpFMul %v4float %304 %319 + %321 = OpFMul %v4float %207 %311 + %322 = OpFMul %v4float %198 %316 + %323 = OpFAdd %v4float %321 %322 + %324 = OpFMul %v4float %306 %323 + %325 = OpFAdd %v4float %320 %324 + %326 = OpFMul %v4float %198 %311 + %327 = OpFMul %v4float %202 %316 + %328 = OpFAdd %v4float %326 %327 + %329 = OpFMul %v4float %309 %328 + %330 = OpFAdd %v4float %325 %329 + %331 = OpCompositeConstruct %v3float %279 %279 %279 + %332 = OpFMul %v3float %331 %152 + %333 = OpCompositeConstruct %v3float %282 %282 %282 + %334 = OpFMul %v3float %333 %156 + %335 = OpFAdd %v3float %332 %334 + %336 = OpCompositeConstruct %v3float %285 %285 %285 + %337 = OpFMul %v3float %336 %160 + %338 = OpFAdd %v3float %335 %337 + %339 = OpFMul %v3float %331 
%153 + %340 = OpFMul %v3float %333 %157 + %341 = OpFAdd %v3float %339 %340 + %342 = OpFMul %v3float %336 %161 + %343 = OpFAdd %v3float %341 %342 + %344 = OpFMul %v3float %240 %338 + %345 = OpFMul %v3float %254 %343 + %346 = OpFAdd %v3float %344 %345 + %347 = OpFMul %v3float %331 %346 + %348 = OpFMul %v3float %254 %338 + %349 = OpFMul %v3float %237 %343 + %350 = OpFAdd %v3float %348 %349 + %351 = OpFMul %v3float %333 %350 + %352 = OpFAdd %v3float %347 %351 + %353 = OpFMul %v3float %237 %338 + %354 = OpFMul %v3float %240 %343 + %355 = OpFAdd %v3float %353 %354 + %356 = OpFMul %v3float %336 %355 + %357 = OpFAdd %v3float %352 %356 + OpBranch %302 + %302 = OpLabel + %358 = OpPhi %v4float %276 %300 %330 %303 + %359 = OpPhi %v3float %265 %300 %357 %303 + OpBranch %298 + %299 = OpLabel + %360 = OpFAdd %float %282 %285 + %361 = OpFOrdEqual %bool %360 %float_2 + %362 = OpSelect %int %361 %int_1 %int_0 + %363 = OpConvertSToF %float %362 + %364 = OpFAdd %float %285 %279 + %365 = OpFOrdEqual %bool %364 %float_2 + %366 = OpSelect %int %365 %int_1 %int_0 + %367 = OpConvertSToF %float %366 + %368 = OpFOrdEqual %bool %286 %float_2 + %369 = OpSelect %int %368 %int_1 %int_0 + %370 = OpConvertSToF %float %369 + %371 = OpCompositeConstruct %v4float %363 %363 %363 %363 + %372 = OpFMul %v4float %371 %140 + %373 = OpCompositeConstruct %v4float %367 %367 %367 %367 + %374 = OpFMul %v4float %373 %142 + %375 = OpFAdd %v4float %372 %374 + %376 = OpCompositeConstruct %v4float %370 %370 %370 %370 + %377 = OpFMul %v4float %376 %144 + %378 = OpFAdd %v4float %375 %377 + %379 = OpCompositeConstruct %v3float %363 %363 %363 + %380 = OpFMul %v3float %379 %141 + %381 = OpCompositeConstruct %v3float %367 %367 %367 + %382 = OpFMul %v3float %381 %143 + %383 = OpFAdd %v3float %380 %382 + %384 = OpCompositeConstruct %v3float %370 %370 %370 + %385 = OpFMul %v3float %384 %145 + %386 = OpFAdd %v3float %383 %385 + OpBranch %298 + %298 = OpLabel + %387 = OpPhi %v4float %378 %299 %358 %302 + %388 = OpPhi %v3float 
%386 %299 %359 %302 + %389 = OpFOrdEqual %bool %296 %float_0 + OpSelectionMerge %390 None + OpBranchConditional %389 %391 %390 + %391 = OpLabel + %392 = OpVectorShuffle %v3float %387 %387 0 1 2 + %393 = OpExtInst %v3float %1 Cross %392 %388 + %394 = OpCompositeExtract %float %387 3 + %395 = OpCompositeConstruct %v3float %394 %394 %394 + %396 = OpFMul %v3float %393 %395 + %397 = OpCompositeConstruct %mat3v3float %388 %396 %392 + OpBranch %390 + %390 = OpLabel + %398 = OpPhi %mat3v3float %271 %298 %397 %391 + %399 = OpAccessChain %_ptr_Uniform_float %View %int_157 + %400 = OpLoad %float %399 + %401 = OpAccessChain %_ptr_Uniform_v4float %View %int_153 %int_0 + %402 = OpLoad %v4float %401 + %403 = OpVectorShuffle %v3float %402 %402 0 1 2 + %404 = OpVectorShuffle %v3float %402 %402 3 3 3 + %405 = OpFSub %v3float %272 %403 + %406 = OpFAdd %v3float %405 %404 + %407 = OpExtInst %v3float %1 FMax %406 %81 + %408 = OpFAdd %v3float %403 %404 + %409 = OpFSub %v3float %408 %272 + %410 = OpExtInst %v3float %1 FMax %409 %81 + %411 = OpExtInst %v3float %1 FMin %407 %410 + %412 = OpCompositeExtract %float %411 0 + %413 = OpCompositeExtract %float %411 1 + %414 = OpCompositeExtract %float %411 2 + %415 = OpExtInst %float %1 FMin %413 %414 + %416 = OpExtInst %float %1 FMin %412 %415 + %417 = OpAccessChain %_ptr_Uniform_float %View %int_153 %int_0 %int_3 + %418 = OpLoad %float %417 + %419 = OpAccessChain %_ptr_Uniform_float %View %int_156 + %420 = OpLoad %float %419 + %421 = OpFMul %float %418 %420 + %422 = OpFOrdGreaterThan %bool %416 %421 + OpSelectionMerge %423 DontFlatten + OpBranchConditional %422 %424 %425 + %425 = OpLabel + %426 = OpAccessChain %_ptr_Uniform_v4float %View %int_153 %int_1 + %427 = OpLoad %v4float %426 + %428 = OpVectorShuffle %v3float %427 %427 0 1 2 + %429 = OpVectorShuffle %v3float %427 %427 3 3 3 + %430 = OpFSub %v3float %272 %428 + %431 = OpFAdd %v3float %430 %429 + %432 = OpExtInst %v3float %1 FMax %431 %81 + %433 = OpFAdd %v3float %428 %429 + %434 = OpFSub 
%v3float %433 %272 + %435 = OpExtInst %v3float %1 FMax %434 %81 + %436 = OpExtInst %v3float %1 FMin %432 %435 + %437 = OpCompositeExtract %float %436 0 + %438 = OpCompositeExtract %float %436 1 + %439 = OpCompositeExtract %float %436 2 + %440 = OpExtInst %float %1 FMin %438 %439 + %441 = OpExtInst %float %1 FMin %437 %440 + %442 = OpAccessChain %_ptr_Uniform_float %View %int_153 %int_1 %int_3 + %443 = OpLoad %float %442 + %444 = OpFMul %float %443 %420 + %445 = OpFOrdGreaterThan %bool %441 %444 + OpSelectionMerge %446 DontFlatten + OpBranchConditional %445 %447 %448 + %448 = OpLabel + %449 = OpAccessChain %_ptr_Uniform_v4float %View %int_153 %int_2 + %450 = OpLoad %v4float %449 + %451 = OpVectorShuffle %v3float %450 %450 0 1 2 + %452 = OpVectorShuffle %v3float %450 %450 3 3 3 + %453 = OpFSub %v3float %272 %451 + %454 = OpFAdd %v3float %453 %452 + %455 = OpExtInst %v3float %1 FMax %454 %81 + %456 = OpFAdd %v3float %451 %452 + %457 = OpFSub %v3float %456 %272 + %458 = OpExtInst %v3float %1 FMax %457 %81 + %459 = OpExtInst %v3float %1 FMin %455 %458 + %460 = OpCompositeExtract %float %459 0 + %461 = OpCompositeExtract %float %459 1 + %462 = OpCompositeExtract %float %459 2 + %463 = OpExtInst %float %1 FMin %461 %462 + %464 = OpExtInst %float %1 FMin %460 %463 + %465 = OpAccessChain %_ptr_Uniform_v4float %View %int_153 %int_3 + %466 = OpLoad %v4float %465 + %467 = OpVectorShuffle %v3float %466 %466 0 1 2 + %468 = OpVectorShuffle %v3float %466 %466 3 3 3 + %469 = OpFSub %v3float %272 %467 + %470 = OpFAdd %v3float %469 %468 + %471 = OpExtInst %v3float %1 FMax %470 %81 + %472 = OpFAdd %v3float %467 %468 + %473 = OpFSub %v3float %472 %272 + %474 = OpExtInst %v3float %1 FMax %473 %81 + %475 = OpExtInst %v3float %1 FMin %471 %474 + %476 = OpCompositeExtract %float %475 0 + %477 = OpCompositeExtract %float %475 1 + %478 = OpCompositeExtract %float %475 2 + %479 = OpExtInst %float %1 FMin %477 %478 + %480 = OpExtInst %float %1 FMin %476 %479 + %481 = OpAccessChain 
%_ptr_Uniform_float %View %int_153 %int_2 %int_3 + %482 = OpLoad %float %481 + %483 = OpFMul %float %482 %420 + %484 = OpFOrdGreaterThan %bool %464 %483 + OpSelectionMerge %485 DontFlatten + OpBranchConditional %484 %486 %487 + %487 = OpLabel + %488 = OpAccessChain %_ptr_Uniform_float %View %int_153 %int_3 %int_3 + %489 = OpLoad %float %488 + %490 = OpFMul %float %489 %420 + %491 = OpFOrdGreaterThan %bool %480 %490 + OpSelectionMerge %492 None + OpBranchConditional %491 %493 %492 + %493 = OpLabel + %494 = OpFMul %float %480 %float_10 + %495 = OpAccessChain %_ptr_Uniform_float %View %int_154 %int_3 %int_3 + %496 = OpLoad %float %495 + %497 = OpFMul %float %494 %496 + %498 = OpExtInst %float %1 FClamp %497 %float_0 %float_1 + %499 = OpAccessChain %_ptr_Uniform_v4float %View %int_154 %uint_3 + %500 = OpLoad %v4float %499 + %501 = OpVectorShuffle %v3float %500 %500 3 3 3 + %502 = OpFMul %v3float %272 %501 + %503 = OpVectorShuffle %v3float %500 %500 0 1 2 + %504 = OpFAdd %v3float %502 %503 + %505 = OpLoad %type_3d_image %View_GlobalDistanceFieldTexture3 + %506 = OpLoad %type_sampler %View_GlobalDistanceFieldSampler0 + %507 = OpSampledImage %type_sampled_image %505 %506 + %508 = OpImageSampleExplicitLod %v4float %507 %504 Lod %float_0 + %509 = OpCompositeExtract %float %508 0 + %510 = OpExtInst %float %1 FMix %400 %509 %498 + OpBranch %492 + %492 = OpLabel + %511 = OpPhi %float %400 %487 %510 %493 + OpBranch %485 + %486 = OpLabel + %512 = OpAccessChain %_ptr_Uniform_v4float %View %int_154 %uint_2 + %513 = OpLoad %v4float %512 + %514 = OpVectorShuffle %v3float %513 %513 3 3 3 + %515 = OpFMul %v3float %272 %514 + %516 = OpVectorShuffle %v3float %513 %513 0 1 2 + %517 = OpFAdd %v3float %515 %516 + %518 = OpLoad %type_3d_image %View_GlobalDistanceFieldTexture2 + %519 = OpLoad %type_sampler %View_GlobalDistanceFieldSampler0 + %520 = OpSampledImage %type_sampled_image %518 %519 + %521 = OpImageSampleExplicitLod %v4float %520 %517 Lod %float_0 + %522 = OpCompositeExtract %float 
%521 0 + OpBranch %485 + %485 = OpLabel + %523 = OpPhi %float %522 %486 %511 %492 + OpBranch %446 + %447 = OpLabel + %524 = OpAccessChain %_ptr_Uniform_v4float %View %int_154 %uint_1 + %525 = OpLoad %v4float %524 + %526 = OpVectorShuffle %v3float %525 %525 3 3 3 + %527 = OpFMul %v3float %272 %526 + %528 = OpVectorShuffle %v3float %525 %525 0 1 2 + %529 = OpFAdd %v3float %527 %528 + %530 = OpLoad %type_3d_image %View_GlobalDistanceFieldTexture1 + %531 = OpLoad %type_sampler %View_GlobalDistanceFieldSampler0 + %532 = OpSampledImage %type_sampled_image %530 %531 + %533 = OpImageSampleExplicitLod %v4float %532 %529 Lod %float_0 + %534 = OpCompositeExtract %float %533 0 + OpBranch %446 + %446 = OpLabel + %535 = OpPhi %float %534 %447 %523 %485 + OpBranch %423 + %424 = OpLabel + %536 = OpAccessChain %_ptr_Uniform_v4float %View %int_154 %uint_0 + %537 = OpLoad %v4float %536 + %538 = OpVectorShuffle %v3float %537 %537 3 3 3 + %539 = OpFMul %v3float %272 %538 + %540 = OpVectorShuffle %v3float %537 %537 0 1 2 + %541 = OpFAdd %v3float %539 %540 + %542 = OpLoad %type_3d_image %View_GlobalDistanceFieldTexture0 + %543 = OpLoad %type_sampler %View_GlobalDistanceFieldSampler0 + %544 = OpSampledImage %type_sampled_image %542 %543 + %545 = OpImageSampleExplicitLod %v4float %544 %541 Lod %float_0 + %546 = OpCompositeExtract %float %545 0 + OpBranch %423 + %423 = OpLabel + %547 = OpPhi %float %546 %424 %535 %446 + %548 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_2 + %549 = OpLoad %float %548 + %550 = OpFAdd %float %547 %549 + %551 = OpExtInst %float %1 FMin %550 %float_0 + %552 = OpAccessChain %_ptr_Uniform_float %Material %int_1 %int_0 %int_3 + %553 = OpLoad %float %552 + %554 = OpFMul %float %551 %553 + %555 = OpCompositeExtract %v3float %398 2 + %556 = OpCompositeConstruct %v3float %554 %554 %554 + %557 = OpFMul %v3float %555 %556 + %558 = OpFMul %float %163 %187 + %559 = OpFMul %float %165 %188 + %560 = OpFAdd %float %558 %559 + %561 = OpFMul %float %167 %189 
+ %562 = OpFAdd %float %560 %561 + %563 = OpCompositeConstruct %v3float %562 %562 %562 + %564 = OpFMul %v3float %557 %563 + %565 = OpFAdd %v3float %264 %564 + %566 = OpVectorShuffle %v4float %235 %565 4 5 6 3 + %567 = OpVectorShuffle %v3float %565 %117 0 1 2 + %568 = OpFSub %v3float %567 %184 + %569 = OpCompositeExtract %float %568 0 + %570 = OpCompositeExtract %float %568 1 + %571 = OpCompositeExtract %float %568 2 + %572 = OpCompositeConstruct %v4float %569 %570 %571 %float_1 + %573 = OpDot %float %186 %572 + %574 = OpMatrixTimesVector %v4float %180 %566 + %575 = OpCompositeExtract %float %574 3 + %576 = OpFMul %float %float_0_00100000005 %575 + %577 = OpCompositeExtract %float %574 2 + %578 = OpFAdd %float %577 %576 + %579 = OpCompositeInsert %v4float %578 %574 2 + OpStore %gl_Position %579 + OpStore %out_var_TEXCOORD6 %261 + OpStore %out_var_TEXCOORD7 %263 + OpStore %out_var_TEXCOORD10_centroid %257 + OpStore %out_var_TEXCOORD11_centroid %259 + %580 = OpAccessChain %_ptr_Output_float %gl_ClipDistance %uint_0 + OpStore %580 %573 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese b/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese new file mode 100644 index 00000000000..e792c7e1160 --- /dev/null +++ b/shaders-ue4/asm/tese/ds-patch-inputs.asm.tese @@ -0,0 +1,547 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 236 +; Schema: 0 + OpCapability Tessellation + OpCapability SampledBuffer + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TessellationEvaluation %MainDomain "main" %in_var_TEXCOORD10_centroid %in_var_TEXCOORD11_centroid %in_var_VS_to_DS_Position %in_var_VS_To_DS_VertexID %in_var_PN_POSITION %in_var_PN_DisplacementScales %in_var_PN_TessellationMultiplier %in_var_PN_WorldDisplacementMultiplier %in_var_PN_DominantVertex %in_var_PN_DominantVertex1 %in_var_PN_DominantVertex2 %in_var_PN_DominantEdge %in_var_PN_DominantEdge1 
%in_var_PN_DominantEdge2 %in_var_PN_DominantEdge3 %in_var_PN_DominantEdge4 %in_var_PN_DominantEdge5 %gl_TessLevelOuter %gl_TessLevelInner %in_var_PN_POSITION9 %gl_TessCoord %out_var_TEXCOORD10_centroid %out_var_TEXCOORD11_centroid %out_var_TEXCOORD6 %out_var_TEXCOORD7 %gl_Position + OpExecutionMode %MainDomain Triangles + OpSource HLSL 600 + OpName %type_ShadowDepthPass "type.ShadowDepthPass" + OpMemberName %type_ShadowDepthPass 0 "PrePadding_ShadowDepthPass_LPV_0" + OpMemberName %type_ShadowDepthPass 1 "PrePadding_ShadowDepthPass_LPV_4" + OpMemberName %type_ShadowDepthPass 2 "PrePadding_ShadowDepthPass_LPV_8" + OpMemberName %type_ShadowDepthPass 3 "PrePadding_ShadowDepthPass_LPV_12" + OpMemberName %type_ShadowDepthPass 4 "PrePadding_ShadowDepthPass_LPV_16" + OpMemberName %type_ShadowDepthPass 5 "PrePadding_ShadowDepthPass_LPV_20" + OpMemberName %type_ShadowDepthPass 6 "PrePadding_ShadowDepthPass_LPV_24" + OpMemberName %type_ShadowDepthPass 7 "PrePadding_ShadowDepthPass_LPV_28" + OpMemberName %type_ShadowDepthPass 8 "PrePadding_ShadowDepthPass_LPV_32" + OpMemberName %type_ShadowDepthPass 9 "PrePadding_ShadowDepthPass_LPV_36" + OpMemberName %type_ShadowDepthPass 10 "PrePadding_ShadowDepthPass_LPV_40" + OpMemberName %type_ShadowDepthPass 11 "PrePadding_ShadowDepthPass_LPV_44" + OpMemberName %type_ShadowDepthPass 12 "PrePadding_ShadowDepthPass_LPV_48" + OpMemberName %type_ShadowDepthPass 13 "PrePadding_ShadowDepthPass_LPV_52" + OpMemberName %type_ShadowDepthPass 14 "PrePadding_ShadowDepthPass_LPV_56" + OpMemberName %type_ShadowDepthPass 15 "PrePadding_ShadowDepthPass_LPV_60" + OpMemberName %type_ShadowDepthPass 16 "PrePadding_ShadowDepthPass_LPV_64" + OpMemberName %type_ShadowDepthPass 17 "PrePadding_ShadowDepthPass_LPV_68" + OpMemberName %type_ShadowDepthPass 18 "PrePadding_ShadowDepthPass_LPV_72" + OpMemberName %type_ShadowDepthPass 19 "PrePadding_ShadowDepthPass_LPV_76" + OpMemberName %type_ShadowDepthPass 20 "PrePadding_ShadowDepthPass_LPV_80" + OpMemberName 
%type_ShadowDepthPass 21 "PrePadding_ShadowDepthPass_LPV_84" + OpMemberName %type_ShadowDepthPass 22 "PrePadding_ShadowDepthPass_LPV_88" + OpMemberName %type_ShadowDepthPass 23 "PrePadding_ShadowDepthPass_LPV_92" + OpMemberName %type_ShadowDepthPass 24 "PrePadding_ShadowDepthPass_LPV_96" + OpMemberName %type_ShadowDepthPass 25 "PrePadding_ShadowDepthPass_LPV_100" + OpMemberName %type_ShadowDepthPass 26 "PrePadding_ShadowDepthPass_LPV_104" + OpMemberName %type_ShadowDepthPass 27 "PrePadding_ShadowDepthPass_LPV_108" + OpMemberName %type_ShadowDepthPass 28 "PrePadding_ShadowDepthPass_LPV_112" + OpMemberName %type_ShadowDepthPass 29 "PrePadding_ShadowDepthPass_LPV_116" + OpMemberName %type_ShadowDepthPass 30 "PrePadding_ShadowDepthPass_LPV_120" + OpMemberName %type_ShadowDepthPass 31 "PrePadding_ShadowDepthPass_LPV_124" + OpMemberName %type_ShadowDepthPass 32 "PrePadding_ShadowDepthPass_LPV_128" + OpMemberName %type_ShadowDepthPass 33 "PrePadding_ShadowDepthPass_LPV_132" + OpMemberName %type_ShadowDepthPass 34 "PrePadding_ShadowDepthPass_LPV_136" + OpMemberName %type_ShadowDepthPass 35 "PrePadding_ShadowDepthPass_LPV_140" + OpMemberName %type_ShadowDepthPass 36 "PrePadding_ShadowDepthPass_LPV_144" + OpMemberName %type_ShadowDepthPass 37 "PrePadding_ShadowDepthPass_LPV_148" + OpMemberName %type_ShadowDepthPass 38 "PrePadding_ShadowDepthPass_LPV_152" + OpMemberName %type_ShadowDepthPass 39 "PrePadding_ShadowDepthPass_LPV_156" + OpMemberName %type_ShadowDepthPass 40 "PrePadding_ShadowDepthPass_LPV_160" + OpMemberName %type_ShadowDepthPass 41 "PrePadding_ShadowDepthPass_LPV_164" + OpMemberName %type_ShadowDepthPass 42 "PrePadding_ShadowDepthPass_LPV_168" + OpMemberName %type_ShadowDepthPass 43 "PrePadding_ShadowDepthPass_LPV_172" + OpMemberName %type_ShadowDepthPass 44 "PrePadding_ShadowDepthPass_LPV_176" + OpMemberName %type_ShadowDepthPass 45 "PrePadding_ShadowDepthPass_LPV_180" + OpMemberName %type_ShadowDepthPass 46 "PrePadding_ShadowDepthPass_LPV_184" + OpMemberName 
%type_ShadowDepthPass 47 "PrePadding_ShadowDepthPass_LPV_188" + OpMemberName %type_ShadowDepthPass 48 "PrePadding_ShadowDepthPass_LPV_192" + OpMemberName %type_ShadowDepthPass 49 "PrePadding_ShadowDepthPass_LPV_196" + OpMemberName %type_ShadowDepthPass 50 "PrePadding_ShadowDepthPass_LPV_200" + OpMemberName %type_ShadowDepthPass 51 "PrePadding_ShadowDepthPass_LPV_204" + OpMemberName %type_ShadowDepthPass 52 "PrePadding_ShadowDepthPass_LPV_208" + OpMemberName %type_ShadowDepthPass 53 "PrePadding_ShadowDepthPass_LPV_212" + OpMemberName %type_ShadowDepthPass 54 "PrePadding_ShadowDepthPass_LPV_216" + OpMemberName %type_ShadowDepthPass 55 "PrePadding_ShadowDepthPass_LPV_220" + OpMemberName %type_ShadowDepthPass 56 "PrePadding_ShadowDepthPass_LPV_224" + OpMemberName %type_ShadowDepthPass 57 "PrePadding_ShadowDepthPass_LPV_228" + OpMemberName %type_ShadowDepthPass 58 "PrePadding_ShadowDepthPass_LPV_232" + OpMemberName %type_ShadowDepthPass 59 "PrePadding_ShadowDepthPass_LPV_236" + OpMemberName %type_ShadowDepthPass 60 "PrePadding_ShadowDepthPass_LPV_240" + OpMemberName %type_ShadowDepthPass 61 "PrePadding_ShadowDepthPass_LPV_244" + OpMemberName %type_ShadowDepthPass 62 "PrePadding_ShadowDepthPass_LPV_248" + OpMemberName %type_ShadowDepthPass 63 "PrePadding_ShadowDepthPass_LPV_252" + OpMemberName %type_ShadowDepthPass 64 "PrePadding_ShadowDepthPass_LPV_256" + OpMemberName %type_ShadowDepthPass 65 "PrePadding_ShadowDepthPass_LPV_260" + OpMemberName %type_ShadowDepthPass 66 "PrePadding_ShadowDepthPass_LPV_264" + OpMemberName %type_ShadowDepthPass 67 "PrePadding_ShadowDepthPass_LPV_268" + OpMemberName %type_ShadowDepthPass 68 "ShadowDepthPass_LPV_mRsmToWorld" + OpMemberName %type_ShadowDepthPass 69 "ShadowDepthPass_LPV_mLightColour" + OpMemberName %type_ShadowDepthPass 70 "ShadowDepthPass_LPV_GeometryVolumeCaptureLightDirection" + OpMemberName %type_ShadowDepthPass 71 "ShadowDepthPass_LPV_mEyePos" + OpMemberName %type_ShadowDepthPass 72 "ShadowDepthPass_LPV_mOldGridOffset" + 
OpMemberName %type_ShadowDepthPass 73 "PrePadding_ShadowDepthPass_LPV_396" + OpMemberName %type_ShadowDepthPass 74 "ShadowDepthPass_LPV_mLpvGridOffset" + OpMemberName %type_ShadowDepthPass 75 "ShadowDepthPass_LPV_ClearMultiplier" + OpMemberName %type_ShadowDepthPass 76 "ShadowDepthPass_LPV_LpvScale" + OpMemberName %type_ShadowDepthPass 77 "ShadowDepthPass_LPV_OneOverLpvScale" + OpMemberName %type_ShadowDepthPass 78 "ShadowDepthPass_LPV_DirectionalOcclusionIntensity" + OpMemberName %type_ShadowDepthPass 79 "ShadowDepthPass_LPV_DirectionalOcclusionRadius" + OpMemberName %type_ShadowDepthPass 80 "ShadowDepthPass_LPV_RsmAreaIntensityMultiplier" + OpMemberName %type_ShadowDepthPass 81 "ShadowDepthPass_LPV_RsmPixelToTexcoordMultiplier" + OpMemberName %type_ShadowDepthPass 82 "ShadowDepthPass_LPV_SecondaryOcclusionStrength" + OpMemberName %type_ShadowDepthPass 83 "ShadowDepthPass_LPV_SecondaryBounceStrength" + OpMemberName %type_ShadowDepthPass 84 "ShadowDepthPass_LPV_VplInjectionBias" + OpMemberName %type_ShadowDepthPass 85 "ShadowDepthPass_LPV_GeometryVolumeInjectionBias" + OpMemberName %type_ShadowDepthPass 86 "ShadowDepthPass_LPV_EmissiveInjectionMultiplier" + OpMemberName %type_ShadowDepthPass 87 "ShadowDepthPass_LPV_PropagationIndex" + OpMemberName %type_ShadowDepthPass 88 "ShadowDepthPass_ProjectionMatrix" + OpMemberName %type_ShadowDepthPass 89 "ShadowDepthPass_ViewMatrix" + OpMemberName %type_ShadowDepthPass 90 "ShadowDepthPass_ShadowParams" + OpMemberName %type_ShadowDepthPass 91 "ShadowDepthPass_bClampToNearPlane" + OpMemberName %type_ShadowDepthPass 92 "PrePadding_ShadowDepthPass_612" + OpMemberName %type_ShadowDepthPass 93 "PrePadding_ShadowDepthPass_616" + OpMemberName %type_ShadowDepthPass 94 "PrePadding_ShadowDepthPass_620" + OpMemberName %type_ShadowDepthPass 95 "ShadowDepthPass_ShadowViewProjectionMatrices" + OpMemberName %type_ShadowDepthPass 96 "ShadowDepthPass_ShadowViewMatrices" + OpName %ShadowDepthPass "ShadowDepthPass" + OpName 
%in_var_TEXCOORD10_centroid "in.var.TEXCOORD10_centroid" + OpName %in_var_TEXCOORD11_centroid "in.var.TEXCOORD11_centroid" + OpName %in_var_VS_to_DS_Position "in.var.VS_to_DS_Position" + OpName %in_var_VS_To_DS_VertexID "in.var.VS_To_DS_VertexID" + OpName %in_var_PN_POSITION "in.var.PN_POSITION" + OpName %in_var_PN_DisplacementScales "in.var.PN_DisplacementScales" + OpName %in_var_PN_TessellationMultiplier "in.var.PN_TessellationMultiplier" + OpName %in_var_PN_WorldDisplacementMultiplier "in.var.PN_WorldDisplacementMultiplier" + OpName %in_var_PN_DominantVertex "in.var.PN_DominantVertex" + OpName %in_var_PN_DominantVertex1 "in.var.PN_DominantVertex1" + OpName %in_var_PN_DominantVertex2 "in.var.PN_DominantVertex2" + OpName %in_var_PN_DominantEdge "in.var.PN_DominantEdge" + OpName %in_var_PN_DominantEdge1 "in.var.PN_DominantEdge1" + OpName %in_var_PN_DominantEdge2 "in.var.PN_DominantEdge2" + OpName %in_var_PN_DominantEdge3 "in.var.PN_DominantEdge3" + OpName %in_var_PN_DominantEdge4 "in.var.PN_DominantEdge4" + OpName %in_var_PN_DominantEdge5 "in.var.PN_DominantEdge5" + OpName %in_var_PN_POSITION9 "in.var.PN_POSITION9" + OpName %out_var_TEXCOORD10_centroid "out.var.TEXCOORD10_centroid" + OpName %out_var_TEXCOORD11_centroid "out.var.TEXCOORD11_centroid" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %out_var_TEXCOORD7 "out.var.TEXCOORD7" + OpName %MainDomain "MainDomain" + OpDecorateString %in_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %in_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %in_var_VS_to_DS_Position UserSemantic "VS_to_DS_Position" + OpDecorateString %in_var_VS_To_DS_VertexID UserSemantic "VS_To_DS_VertexID" + OpDecorateString %in_var_PN_POSITION UserSemantic "PN_POSITION" + OpDecorateString %in_var_PN_DisplacementScales UserSemantic "PN_DisplacementScales" + OpDecorateString %in_var_PN_TessellationMultiplier UserSemantic "PN_TessellationMultiplier" + OpDecorateString 
%in_var_PN_WorldDisplacementMultiplier UserSemantic "PN_WorldDisplacementMultiplier" + OpDecorateString %in_var_PN_DominantVertex UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantVertex1 UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantVertex2 UserSemantic "PN_DominantVertex" + OpDecorateString %in_var_PN_DominantEdge UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge1 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge2 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge3 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge4 UserSemantic "PN_DominantEdge" + OpDecorateString %in_var_PN_DominantEdge5 UserSemantic "PN_DominantEdge" + OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter + OpDecorateString %gl_TessLevelOuter UserSemantic "SV_TessFactor" + OpDecorate %gl_TessLevelOuter Patch + OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner + OpDecorateString %gl_TessLevelInner UserSemantic "SV_InsideTessFactor" + OpDecorate %gl_TessLevelInner Patch + OpDecorateString %in_var_PN_POSITION9 UserSemantic "PN_POSITION9" + OpDecorate %in_var_PN_POSITION9 Patch + OpDecorate %gl_TessCoord BuiltIn TessCoord + OpDecorateString %gl_TessCoord UserSemantic "SV_DomainLocation" + OpDecorate %gl_TessCoord Patch + OpDecorateString %out_var_TEXCOORD10_centroid UserSemantic "TEXCOORD10_centroid" + OpDecorateString %out_var_TEXCOORD11_centroid UserSemantic "TEXCOORD11_centroid" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorateString %out_var_TEXCOORD7 UserSemantic "TEXCOORD7" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %in_var_PN_DisplacementScales Location 0 + OpDecorate %in_var_PN_DominantEdge Location 1 + OpDecorate %in_var_PN_DominantEdge1 Location 2 + OpDecorate %in_var_PN_DominantEdge2 Location 3 + OpDecorate %in_var_PN_DominantEdge3 Location 4 + 
OpDecorate %in_var_PN_DominantEdge4 Location 5 + OpDecorate %in_var_PN_DominantEdge5 Location 6 + OpDecorate %in_var_PN_DominantVertex Location 7 + OpDecorate %in_var_PN_DominantVertex1 Location 8 + OpDecorate %in_var_PN_DominantVertex2 Location 9 + OpDecorate %in_var_PN_POSITION Location 10 + OpDecorate %in_var_PN_POSITION9 Location 13 + OpDecorate %in_var_PN_TessellationMultiplier Location 14 + OpDecorate %in_var_PN_WorldDisplacementMultiplier Location 15 + OpDecorate %in_var_TEXCOORD10_centroid Location 16 + OpDecorate %in_var_TEXCOORD11_centroid Location 17 + OpDecorate %in_var_VS_To_DS_VertexID Location 18 + OpDecorate %in_var_VS_to_DS_Position Location 19 + OpDecorate %out_var_TEXCOORD10_centroid Location 0 + OpDecorate %out_var_TEXCOORD11_centroid Location 1 + OpDecorate %out_var_TEXCOORD6 Location 2 + OpDecorate %out_var_TEXCOORD7 Location 3 + OpDecorate %ShadowDepthPass DescriptorSet 0 + OpDecorate %ShadowDepthPass Binding 0 + OpDecorate %_arr_mat4v4float_uint_6 ArrayStride 64 + OpMemberDecorate %type_ShadowDepthPass 0 Offset 0 + OpMemberDecorate %type_ShadowDepthPass 1 Offset 4 + OpMemberDecorate %type_ShadowDepthPass 2 Offset 8 + OpMemberDecorate %type_ShadowDepthPass 3 Offset 12 + OpMemberDecorate %type_ShadowDepthPass 4 Offset 16 + OpMemberDecorate %type_ShadowDepthPass 5 Offset 20 + OpMemberDecorate %type_ShadowDepthPass 6 Offset 24 + OpMemberDecorate %type_ShadowDepthPass 7 Offset 28 + OpMemberDecorate %type_ShadowDepthPass 8 Offset 32 + OpMemberDecorate %type_ShadowDepthPass 9 Offset 36 + OpMemberDecorate %type_ShadowDepthPass 10 Offset 40 + OpMemberDecorate %type_ShadowDepthPass 11 Offset 44 + OpMemberDecorate %type_ShadowDepthPass 12 Offset 48 + OpMemberDecorate %type_ShadowDepthPass 13 Offset 52 + OpMemberDecorate %type_ShadowDepthPass 14 Offset 56 + OpMemberDecorate %type_ShadowDepthPass 15 Offset 60 + OpMemberDecorate %type_ShadowDepthPass 16 Offset 64 + OpMemberDecorate %type_ShadowDepthPass 17 Offset 68 + OpMemberDecorate 
%type_ShadowDepthPass 18 Offset 72 + OpMemberDecorate %type_ShadowDepthPass 19 Offset 76 + OpMemberDecorate %type_ShadowDepthPass 20 Offset 80 + OpMemberDecorate %type_ShadowDepthPass 21 Offset 84 + OpMemberDecorate %type_ShadowDepthPass 22 Offset 88 + OpMemberDecorate %type_ShadowDepthPass 23 Offset 92 + OpMemberDecorate %type_ShadowDepthPass 24 Offset 96 + OpMemberDecorate %type_ShadowDepthPass 25 Offset 100 + OpMemberDecorate %type_ShadowDepthPass 26 Offset 104 + OpMemberDecorate %type_ShadowDepthPass 27 Offset 108 + OpMemberDecorate %type_ShadowDepthPass 28 Offset 112 + OpMemberDecorate %type_ShadowDepthPass 29 Offset 116 + OpMemberDecorate %type_ShadowDepthPass 30 Offset 120 + OpMemberDecorate %type_ShadowDepthPass 31 Offset 124 + OpMemberDecorate %type_ShadowDepthPass 32 Offset 128 + OpMemberDecorate %type_ShadowDepthPass 33 Offset 132 + OpMemberDecorate %type_ShadowDepthPass 34 Offset 136 + OpMemberDecorate %type_ShadowDepthPass 35 Offset 140 + OpMemberDecorate %type_ShadowDepthPass 36 Offset 144 + OpMemberDecorate %type_ShadowDepthPass 37 Offset 148 + OpMemberDecorate %type_ShadowDepthPass 38 Offset 152 + OpMemberDecorate %type_ShadowDepthPass 39 Offset 156 + OpMemberDecorate %type_ShadowDepthPass 40 Offset 160 + OpMemberDecorate %type_ShadowDepthPass 41 Offset 164 + OpMemberDecorate %type_ShadowDepthPass 42 Offset 168 + OpMemberDecorate %type_ShadowDepthPass 43 Offset 172 + OpMemberDecorate %type_ShadowDepthPass 44 Offset 176 + OpMemberDecorate %type_ShadowDepthPass 45 Offset 180 + OpMemberDecorate %type_ShadowDepthPass 46 Offset 184 + OpMemberDecorate %type_ShadowDepthPass 47 Offset 188 + OpMemberDecorate %type_ShadowDepthPass 48 Offset 192 + OpMemberDecorate %type_ShadowDepthPass 49 Offset 196 + OpMemberDecorate %type_ShadowDepthPass 50 Offset 200 + OpMemberDecorate %type_ShadowDepthPass 51 Offset 204 + OpMemberDecorate %type_ShadowDepthPass 52 Offset 208 + OpMemberDecorate %type_ShadowDepthPass 53 Offset 212 + OpMemberDecorate %type_ShadowDepthPass 54 
Offset 216 + OpMemberDecorate %type_ShadowDepthPass 55 Offset 220 + OpMemberDecorate %type_ShadowDepthPass 56 Offset 224 + OpMemberDecorate %type_ShadowDepthPass 57 Offset 228 + OpMemberDecorate %type_ShadowDepthPass 58 Offset 232 + OpMemberDecorate %type_ShadowDepthPass 59 Offset 236 + OpMemberDecorate %type_ShadowDepthPass 60 Offset 240 + OpMemberDecorate %type_ShadowDepthPass 61 Offset 244 + OpMemberDecorate %type_ShadowDepthPass 62 Offset 248 + OpMemberDecorate %type_ShadowDepthPass 63 Offset 252 + OpMemberDecorate %type_ShadowDepthPass 64 Offset 256 + OpMemberDecorate %type_ShadowDepthPass 65 Offset 260 + OpMemberDecorate %type_ShadowDepthPass 66 Offset 264 + OpMemberDecorate %type_ShadowDepthPass 67 Offset 268 + OpMemberDecorate %type_ShadowDepthPass 68 Offset 272 + OpMemberDecorate %type_ShadowDepthPass 68 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 68 ColMajor + OpMemberDecorate %type_ShadowDepthPass 69 Offset 336 + OpMemberDecorate %type_ShadowDepthPass 70 Offset 352 + OpMemberDecorate %type_ShadowDepthPass 71 Offset 368 + OpMemberDecorate %type_ShadowDepthPass 72 Offset 384 + OpMemberDecorate %type_ShadowDepthPass 73 Offset 396 + OpMemberDecorate %type_ShadowDepthPass 74 Offset 400 + OpMemberDecorate %type_ShadowDepthPass 75 Offset 412 + OpMemberDecorate %type_ShadowDepthPass 76 Offset 416 + OpMemberDecorate %type_ShadowDepthPass 77 Offset 420 + OpMemberDecorate %type_ShadowDepthPass 78 Offset 424 + OpMemberDecorate %type_ShadowDepthPass 79 Offset 428 + OpMemberDecorate %type_ShadowDepthPass 80 Offset 432 + OpMemberDecorate %type_ShadowDepthPass 81 Offset 436 + OpMemberDecorate %type_ShadowDepthPass 82 Offset 440 + OpMemberDecorate %type_ShadowDepthPass 83 Offset 444 + OpMemberDecorate %type_ShadowDepthPass 84 Offset 448 + OpMemberDecorate %type_ShadowDepthPass 85 Offset 452 + OpMemberDecorate %type_ShadowDepthPass 86 Offset 456 + OpMemberDecorate %type_ShadowDepthPass 87 Offset 460 + OpMemberDecorate %type_ShadowDepthPass 88 Offset 464 + 
OpMemberDecorate %type_ShadowDepthPass 88 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 88 ColMajor + OpMemberDecorate %type_ShadowDepthPass 89 Offset 528 + OpMemberDecorate %type_ShadowDepthPass 89 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 89 ColMajor + OpMemberDecorate %type_ShadowDepthPass 90 Offset 592 + OpMemberDecorate %type_ShadowDepthPass 91 Offset 608 + OpMemberDecorate %type_ShadowDepthPass 92 Offset 612 + OpMemberDecorate %type_ShadowDepthPass 93 Offset 616 + OpMemberDecorate %type_ShadowDepthPass 94 Offset 620 + OpMemberDecorate %type_ShadowDepthPass 95 Offset 624 + OpMemberDecorate %type_ShadowDepthPass 95 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 95 ColMajor + OpMemberDecorate %type_ShadowDepthPass 96 Offset 1008 + OpMemberDecorate %type_ShadowDepthPass 96 MatrixStride 16 + OpMemberDecorate %type_ShadowDepthPass 96 ColMajor + OpDecorate %type_ShadowDepthPass Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %float_3 = OpConstant %float 3 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %float_6 = OpConstant %float 6 + %48 = OpConstantComposite %v4float %float_6 %float_6 %float_6 %float_6 + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %int_3 = OpConstant %int 3 + %int_88 = OpConstant %int 88 + %int_89 = OpConstant %int 89 + %int_90 = OpConstant %int 90 + %int_91 = OpConstant %int 91 +%float_9_99999997en07 = OpConstant %float 9.99999997e-07 + %uint_6 = OpConstant %uint 6 +%_arr_mat4v4float_uint_6 = OpTypeArray %mat4v4float %uint_6 + %v3int = OpTypeVector %int 3 +%type_ShadowDepthPass = OpTypeStruct %float %float %float %float %float %float %float %float 
%float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %mat4v4float %v4float %v4float %v4float %v3int %int %v3int %float %float %float %float %float %float %float %float %float %float %float %float %int %mat4v4float %mat4v4float %v4float %float %float %float %float %_arr_mat4v4float_uint_6 %_arr_mat4v4float_uint_6 +%_ptr_Uniform_type_ShadowDepthPass = OpTypePointer Uniform %type_ShadowDepthPass + %uint_3 = OpConstant %uint 3 +%_arr_v4float_uint_3 = OpTypeArray %v4float %uint_3 +%_ptr_Input__arr_v4float_uint_3 = OpTypePointer Input %_arr_v4float_uint_3 +%_arr_uint_uint_3 = OpTypeArray %uint %uint_3 +%_ptr_Input__arr_uint_uint_3 = OpTypePointer Input %_arr_uint_uint_3 +%_arr__arr_v4float_uint_3_uint_3 = OpTypeArray %_arr_v4float_uint_3 %uint_3 +%_ptr_Input__arr__arr_v4float_uint_3_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_3_uint_3 +%_arr_v3float_uint_3 = OpTypeArray %v3float %uint_3 +%_ptr_Input__arr_v3float_uint_3 = OpTypePointer Input %_arr_v3float_uint_3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 +%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3 +%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3 +%_ptr_Input__arr_v2float_uint_3 = OpTypePointer Input %_arr_v2float_uint_3 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4 +%_arr_float_uint_2 = OpTypeArray %float %uint_2 +%_ptr_Input__arr_float_uint_2 = OpTypePointer Input %_arr_float_uint_2 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input_v3float = OpTypePointer Input %v3float +%_ptr_Output_v4float = OpTypePointer Output %v4float 
+%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v3float = OpTypePointer Output %v3float + %void = OpTypeVoid + %83 = OpTypeFunction %void +%_ptr_Function_float = OpTypePointer Function %float + %bool = OpTypeBool +%_ptr_Function_mat4v4float = OpTypePointer Function %mat4v4float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%ShadowDepthPass = OpVariable %_ptr_Uniform_type_ShadowDepthPass Uniform +%in_var_TEXCOORD10_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_TEXCOORD11_centroid = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_to_DS_Position = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_VS_To_DS_VertexID = OpVariable %_ptr_Input__arr_uint_uint_3 Input +%in_var_PN_POSITION = OpVariable %_ptr_Input__arr__arr_v4float_uint_3_uint_3 Input +%in_var_PN_DisplacementScales = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_TessellationMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_WorldDisplacementMultiplier = OpVariable %_ptr_Input__arr_float_uint_3 Input +%in_var_PN_DominantVertex = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantVertex1 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantVertex2 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_DominantEdge = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantEdge1 = OpVariable %_ptr_Input__arr_v2float_uint_3 Input +%in_var_PN_DominantEdge2 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantEdge3 = OpVariable %_ptr_Input__arr_v4float_uint_3 Input +%in_var_PN_DominantEdge4 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%in_var_PN_DominantEdge5 = OpVariable %_ptr_Input__arr_v3float_uint_3 Input +%gl_TessLevelOuter = OpVariable %_ptr_Input__arr_float_uint_4 Input +%gl_TessLevelInner = OpVariable %_ptr_Input__arr_float_uint_2 Input +%in_var_PN_POSITION9 = 
OpVariable %_ptr_Input_v4float Input +%gl_TessCoord = OpVariable %_ptr_Input_v3float Input +%out_var_TEXCOORD10_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD11_centroid = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_float Output +%out_var_TEXCOORD7 = OpVariable %_ptr_Output_v3float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output + %89 = OpConstantNull %v4float + %90 = OpUndef %v4float + %MainDomain = OpFunction %void None %83 + %91 = OpLabel + %92 = OpVariable %_ptr_Function_mat4v4float Function + %93 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD10_centroid + %94 = OpLoad %_arr_v4float_uint_3 %in_var_TEXCOORD11_centroid + %95 = OpCompositeExtract %v4float %93 0 + %96 = OpCompositeExtract %v4float %94 0 + %97 = OpCompositeExtract %v4float %93 1 + %98 = OpCompositeExtract %v4float %94 1 + %99 = OpCompositeExtract %v4float %93 2 + %100 = OpCompositeExtract %v4float %94 2 + %101 = OpLoad %_arr__arr_v4float_uint_3_uint_3 %in_var_PN_POSITION + %102 = OpCompositeExtract %_arr_v4float_uint_3 %101 0 + %103 = OpCompositeExtract %_arr_v4float_uint_3 %101 1 + %104 = OpCompositeExtract %_arr_v4float_uint_3 %101 2 + %105 = OpCompositeExtract %v4float %102 0 + %106 = OpCompositeExtract %v4float %102 1 + %107 = OpCompositeExtract %v4float %102 2 + %108 = OpCompositeExtract %v4float %103 0 + %109 = OpCompositeExtract %v4float %103 1 + %110 = OpCompositeExtract %v4float %103 2 + %111 = OpCompositeExtract %v4float %104 0 + %112 = OpCompositeExtract %v4float %104 1 + %113 = OpCompositeExtract %v4float %104 2 + %114 = OpLoad %v4float %in_var_PN_POSITION9 + %115 = OpLoad %v3float %gl_TessCoord + %116 = OpCompositeExtract %float %115 0 + %117 = OpCompositeExtract %float %115 1 + %118 = OpCompositeExtract %float %115 2 + %119 = OpFMul %float %116 %116 + %120 = OpFMul %float %117 %117 + %121 = OpFMul %float %118 %118 + %122 = OpFMul %float %119 %float_3 + %123 = OpFMul %float %120 %float_3 + %124 = OpFMul %float 
%121 %float_3 + %125 = OpCompositeConstruct %v4float %119 %119 %119 %119 + %126 = OpFMul %v4float %105 %125 + %127 = OpCompositeConstruct %v4float %116 %116 %116 %116 + %128 = OpFMul %v4float %126 %127 + %129 = OpCompositeConstruct %v4float %120 %120 %120 %120 + %130 = OpFMul %v4float %108 %129 + %131 = OpCompositeConstruct %v4float %117 %117 %117 %117 + %132 = OpFMul %v4float %130 %131 + %133 = OpFAdd %v4float %128 %132 + %134 = OpCompositeConstruct %v4float %121 %121 %121 %121 + %135 = OpFMul %v4float %111 %134 + %136 = OpCompositeConstruct %v4float %118 %118 %118 %118 + %137 = OpFMul %v4float %135 %136 + %138 = OpFAdd %v4float %133 %137 + %139 = OpCompositeConstruct %v4float %122 %122 %122 %122 + %140 = OpFMul %v4float %106 %139 + %141 = OpFMul %v4float %140 %131 + %142 = OpFAdd %v4float %138 %141 + %143 = OpCompositeConstruct %v4float %123 %123 %123 %123 + %144 = OpFMul %v4float %107 %143 + %145 = OpFMul %v4float %144 %127 + %146 = OpFAdd %v4float %142 %145 + %147 = OpFMul %v4float %109 %143 + %148 = OpFMul %v4float %147 %136 + %149 = OpFAdd %v4float %146 %148 + %150 = OpCompositeConstruct %v4float %124 %124 %124 %124 + %151 = OpFMul %v4float %110 %150 + %152 = OpFMul %v4float %151 %131 + %153 = OpFAdd %v4float %149 %152 + %154 = OpFMul %v4float %112 %150 + %155 = OpFMul %v4float %154 %127 + %156 = OpFAdd %v4float %153 %155 + %157 = OpFMul %v4float %113 %139 + %158 = OpFMul %v4float %157 %136 + %159 = OpFAdd %v4float %156 %158 + %160 = OpFMul %v4float %114 %48 + %161 = OpFMul %v4float %160 %136 + %162 = OpFMul %v4float %161 %127 + %163 = OpFMul %v4float %162 %131 + %164 = OpFAdd %v4float %159 %163 + %165 = OpVectorShuffle %v3float %95 %95 0 1 2 + %166 = OpCompositeConstruct %v3float %116 %116 %116 + %167 = OpFMul %v3float %165 %166 + %168 = OpVectorShuffle %v3float %97 %97 0 1 2 + %169 = OpCompositeConstruct %v3float %117 %117 %117 + %170 = OpFMul %v3float %168 %169 + %171 = OpFAdd %v3float %167 %170 + %172 = OpFMul %v4float %96 %127 + %173 = OpFMul %v4float 
%98 %131 + %174 = OpFAdd %v4float %172 %173 + %175 = OpVectorShuffle %v3float %171 %89 0 1 2 + %176 = OpVectorShuffle %v3float %99 %99 0 1 2 + %177 = OpCompositeConstruct %v3float %118 %118 %118 + %178 = OpFMul %v3float %176 %177 + %179 = OpFAdd %v3float %175 %178 + %180 = OpVectorShuffle %v4float %90 %179 4 5 6 3 + %181 = OpFMul %v4float %100 %136 + %182 = OpFAdd %v4float %174 %181 + %183 = OpVectorShuffle %v3float %182 %182 0 1 2 + %184 = OpVectorShuffle %v4float %164 %164 4 5 6 3 + %185 = OpAccessChain %_ptr_Uniform_mat4v4float %ShadowDepthPass %int_88 + %186 = OpLoad %mat4v4float %185 + %187 = OpAccessChain %_ptr_Uniform_mat4v4float %ShadowDepthPass %int_89 + %188 = OpLoad %mat4v4float %187 + OpStore %92 %188 + %189 = OpMatrixTimesVector %v4float %186 %184 + %190 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_91 + %191 = OpLoad %float %190 + %192 = OpFOrdGreaterThan %bool %191 %float_0 + %193 = OpCompositeExtract %float %189 2 + %194 = OpFOrdLessThan %bool %193 %float_0 + %195 = OpLogicalAnd %bool %192 %194 + OpSelectionMerge %196 None + OpBranchConditional %195 %197 %196 + %197 = OpLabel + %198 = OpCompositeInsert %v4float %float_9_99999997en07 %189 2 + %199 = OpCompositeInsert %v4float %float_1 %198 3 + OpBranch %196 + %196 = OpLabel + %200 = OpPhi %v4float %189 %91 %199 %197 + %201 = OpAccessChain %_ptr_Function_float %92 %uint_0 %int_2 + %202 = OpLoad %float %201 + %203 = OpAccessChain %_ptr_Function_float %92 %uint_1 %int_2 + %204 = OpLoad %float %203 + %205 = OpAccessChain %_ptr_Function_float %92 %uint_2 %int_2 + %206 = OpLoad %float %205 + %207 = OpCompositeConstruct %v3float %202 %204 %206 + %208 = OpDot %float %207 %183 + %209 = OpExtInst %float %1 FAbs %208 + %210 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_2 + %211 = OpLoad %float %210 + %212 = OpExtInst %float %1 FAbs %209 + %213 = OpFOrdGreaterThan %bool %212 %float_0 + %214 = OpFMul %float %209 %209 + %215 = OpFSub %float %float_1 %214 + %216 = OpExtInst %float %1 
FClamp %215 %float_0 %float_1 + %217 = OpExtInst %float %1 Sqrt %216 + %218 = OpFDiv %float %217 %209 + %219 = OpSelect %float %213 %218 %211 + %220 = OpExtInst %float %1 FClamp %219 %float_0 %211 + %221 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_1 + %222 = OpLoad %float %221 + %223 = OpFMul %float %222 %220 + %224 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_0 + %225 = OpLoad %float %224 + %226 = OpFAdd %float %223 %225 + %227 = OpAccessChain %_ptr_Uniform_float %ShadowDepthPass %int_90 %int_3 + %228 = OpLoad %float %227 + %229 = OpCompositeExtract %float %200 2 + %230 = OpFMul %float %229 %228 + %231 = OpFAdd %float %230 %226 + %232 = OpCompositeExtract %float %200 3 + %233 = OpFMul %float %231 %232 + %234 = OpCompositeInsert %v4float %233 %200 2 + %235 = OpVectorShuffle %v3float %164 %89 0 1 2 + OpStore %out_var_TEXCOORD10_centroid %180 + OpStore %out_var_TEXCOORD11_centroid %182 + OpStore %out_var_TEXCOORD6 %float_0 + OpStore %out_var_TEXCOORD7 %235 + OpStore %gl_Position %234 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/vert/array-missing-copies.asm.vert b/shaders-ue4/asm/vert/array-missing-copies.asm.vert new file mode 100644 index 00000000000..23dc7275601 --- /dev/null +++ b/shaders-ue4/asm/vert/array-missing-copies.asm.vert @@ -0,0 +1,1131 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 487 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %Main "main" %in_var_ATTRIBUTE0 %in_var_ATTRIBUTE1 %out_var_TEXCOORD0 %out_var_TEXCOORD1 %out_var_TEXCOORD2 %out_var_TEXCOORD3 %out_var_TEXCOORD8 %gl_Position + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 
"View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName %type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 
"View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" + OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 
"View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + 
OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" + OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" 
+ OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 "PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_MobileBasePass "type.MobileBasePass" + OpMemberName %type_MobileBasePass 0 "MobileBasePass_Fog_ExponentialFogParameters" + OpMemberName %type_MobileBasePass 1 "MobileBasePass_Fog_ExponentialFogParameters2" + OpMemberName %type_MobileBasePass 2 
"MobileBasePass_Fog_ExponentialFogColorParameter" + OpMemberName %type_MobileBasePass 3 "MobileBasePass_Fog_ExponentialFogParameters3" + OpMemberName %type_MobileBasePass 4 "MobileBasePass_Fog_InscatteringLightDirection" + OpMemberName %type_MobileBasePass 5 "MobileBasePass_Fog_DirectionalInscatteringColor" + OpMemberName %type_MobileBasePass 6 "MobileBasePass_Fog_SinCosInscatteringColorCubemapRotation" + OpMemberName %type_MobileBasePass 7 "PrePadding_MobileBasePass_Fog_104" + OpMemberName %type_MobileBasePass 8 "PrePadding_MobileBasePass_Fog_108" + OpMemberName %type_MobileBasePass 9 "MobileBasePass_Fog_FogInscatteringTextureParameters" + OpMemberName %type_MobileBasePass 10 "MobileBasePass_Fog_ApplyVolumetricFog" + OpMemberName %type_MobileBasePass 11 "PrePadding_MobileBasePass_PlanarReflection_128" + OpMemberName %type_MobileBasePass 12 "PrePadding_MobileBasePass_PlanarReflection_132" + OpMemberName %type_MobileBasePass 13 "PrePadding_MobileBasePass_PlanarReflection_136" + OpMemberName %type_MobileBasePass 14 "PrePadding_MobileBasePass_PlanarReflection_140" + OpMemberName %type_MobileBasePass 15 "PrePadding_MobileBasePass_PlanarReflection_144" + OpMemberName %type_MobileBasePass 16 "PrePadding_MobileBasePass_PlanarReflection_148" + OpMemberName %type_MobileBasePass 17 "PrePadding_MobileBasePass_PlanarReflection_152" + OpMemberName %type_MobileBasePass 18 "PrePadding_MobileBasePass_PlanarReflection_156" + OpMemberName %type_MobileBasePass 19 "MobileBasePass_PlanarReflection_ReflectionPlane" + OpMemberName %type_MobileBasePass 20 "MobileBasePass_PlanarReflection_PlanarReflectionOrigin" + OpMemberName %type_MobileBasePass 21 "MobileBasePass_PlanarReflection_PlanarReflectionXAxis" + OpMemberName %type_MobileBasePass 22 "MobileBasePass_PlanarReflection_PlanarReflectionYAxis" + OpMemberName %type_MobileBasePass 23 "MobileBasePass_PlanarReflection_InverseTransposeMirrorMatrix" + OpMemberName %type_MobileBasePass 24 
"MobileBasePass_PlanarReflection_PlanarReflectionParameters" + OpMemberName %type_MobileBasePass 25 "PrePadding_MobileBasePass_PlanarReflection_284" + OpMemberName %type_MobileBasePass 26 "MobileBasePass_PlanarReflection_PlanarReflectionParameters2" + OpMemberName %type_MobileBasePass 27 "PrePadding_MobileBasePass_PlanarReflection_296" + OpMemberName %type_MobileBasePass 28 "PrePadding_MobileBasePass_PlanarReflection_300" + OpMemberName %type_MobileBasePass 29 "MobileBasePass_PlanarReflection_ProjectionWithExtraFOV" + OpMemberName %type_MobileBasePass 30 "MobileBasePass_PlanarReflection_PlanarReflectionScreenScaleBias" + OpMemberName %type_MobileBasePass 31 "MobileBasePass_PlanarReflection_PlanarReflectionScreenBound" + OpMemberName %type_MobileBasePass 32 "MobileBasePass_PlanarReflection_bIsStereo" + OpName %MobileBasePass "MobileBasePass" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_UseEditorDepthTest" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + 
OpMemberName %type_Primitive 16 "Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 "PrePadding_Primitive_380" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName %type_Primitive 19 "Primitive_LightingChannelMask" + OpMemberName %type_Primitive 20 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpName %Primitive "Primitive" + OpName %type_LandscapeParameters "type.LandscapeParameters" + OpMemberName %type_LandscapeParameters 0 "LandscapeParameters_HeightmapUVScaleBias" + OpMemberName %type_LandscapeParameters 1 "LandscapeParameters_WeightmapUVScaleBias" + OpMemberName %type_LandscapeParameters 2 "LandscapeParameters_LandscapeLightmapScaleBias" + OpMemberName %type_LandscapeParameters 3 "LandscapeParameters_SubsectionSizeVertsLayerUVPan" + OpMemberName %type_LandscapeParameters 4 "LandscapeParameters_SubsectionOffsetParams" + OpMemberName %type_LandscapeParameters 5 "LandscapeParameters_LightmapSubsectionOffsetParams" + OpMemberName %type_LandscapeParameters 6 "LandscapeParameters_LocalToWorldNoScaling" + OpName %LandscapeParameters "LandscapeParameters" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "LodBias" + OpMemberName %type__Globals 1 "LodValues" + OpMemberName %type__Globals 2 "SectionLods" + OpMemberName %type__Globals 3 "NeighborSectionLod" + OpName %_Globals "$Globals" + OpName %in_var_ATTRIBUTE0 "in.var.ATTRIBUTE0" + OpName %in_var_ATTRIBUTE1 "in.var.ATTRIBUTE1" + OpName %out_var_TEXCOORD0 "out.var.TEXCOORD0" + OpName %out_var_TEXCOORD1 "out.var.TEXCOORD1" + OpName %out_var_TEXCOORD2 "out.var.TEXCOORD2" + OpName %out_var_TEXCOORD3 "out.var.TEXCOORD3" + OpName %out_var_TEXCOORD8 "out.var.TEXCOORD8" + OpName %Main "Main" + OpDecorateString %in_var_ATTRIBUTE0 UserSemantic "ATTRIBUTE0" + OpDecorateString %in_var_ATTRIBUTE1 UserSemantic "ATTRIBUTE1" + OpDecorateString %out_var_TEXCOORD0 UserSemantic "TEXCOORD0" + OpDecorateString 
%out_var_TEXCOORD1 UserSemantic "TEXCOORD1" + OpDecorateString %out_var_TEXCOORD2 UserSemantic "TEXCOORD2" + OpDecorateString %out_var_TEXCOORD3 UserSemantic "TEXCOORD3" + OpDecorateString %out_var_TEXCOORD8 UserSemantic "TEXCOORD8" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %in_var_ATTRIBUTE0 Location 0 + OpDecorate %in_var_ATTRIBUTE1 Location 1 + OpDecorate %out_var_TEXCOORD0 Location 0 + OpDecorate %out_var_TEXCOORD1 Location 1 + OpDecorate %out_var_TEXCOORD2 Location 2 + OpDecorate %out_var_TEXCOORD3 Location 3 + OpDecorate %out_var_TEXCOORD8 Location 4 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 0 + OpDecorate %MobileBasePass DescriptorSet 0 + OpDecorate %MobileBasePass Binding 1 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 2 + OpDecorate %LandscapeParameters DescriptorSet 0 + OpDecorate %LandscapeParameters Binding 3 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 4 + OpDecorate %_arr_v4float_uint_2_0 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 
Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 
MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 
1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + 
OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate %type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate 
%type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpDecorate %_arr_mat4v4float_uint_2 ArrayStride 64 + OpMemberDecorate %type_MobileBasePass 0 Offset 0 + OpMemberDecorate %type_MobileBasePass 1 Offset 16 + OpMemberDecorate %type_MobileBasePass 2 Offset 32 + OpMemberDecorate %type_MobileBasePass 3 Offset 48 + OpMemberDecorate %type_MobileBasePass 4 Offset 64 + OpMemberDecorate %type_MobileBasePass 5 Offset 80 + OpMemberDecorate %type_MobileBasePass 6 Offset 96 + OpMemberDecorate %type_MobileBasePass 7 Offset 104 + OpMemberDecorate %type_MobileBasePass 8 Offset 108 + OpMemberDecorate %type_MobileBasePass 9 Offset 112 + OpMemberDecorate %type_MobileBasePass 10 Offset 124 + OpMemberDecorate %type_MobileBasePass 11 Offset 128 + OpMemberDecorate %type_MobileBasePass 12 Offset 132 + 
OpMemberDecorate %type_MobileBasePass 13 Offset 136 + OpMemberDecorate %type_MobileBasePass 14 Offset 140 + OpMemberDecorate %type_MobileBasePass 15 Offset 144 + OpMemberDecorate %type_MobileBasePass 16 Offset 148 + OpMemberDecorate %type_MobileBasePass 17 Offset 152 + OpMemberDecorate %type_MobileBasePass 18 Offset 156 + OpMemberDecorate %type_MobileBasePass 19 Offset 160 + OpMemberDecorate %type_MobileBasePass 20 Offset 176 + OpMemberDecorate %type_MobileBasePass 21 Offset 192 + OpMemberDecorate %type_MobileBasePass 22 Offset 208 + OpMemberDecorate %type_MobileBasePass 23 Offset 224 + OpMemberDecorate %type_MobileBasePass 23 MatrixStride 16 + OpMemberDecorate %type_MobileBasePass 23 ColMajor + OpMemberDecorate %type_MobileBasePass 24 Offset 272 + OpMemberDecorate %type_MobileBasePass 25 Offset 284 + OpMemberDecorate %type_MobileBasePass 26 Offset 288 + OpMemberDecorate %type_MobileBasePass 27 Offset 296 + OpMemberDecorate %type_MobileBasePass 28 Offset 300 + OpMemberDecorate %type_MobileBasePass 29 Offset 304 + OpMemberDecorate %type_MobileBasePass 29 MatrixStride 16 + OpMemberDecorate %type_MobileBasePass 29 ColMajor + OpMemberDecorate %type_MobileBasePass 30 Offset 432 + OpMemberDecorate %type_MobileBasePass 31 Offset 464 + OpMemberDecorate %type_MobileBasePass 32 Offset 472 + OpDecorate %type_MobileBasePass Block + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate 
%type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 404 + OpDecorate %type_Primitive Block + OpMemberDecorate %type_LandscapeParameters 0 Offset 0 + OpMemberDecorate %type_LandscapeParameters 1 Offset 16 + OpMemberDecorate %type_LandscapeParameters 2 Offset 32 + OpMemberDecorate %type_LandscapeParameters 3 Offset 48 + OpMemberDecorate %type_LandscapeParameters 4 Offset 64 + OpMemberDecorate %type_LandscapeParameters 5 Offset 80 + OpMemberDecorate %type_LandscapeParameters 6 Offset 96 + OpMemberDecorate %type_LandscapeParameters 6 MatrixStride 16 + OpMemberDecorate %type_LandscapeParameters 6 ColMajor + OpDecorate %type_LandscapeParameters Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpMemberDecorate %type__Globals 1 Offset 16 + OpMemberDecorate %type__Globals 2 Offset 32 + OpMemberDecorate %type__Globals 3 Offset 48 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 
+%float_0_00999999978 = OpConstant %float 0.00999999978 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %float_0 = OpConstant %float 0 + %40 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %float_255 = OpConstant %float 255 + %44 = OpConstantComposite %v4float %float_255 %float_255 %float_255 %float_255 + %float_0_5 = OpConstant %float 0.5 + %46 = OpConstantComposite %v2float %float_0_5 %float_0_5 + %float_2 = OpConstant %float 2 + %48 = OpConstantComposite %v2float %float_2 %float_2 + %float_1 = OpConstant %float 1 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %float_3 = OpConstant %float 3 + %float_0_25 = OpConstant %float 0.25 + %uint_3 = OpConstant %uint 3 + %float_4 = OpConstant %float 4 +%float_0_125 = OpConstant %float 0.125 + %float_5 = OpConstant %float 5 +%float_0_0625 = OpConstant %float 0.0625 +%float_0_03125 = OpConstant %float 0.03125 + %60 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1 + %int_5 = OpConstant %int 5 + %int_4 = OpConstant %int 4 + %63 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %int_25 = OpConstant %int 25 + %int_27 = OpConstant %int 27 + %int_31 = OpConstant %int 31 + %67 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2 +%float_32768 = OpConstant %float 32768 +%_arr_v4float_uint_2_0 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float 
%mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2_0 %_arr_v4float_uint_2_0 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%mat3v4float = OpTypeMatrix %v4float 3 +%_arr_mat4v4float_uint_2 = OpTypeArray %mat4v4float %uint_2 +%type_MobileBasePass = OpTypeStruct %v4float %v4float %v4float %v4float %v4float %v4float %v2float %float %float %v3float %float %float %float %float %float %float %float %float %float %v4float %v4float %v4float %v4float %mat3v4float %v3float %float %v2float %float %float %_arr_mat4v4float_uint_2 %_arr_v4float_uint_2_0 %v2float %uint +%_ptr_Uniform_type_MobileBasePass = OpTypePointer Uniform %type_MobileBasePass +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %float %v3float %uint %uint %int +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive +%type_LandscapeParameters = OpTypeStruct %v4float %v4float 
%v4float %v4float %v4float %v4float %mat4v4float +%_ptr_Uniform_type_LandscapeParameters = OpTypePointer Uniform %type_LandscapeParameters +%type__Globals = OpTypeStruct %v4float %v4float %v4float %_arr_v4float_uint_4 +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%_ptr_Input_v4float = OpTypePointer Input %v4float +%_ptr_Input__arr_v4float_uint_2 = OpTypePointer Input %_arr_v4float_uint_2 +%_ptr_Output_v2float = OpTypePointer Output %v2float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %80 = OpTypeFunction %void +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 +%_ptr_Function__arr_v4float_uint_1 = OpTypePointer Function %_arr_v4float_uint_1 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %bool = OpTypeBool +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %v3bool = OpTypeVector %bool 3 +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %View = OpVariable %_ptr_Uniform_type_View Uniform +%MobileBasePass = OpVariable %_ptr_Uniform_type_MobileBasePass Uniform + %Primitive = OpVariable %_ptr_Uniform_type_Primitive Uniform +%LandscapeParameters = OpVariable %_ptr_Uniform_type_LandscapeParameters Uniform + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%in_var_ATTRIBUTE0 = OpVariable %_ptr_Input_v4float Input +%in_var_ATTRIBUTE1 = OpVariable %_ptr_Input__arr_v4float_uint_2 Input +%out_var_TEXCOORD0 = OpVariable %_ptr_Output_v2float Output +%out_var_TEXCOORD1 = OpVariable %_ptr_Output_v2float Output +%out_var_TEXCOORD2 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD3 = OpVariable %_ptr_Output_v4float Output +%out_var_TEXCOORD8 = OpVariable %_ptr_Output_v4float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output +%float_0_0078125 = OpConstant %float 0.0078125 + %float_n127 = OpConstant %float -127 + %92 = OpConstantNull %v4float 
+%float_0_00392156886 = OpConstant %float 0.00392156886 + %94 = OpConstantComposite %v2float %float_0_00392156886 %float_0_00392156886 +%float_65280 = OpConstant %float 65280 + %Main = OpFunction %void None %80 + %96 = OpLabel + %97 = OpVariable %_ptr_Function__arr_v4float_uint_1 Function + %98 = OpLoad %v4float %in_var_ATTRIBUTE0 + %99 = OpLoad %_arr_v4float_uint_2 %in_var_ATTRIBUTE1 + %100 = OpAccessChain %_ptr_Uniform_mat4v4float %View %int_0 + %101 = OpLoad %mat4v4float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %View %int_27 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %View %int_31 + %105 = OpLoad %v3float %104 + OpBranch %106 + %106 = OpLabel + %107 = OpPhi %int %int_0 %96 %108 %109 + %110 = OpSLessThan %bool %107 %int_1 + OpLoopMerge %111 %109 Unroll + OpBranchConditional %110 %109 %111 + %109 = OpLabel + %112 = OpAccessChain %_ptr_Function_v4float %97 %107 + OpStore %112 %40 + %108 = OpIAdd %int %107 %int_1 + OpBranch %106 + %111 = OpLabel + %113 = OpCompositeExtract %v4float %99 0 + %114 = OpCompositeExtract %v4float %99 1 + %115 = OpFMul %v4float %98 %44 + %116 = OpVectorShuffle %v2float %115 %115 2 3 + %117 = OpFMul %v2float %116 %46 + %118 = OpExtInst %v2float %1 Fract %117 + %119 = OpFMul %v2float %118 %48 + %120 = OpFSub %v2float %116 %119 + %121 = OpFMul %v2float %120 %94 + %122 = OpVectorShuffle %v2float %115 %92 0 1 + %123 = OpAccessChain %_ptr_Uniform_float %_Globals %int_1 %int_3 + %124 = OpLoad %float %123 + %125 = OpCompositeConstruct %v2float %124 %124 + %126 = OpFMul %v2float %122 %125 + %127 = OpCompositeExtract %float %126 1 + %128 = OpCompositeExtract %float %126 0 + %129 = OpFSub %float %float_1 %128 + %130 = OpFSub %float %float_1 %127 + %131 = OpCompositeConstruct %v4float %127 %128 %129 %130 + %132 = OpFMul %v4float %131 %67 + %133 = OpCompositeExtract %float %119 1 + %134 = OpFOrdGreaterThan %bool %133 %float_0_5 + OpSelectionMerge %135 None + OpBranchConditional %134 %136 %137 + %136 = OpLabel + 
%138 = OpCompositeExtract %float %119 0 + %139 = OpFOrdGreaterThan %bool %138 %float_0_5 + OpSelectionMerge %140 None + OpBranchConditional %139 %141 %142 + %141 = OpLabel + %143 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %int_3 + %144 = OpLoad %float %143 + %145 = OpCompositeConstruct %v4float %144 %144 %144 %144 + %146 = OpFMul %v4float %132 %145 + %147 = OpFSub %v4float %60 %132 + %148 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 %int_3 + %149 = OpLoad %v4float %148 + %150 = OpFMul %v4float %147 %149 + %151 = OpFAdd %v4float %146 %150 + OpBranch %140 + %142 = OpLabel + %152 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %int_2 + %153 = OpLoad %float %152 + %154 = OpCompositeConstruct %v4float %153 %153 %153 %153 + %155 = OpFMul %v4float %132 %154 + %156 = OpFSub %v4float %60 %132 + %157 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 %int_2 + %158 = OpLoad %v4float %157 + %159 = OpFMul %v4float %156 %158 + %160 = OpFAdd %v4float %155 %159 + OpBranch %140 + %140 = OpLabel + %161 = OpPhi %v4float %151 %141 %160 %142 + OpBranch %135 + %137 = OpLabel + %162 = OpCompositeExtract %float %119 0 + %163 = OpFOrdGreaterThan %bool %162 %float_0_5 + OpSelectionMerge %164 None + OpBranchConditional %163 %165 %166 + %165 = OpLabel + %167 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %int_1 + %168 = OpLoad %float %167 + %169 = OpCompositeConstruct %v4float %168 %168 %168 %168 + %170 = OpFMul %v4float %132 %169 + %171 = OpFSub %v4float %60 %132 + %172 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 %int_1 + %173 = OpLoad %v4float %172 + %174 = OpFMul %v4float %171 %173 + %175 = OpFAdd %v4float %170 %174 + OpBranch %164 + %166 = OpLabel + %176 = OpAccessChain %_ptr_Uniform_float %_Globals %int_2 %int_0 + %177 = OpLoad %float %176 + %178 = OpCompositeConstruct %v4float %177 %177 %177 %177 + %179 = OpFMul %v4float %132 %178 + %180 = OpFSub %v4float %60 %132 + %181 = OpAccessChain %_ptr_Uniform_v4float %_Globals %int_3 %int_0 + %182 = 
OpLoad %v4float %181 + %183 = OpFMul %v4float %180 %182 + %184 = OpFAdd %v4float %179 %183 + OpBranch %164 + %164 = OpLabel + %185 = OpPhi %v4float %175 %165 %184 %166 + OpBranch %135 + %135 = OpLabel + %186 = OpPhi %v4float %161 %140 %185 %164 + %187 = OpFAdd %float %128 %127 + %188 = OpFOrdGreaterThan %bool %187 %float_1 + OpSelectionMerge %189 None + OpBranchConditional %188 %190 %191 + %190 = OpLabel + %192 = OpFOrdLessThan %bool %128 %127 + OpSelectionMerge %193 None + OpBranchConditional %192 %194 %195 + %194 = OpLabel + %196 = OpCompositeExtract %float %186 3 + OpBranch %193 + %195 = OpLabel + %197 = OpCompositeExtract %float %186 2 + OpBranch %193 + %193 = OpLabel + %198 = OpPhi %float %196 %194 %197 %195 + OpBranch %189 + %191 = OpLabel + %199 = OpFOrdLessThan %bool %128 %127 + OpSelectionMerge %200 None + OpBranchConditional %199 %201 %202 + %201 = OpLabel + %203 = OpCompositeExtract %float %186 1 + OpBranch %200 + %202 = OpLabel + %204 = OpCompositeExtract %float %186 0 + OpBranch %200 + %200 = OpLabel + %205 = OpPhi %float %203 %201 %204 %202 + OpBranch %189 + %189 = OpLabel + %206 = OpPhi %float %198 %193 %205 %200 + %207 = OpExtInst %float %1 Floor %206 + %208 = OpFSub %float %206 %207 + %209 = OpFOrdLessThan %bool %207 %float_1 + %210 = OpCompositeExtract %float %114 0 + %211 = OpCompositeExtract %float %114 1 + %212 = OpCompositeConstruct %v3float %float_1 %210 %211 + %213 = OpFOrdLessThan %bool %207 %float_2 + %214 = OpCompositeExtract %float %114 2 + %215 = OpCompositeConstruct %v3float %float_0_5 %211 %214 + %216 = OpFOrdLessThan %bool %207 %float_3 + %217 = OpCompositeExtract %float %114 3 + %218 = OpCompositeConstruct %v3float %float_0_25 %214 %217 + %219 = OpFOrdLessThan %bool %207 %float_4 + %220 = OpCompositeExtract %float %121 0 + %221 = OpCompositeConstruct %v3float %float_0_125 %217 %220 + %222 = OpFOrdLessThan %bool %207 %float_5 + %223 = OpCompositeExtract %float %121 1 + %224 = OpCompositeConstruct %v3float %float_0_0625 %220 %223 + 
%225 = OpCompositeConstruct %v3float %float_0_03125 %223 %223 + %226 = OpCompositeConstruct %v3bool %222 %222 %222 + %227 = OpSelect %v3float %226 %224 %225 + %228 = OpCompositeConstruct %v3bool %219 %219 %219 + %229 = OpSelect %v3float %228 %221 %227 + %230 = OpCompositeConstruct %v3bool %216 %216 %216 + %231 = OpSelect %v3float %230 %218 %229 + %232 = OpCompositeConstruct %v3bool %213 %213 %213 + %233 = OpSelect %v3float %232 %215 %231 + %234 = OpCompositeConstruct %v3bool %209 %209 %209 + %235 = OpSelect %v3float %234 %212 %233 + %236 = OpCompositeExtract %float %235 0 + %237 = OpCompositeExtract %float %235 1 + %238 = OpCompositeExtract %float %235 2 + %239 = OpCompositeExtract %float %113 0 + %240 = OpFMul %float %239 %float_65280 + %241 = OpCompositeExtract %float %113 1 + %242 = OpFMul %float %241 %float_255 + %243 = OpFAdd %float %240 %242 + %244 = OpFSub %float %243 %float_32768 + %245 = OpFMul %float %244 %float_0_0078125 + %246 = OpCompositeExtract %float %113 2 + %247 = OpFMul %float %246 %float_65280 + %248 = OpCompositeExtract %float %113 3 + %249 = OpFMul %float %248 %float_255 + %250 = OpFAdd %float %247 %249 + %251 = OpFSub %float %250 %float_32768 + %252 = OpFMul %float %251 %float_0_0078125 + %253 = OpExtInst %float %1 FMix %245 %252 %237 + %254 = OpExtInst %float %1 FMix %245 %252 %238 + %255 = OpCompositeConstruct %v2float %236 %236 + %256 = OpFMul %v2float %122 %255 + %257 = OpExtInst %v2float %1 Floor %256 + %258 = OpAccessChain %_ptr_Uniform_v4float %LandscapeParameters %int_3 + %259 = OpAccessChain %_ptr_Uniform_float %LandscapeParameters %int_3 %int_0 + %260 = OpLoad %float %259 + %261 = OpFMul %float %260 %236 + %262 = OpFSub %float %261 %float_1 + %263 = OpFMul %float %260 %float_0_5 + %264 = OpFMul %float %263 %236 + %265 = OpExtInst %float %1 FMax %264 %float_2 + %266 = OpFSub %float %265 %float_1 + %267 = OpCompositeConstruct %v2float %262 %266 + %268 = OpAccessChain %_ptr_Uniform_float %LandscapeParameters %int_3 %int_1 + %269 = 
OpLoad %float %268 + %270 = OpCompositeConstruct %v2float %269 %269 + %271 = OpFMul %v2float %267 %270 + %272 = OpCompositeExtract %float %271 0 + %273 = OpCompositeConstruct %v2float %272 %272 + %274 = OpFDiv %v2float %257 %273 + %275 = OpFMul %v2float %257 %46 + %276 = OpExtInst %v2float %1 Floor %275 + %277 = OpCompositeExtract %float %271 1 + %278 = OpCompositeConstruct %v2float %277 %277 + %279 = OpFDiv %v2float %276 %278 + %280 = OpCompositeExtract %float %274 0 + %281 = OpCompositeExtract %float %274 1 + %282 = OpCompositeConstruct %v3float %280 %281 %253 + %283 = OpCompositeExtract %float %279 0 + %284 = OpCompositeExtract %float %279 1 + %285 = OpCompositeConstruct %v3float %283 %284 %254 + %286 = OpCompositeConstruct %v3float %208 %208 %208 + %287 = OpExtInst %v3float %1 FMix %282 %285 %286 + %288 = OpVectorShuffle %v2float %119 %92 0 1 + %289 = OpAccessChain %_ptr_Uniform_v4float %LandscapeParameters %int_4 + %290 = OpLoad %v4float %289 + %291 = OpVectorShuffle %v2float %290 %290 3 3 + %292 = OpFMul %v2float %288 %291 + %293 = OpCompositeExtract %float %292 0 + %294 = OpCompositeExtract %float %292 1 + %295 = OpCompositeConstruct %v3float %293 %294 %float_0 + %296 = OpFAdd %v3float %287 %295 + %297 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_0 + %298 = OpLoad %v4float %297 + %299 = OpVectorShuffle %v3float %298 %298 0 1 2 + %300 = OpVectorShuffle %v3float %296 %296 0 0 0 + %301 = OpFMul %v3float %299 %300 + %302 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_1 + %303 = OpLoad %v4float %302 + %304 = OpVectorShuffle %v3float %303 %303 0 1 2 + %305 = OpVectorShuffle %v3float %296 %296 1 1 1 + %306 = OpFMul %v3float %304 %305 + %307 = OpFAdd %v3float %301 %306 + %308 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_2 + %309 = OpLoad %v4float %308 + %310 = OpVectorShuffle %v3float %309 %309 0 1 2 + %311 = OpVectorShuffle %v3float %296 %296 2 2 2 + %312 = OpFMul %v3float %310 %311 + %313 = OpFAdd %v3float %307 %312 
+ %314 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_3 + %315 = OpLoad %v4float %314 + %316 = OpVectorShuffle %v3float %315 %315 0 1 2 + %317 = OpFAdd %v3float %316 %105 + %318 = OpFAdd %v3float %313 %317 + %319 = OpCompositeExtract %float %318 0 + %320 = OpCompositeExtract %float %318 1 + %321 = OpCompositeExtract %float %318 2 + %322 = OpCompositeConstruct %v4float %319 %320 %321 %float_1 + %323 = OpVectorShuffle %v2float %287 %287 0 1 + %324 = OpLoad %v4float %258 + %325 = OpVectorShuffle %v2float %324 %324 2 3 + %326 = OpFAdd %v2float %323 %325 + %327 = OpFAdd %v2float %326 %292 + %328 = OpAccessChain %_ptr_Uniform_v4float %LandscapeParameters %int_1 + %329 = OpLoad %v4float %328 + %330 = OpVectorShuffle %v2float %329 %329 0 1 + %331 = OpFMul %v2float %323 %330 + %332 = OpVectorShuffle %v2float %329 %329 2 3 + %333 = OpFAdd %v2float %331 %332 + %334 = OpVectorShuffle %v2float %290 %290 2 2 + %335 = OpFMul %v2float %288 %334 + %336 = OpFAdd %v2float %333 %335 + %337 = OpVectorShuffle %v2float %327 %92 0 1 + %338 = OpVectorShuffle %v4float %322 %322 4 5 6 3 + %339 = OpMatrixTimesVector %v4float %101 %338 + %340 = OpVectorShuffle %v3float %322 %92 0 1 2 + %341 = OpFSub %v3float %340 %103 + %342 = OpAccessChain %_ptr_Uniform_v4float %MobileBasePass %int_2 + %343 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_2 %int_3 + %344 = OpLoad %float %343 + %345 = OpDot %float %341 %341 + %346 = OpExtInst %float %1 InverseSqrt %345 + %347 = OpFMul %float %345 %346 + %348 = OpCompositeConstruct %v3float %346 %346 %346 + %349 = OpFMul %v3float %341 %348 + %350 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_0 %int_0 + %351 = OpLoad %float %350 + %352 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_0 + %353 = OpLoad %float %352 + %354 = OpCompositeExtract %float %341 2 + %355 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_0 %int_3 + %356 = OpLoad %float %355 + %357 = OpExtInst %float %1 FMax %float_0 %356 + %358 = 
OpFOrdGreaterThan %bool %357 %float_0 + OpSelectionMerge %359 None + OpBranchConditional %358 %360 %359 + %360 = OpLabel + %361 = OpFMul %float %357 %346 + %362 = OpFMul %float %361 %354 + %363 = OpAccessChain %_ptr_Uniform_float %View %int_25 %int_2 + %364 = OpLoad %float %363 + %365 = OpFAdd %float %364 %362 + %366 = OpFSub %float %354 %362 + %367 = OpFSub %float %float_1 %361 + %368 = OpFMul %float %367 %347 + %369 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_0 %int_1 + %370 = OpLoad %float %369 + %371 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_3 %int_1 + %372 = OpLoad %float %371 + %373 = OpFSub %float %365 %372 + %374 = OpFMul %float %370 %373 + %375 = OpExtInst %float %1 FMax %float_n127 %374 + %376 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_3 %int_0 + %377 = OpLoad %float %376 + %378 = OpFNegate %float %375 + %379 = OpExtInst %float %1 Exp2 %378 + %380 = OpFMul %float %377 %379 + %381 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_1 + %382 = OpLoad %float %381 + %383 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_3 + %384 = OpLoad %float %383 + %385 = OpFSub %float %365 %384 + %386 = OpFMul %float %382 %385 + %387 = OpExtInst %float %1 FMax %float_n127 %386 + %388 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_2 + %389 = OpLoad %float %388 + %390 = OpFNegate %float %387 + %391 = OpExtInst %float %1 Exp2 %390 + %392 = OpFMul %float %389 %391 + OpBranch %359 + %359 = OpLabel + %393 = OpPhi %float %347 %189 %368 %360 + %394 = OpPhi %float %353 %189 %392 %360 + %395 = OpPhi %float %351 %189 %380 %360 + %396 = OpPhi %float %354 %189 %366 %360 + %397 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_0 %int_1 + %398 = OpLoad %float %397 + %399 = OpFMul %float %398 %396 + %400 = OpExtInst %float %1 FMax %float_n127 %399 + %401 = OpFNegate %float %400 + %402 = OpExtInst %float %1 Exp2 %401 + %403 = OpFSub %float %float_1 %402 + %404 = OpFDiv %float %403 %400 + %405 = 
OpExtInst %float %1 Log %float_2 + %406 = OpFMul %float %405 %405 + %407 = OpFMul %float %float_0_5 %406 + %408 = OpFMul %float %407 %400 + %409 = OpFSub %float %405 %408 + %410 = OpExtInst %float %1 FAbs %400 + %411 = OpFOrdGreaterThan %bool %410 %float_0_00999999978 + %412 = OpSelect %float %411 %404 %409 + %413 = OpFMul %float %395 %412 + %414 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_1 %int_1 + %415 = OpLoad %float %414 + %416 = OpFMul %float %415 %396 + %417 = OpExtInst %float %1 FMax %float_n127 %416 + %418 = OpFNegate %float %417 + %419 = OpExtInst %float %1 Exp2 %418 + %420 = OpFSub %float %float_1 %419 + %421 = OpFDiv %float %420 %417 + %422 = OpFMul %float %407 %417 + %423 = OpFSub %float %405 %422 + %424 = OpExtInst %float %1 FAbs %417 + %425 = OpFOrdGreaterThan %bool %424 %float_0_00999999978 + %426 = OpSelect %float %425 %421 %423 + %427 = OpFMul %float %394 %426 + %428 = OpFAdd %float %413 %427 + %429 = OpFMul %float %428 %393 + %430 = OpLoad %v4float %342 + %431 = OpVectorShuffle %v3float %430 %430 0 1 2 + %432 = OpAccessChain %_ptr_Uniform_v4float %MobileBasePass %int_4 + %433 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_4 %int_3 + %434 = OpLoad %float %433 + %435 = OpFOrdGreaterThanEqual %bool %434 %float_0 + OpSelectionMerge %436 DontFlatten + OpBranchConditional %435 %437 %436 + %437 = OpLabel + %438 = OpAccessChain %_ptr_Uniform_v4float %MobileBasePass %int_5 + %439 = OpLoad %v4float %438 + %440 = OpVectorShuffle %v3float %439 %439 0 1 2 + %441 = OpLoad %v4float %432 + %442 = OpVectorShuffle %v3float %441 %441 0 1 2 + %443 = OpDot %float %349 %442 + %444 = OpExtInst %float %1 FClamp %443 %float_0 %float_1 + %445 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_5 %int_3 + %446 = OpLoad %float %445 + %447 = OpExtInst %float %1 Pow %444 %446 + %448 = OpCompositeConstruct %v3float %447 %447 %447 + %449 = OpFMul %v3float %440 %448 + %450 = OpFSub %float %393 %434 + %451 = OpExtInst %float %1 FMax %450 %float_0 + 
%452 = OpFMul %float %428 %451 + %453 = OpFNegate %float %452 + %454 = OpExtInst %float %1 Exp2 %453 + %455 = OpExtInst %float %1 FClamp %454 %float_0 %float_1 + %456 = OpFSub %float %float_1 %455 + %457 = OpCompositeConstruct %v3float %456 %456 %456 + %458 = OpFMul %v3float %449 %457 + OpBranch %436 + %436 = OpLabel + %459 = OpPhi %v3float %63 %359 %458 %437 + %460 = OpFNegate %float %429 + %461 = OpExtInst %float %1 Exp2 %460 + %462 = OpExtInst %float %1 FClamp %461 %float_0 %float_1 + %463 = OpExtInst %float %1 FMax %462 %344 + %464 = OpAccessChain %_ptr_Uniform_float %MobileBasePass %int_3 %int_3 + %465 = OpLoad %float %464 + %466 = OpFOrdGreaterThan %bool %465 %float_0 + %467 = OpFOrdGreaterThan %bool %347 %465 + %468 = OpLogicalAnd %bool %466 %467 + %469 = OpCompositeConstruct %v3bool %468 %468 %468 + %470 = OpSelect %v3float %469 %63 %459 + %471 = OpSelect %float %468 %float_1 %463 + %472 = OpFSub %float %float_1 %471 + %473 = OpCompositeConstruct %v3float %472 %472 %472 + %474 = OpFMul %v3float %431 %473 + %475 = OpFAdd %v3float %474 %470 + %476 = OpCompositeExtract %float %475 0 + %477 = OpCompositeExtract %float %475 1 + %478 = OpCompositeExtract %float %475 2 + %479 = OpCompositeConstruct %v4float %476 %477 %478 %471 + %480 = OpAccessChain %_ptr_Function_v4float %97 %int_0 + OpStore %480 %479 + %481 = OpCompositeExtract %float %339 3 + %482 = OpCompositeInsert %v4float %481 %338 3 + %483 = OpLoad %_arr_v4float_uint_1 %97 + %484 = OpCompositeExtract %v4float %483 0 + %485 = OpVectorShuffle %v4float %92 %484 0 1 4 5 + %486 = OpVectorShuffle %v4float %92 %484 0 1 6 7 + OpStore %out_var_TEXCOORD0 %337 + OpStore %out_var_TEXCOORD1 %336 + OpStore %out_var_TEXCOORD2 %485 + OpStore %out_var_TEXCOORD3 %486 + OpStore %out_var_TEXCOORD8 %482 + OpStore %gl_Position %339 + OpReturn + OpFunctionEnd diff --git a/shaders-ue4/asm/vert/texture-buffer.asm.vert b/shaders-ue4/asm/vert/texture-buffer.asm.vert new file mode 100644 index 00000000000..6d52623a145 --- /dev/null 
+++ b/shaders-ue4/asm/vert/texture-buffer.asm.vert @@ -0,0 +1,1054 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 397 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability StorageImageExtendedFormats + OpExtension "SPV_GOOGLE_hlsl_functionality1" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Vertex %Main "main" %gl_VertexIndex %gl_InstanceIndex %in_var_ATTRIBUTE0 %out_var_TEXCOORD6 %gl_Position + OpSource HLSL 600 + OpName %type_View "type.View" + OpMemberName %type_View 0 "View_TranslatedWorldToClip" + OpMemberName %type_View 1 "View_WorldToClip" + OpMemberName %type_View 2 "View_TranslatedWorldToView" + OpMemberName %type_View 3 "View_ViewToTranslatedWorld" + OpMemberName %type_View 4 "View_TranslatedWorldToCameraView" + OpMemberName %type_View 5 "View_CameraViewToTranslatedWorld" + OpMemberName %type_View 6 "View_ViewToClip" + OpMemberName %type_View 7 "View_ViewToClipNoAA" + OpMemberName %type_View 8 "View_ClipToView" + OpMemberName %type_View 9 "View_ClipToTranslatedWorld" + OpMemberName %type_View 10 "View_SVPositionToTranslatedWorld" + OpMemberName %type_View 11 "View_ScreenToWorld" + OpMemberName %type_View 12 "View_ScreenToTranslatedWorld" + OpMemberName %type_View 13 "View_ViewForward" + OpMemberName %type_View 14 "PrePadding_View_844" + OpMemberName %type_View 15 "View_ViewUp" + OpMemberName %type_View 16 "PrePadding_View_860" + OpMemberName %type_View 17 "View_ViewRight" + OpMemberName %type_View 18 "PrePadding_View_876" + OpMemberName %type_View 19 "View_HMDViewNoRollUp" + OpMemberName %type_View 20 "PrePadding_View_892" + OpMemberName %type_View 21 "View_HMDViewNoRollRight" + OpMemberName %type_View 22 "PrePadding_View_908" + OpMemberName %type_View 23 "View_InvDeviceZToWorldZTransform" + OpMemberName %type_View 24 "View_ScreenPositionScaleBias" + OpMemberName %type_View 25 "View_WorldCameraOrigin" + OpMemberName %type_View 26 "PrePadding_View_956" + OpMemberName 
%type_View 27 "View_TranslatedWorldCameraOrigin" + OpMemberName %type_View 28 "PrePadding_View_972" + OpMemberName %type_View 29 "View_WorldViewOrigin" + OpMemberName %type_View 30 "PrePadding_View_988" + OpMemberName %type_View 31 "View_PreViewTranslation" + OpMemberName %type_View 32 "PrePadding_View_1004" + OpMemberName %type_View 33 "View_PrevProjection" + OpMemberName %type_View 34 "View_PrevViewProj" + OpMemberName %type_View 35 "View_PrevViewRotationProj" + OpMemberName %type_View 36 "View_PrevViewToClip" + OpMemberName %type_View 37 "View_PrevClipToView" + OpMemberName %type_View 38 "View_PrevTranslatedWorldToClip" + OpMemberName %type_View 39 "View_PrevTranslatedWorldToView" + OpMemberName %type_View 40 "View_PrevViewToTranslatedWorld" + OpMemberName %type_View 41 "View_PrevTranslatedWorldToCameraView" + OpMemberName %type_View 42 "View_PrevCameraViewToTranslatedWorld" + OpMemberName %type_View 43 "View_PrevWorldCameraOrigin" + OpMemberName %type_View 44 "PrePadding_View_1660" + OpMemberName %type_View 45 "View_PrevWorldViewOrigin" + OpMemberName %type_View 46 "PrePadding_View_1676" + OpMemberName %type_View 47 "View_PrevPreViewTranslation" + OpMemberName %type_View 48 "PrePadding_View_1692" + OpMemberName %type_View 49 "View_PrevInvViewProj" + OpMemberName %type_View 50 "View_PrevScreenToTranslatedWorld" + OpMemberName %type_View 51 "View_ClipToPrevClip" + OpMemberName %type_View 52 "View_TemporalAAJitter" + OpMemberName %type_View 53 "View_GlobalClippingPlane" + OpMemberName %type_View 54 "View_FieldOfViewWideAngles" + OpMemberName %type_View 55 "View_PrevFieldOfViewWideAngles" + OpMemberName %type_View 56 "View_ViewRectMin" + OpMemberName %type_View 57 "View_ViewSizeAndInvSize" + OpMemberName %type_View 58 "View_BufferSizeAndInvSize" + OpMemberName %type_View 59 "View_BufferBilinearUVMinMax" + OpMemberName %type_View 60 "View_NumSceneColorMSAASamples" + OpMemberName %type_View 61 "View_PreExposure" + OpMemberName %type_View 62 "View_OneOverPreExposure" 
+ OpMemberName %type_View 63 "PrePadding_View_2012" + OpMemberName %type_View 64 "View_DiffuseOverrideParameter" + OpMemberName %type_View 65 "View_SpecularOverrideParameter" + OpMemberName %type_View 66 "View_NormalOverrideParameter" + OpMemberName %type_View 67 "View_RoughnessOverrideParameter" + OpMemberName %type_View 68 "View_PrevFrameGameTime" + OpMemberName %type_View 69 "View_PrevFrameRealTime" + OpMemberName %type_View 70 "View_OutOfBoundsMask" + OpMemberName %type_View 71 "PrePadding_View_2084" + OpMemberName %type_View 72 "PrePadding_View_2088" + OpMemberName %type_View 73 "PrePadding_View_2092" + OpMemberName %type_View 74 "View_WorldCameraMovementSinceLastFrame" + OpMemberName %type_View 75 "View_CullingSign" + OpMemberName %type_View 76 "View_NearPlane" + OpMemberName %type_View 77 "View_AdaptiveTessellationFactor" + OpMemberName %type_View 78 "View_GameTime" + OpMemberName %type_View 79 "View_RealTime" + OpMemberName %type_View 80 "View_DeltaTime" + OpMemberName %type_View 81 "View_MaterialTextureMipBias" + OpMemberName %type_View 82 "View_MaterialTextureDerivativeMultiply" + OpMemberName %type_View 83 "View_Random" + OpMemberName %type_View 84 "View_FrameNumber" + OpMemberName %type_View 85 "View_StateFrameIndexMod8" + OpMemberName %type_View 86 "View_StateFrameIndex" + OpMemberName %type_View 87 "View_CameraCut" + OpMemberName %type_View 88 "View_UnlitViewmodeMask" + OpMemberName %type_View 89 "PrePadding_View_2164" + OpMemberName %type_View 90 "PrePadding_View_2168" + OpMemberName %type_View 91 "PrePadding_View_2172" + OpMemberName %type_View 92 "View_DirectionalLightColor" + OpMemberName %type_View 93 "View_DirectionalLightDirection" + OpMemberName %type_View 94 "PrePadding_View_2204" + OpMemberName %type_View 95 "View_TranslucencyLightingVolumeMin" + OpMemberName %type_View 96 "View_TranslucencyLightingVolumeInvSize" + OpMemberName %type_View 97 "View_TemporalAAParams" + OpMemberName %type_View 98 "View_CircleDOFParams" + OpMemberName %type_View 
99 "View_DepthOfFieldSensorWidth" + OpMemberName %type_View 100 "View_DepthOfFieldFocalDistance" + OpMemberName %type_View 101 "View_DepthOfFieldScale" + OpMemberName %type_View 102 "View_DepthOfFieldFocalLength" + OpMemberName %type_View 103 "View_DepthOfFieldFocalRegion" + OpMemberName %type_View 104 "View_DepthOfFieldNearTransitionRegion" + OpMemberName %type_View 105 "View_DepthOfFieldFarTransitionRegion" + OpMemberName %type_View 106 "View_MotionBlurNormalizedToPixel" + OpMemberName %type_View 107 "View_bSubsurfacePostprocessEnabled" + OpMemberName %type_View 108 "View_GeneralPurposeTweak" + OpMemberName %type_View 109 "View_DemosaicVposOffset" + OpMemberName %type_View 110 "PrePadding_View_2348" + OpMemberName %type_View 111 "View_IndirectLightingColorScale" + OpMemberName %type_View 112 "View_HDR32bppEncodingMode" + OpMemberName %type_View 113 "View_AtmosphericFogSunDirection" + OpMemberName %type_View 114 "View_AtmosphericFogSunPower" + OpMemberName %type_View 115 "View_AtmosphericFogPower" + OpMemberName %type_View 116 "View_AtmosphericFogDensityScale" + OpMemberName %type_View 117 "View_AtmosphericFogDensityOffset" + OpMemberName %type_View 118 "View_AtmosphericFogGroundOffset" + OpMemberName %type_View 119 "View_AtmosphericFogDistanceScale" + OpMemberName %type_View 120 "View_AtmosphericFogAltitudeScale" + OpMemberName %type_View 121 "View_AtmosphericFogHeightScaleRayleigh" + OpMemberName %type_View 122 "View_AtmosphericFogStartDistance" + OpMemberName %type_View 123 "View_AtmosphericFogDistanceOffset" + OpMemberName %type_View 124 "View_AtmosphericFogSunDiscScale" + OpMemberName %type_View 125 "View_AtmosphericFogRenderMask" + OpMemberName %type_View 126 "View_AtmosphericFogInscatterAltitudeSampleNum" + OpMemberName %type_View 127 "View_AtmosphericFogSunColor" + OpMemberName %type_View 128 "View_NormalCurvatureToRoughnessScaleBias" + OpMemberName %type_View 129 "View_RenderingReflectionCaptureMask" + OpMemberName %type_View 130 "View_AmbientCubemapTint" 
+ OpMemberName %type_View 131 "View_AmbientCubemapIntensity" + OpMemberName %type_View 132 "View_SkyLightParameters" + OpMemberName %type_View 133 "PrePadding_View_2488" + OpMemberName %type_View 134 "PrePadding_View_2492" + OpMemberName %type_View 135 "View_SkyLightColor" + OpMemberName %type_View 136 "View_SkyIrradianceEnvironmentMap" + OpMemberName %type_View 137 "View_MobilePreviewMode" + OpMemberName %type_View 138 "View_HMDEyePaddingOffset" + OpMemberName %type_View 139 "View_ReflectionCubemapMaxMip" + OpMemberName %type_View 140 "View_ShowDecalsMask" + OpMemberName %type_View 141 "View_DistanceFieldAOSpecularOcclusionMode" + OpMemberName %type_View 142 "View_IndirectCapsuleSelfShadowingIntensity" + OpMemberName %type_View 143 "PrePadding_View_2648" + OpMemberName %type_View 144 "PrePadding_View_2652" + OpMemberName %type_View 145 "View_ReflectionEnvironmentRoughnessMixingScaleBiasAndLargestWeight" + OpMemberName %type_View 146 "View_StereoPassIndex" + OpMemberName %type_View 147 "View_GlobalVolumeCenterAndExtent" + OpMemberName %type_View 148 "View_GlobalVolumeWorldToUVAddAndMul" + OpMemberName %type_View 149 "View_GlobalVolumeDimension" + OpMemberName %type_View 150 "View_GlobalVolumeTexelSize" + OpMemberName %type_View 151 "View_MaxGlobalDistance" + OpMemberName %type_View 152 "View_bCheckerboardSubsurfaceProfileRendering" + OpMemberName %type_View 153 "View_VolumetricFogInvGridSize" + OpMemberName %type_View 154 "PrePadding_View_2828" + OpMemberName %type_View 155 "View_VolumetricFogGridZParams" + OpMemberName %type_View 156 "PrePadding_View_2844" + OpMemberName %type_View 157 "View_VolumetricFogSVPosToVolumeUV" + OpMemberName %type_View 158 "View_VolumetricFogMaxDistance" + OpMemberName %type_View 159 "PrePadding_View_2860" + OpMemberName %type_View 160 "View_VolumetricLightmapWorldToUVScale" + OpMemberName %type_View 161 "PrePadding_View_2876" + OpMemberName %type_View 162 "View_VolumetricLightmapWorldToUVAdd" + OpMemberName %type_View 163 
"PrePadding_View_2892" + OpMemberName %type_View 164 "View_VolumetricLightmapIndirectionTextureSize" + OpMemberName %type_View 165 "View_VolumetricLightmapBrickSize" + OpMemberName %type_View 166 "View_VolumetricLightmapBrickTexelSize" + OpMemberName %type_View 167 "View_StereoIPD" + OpMemberName %type_View 168 "View_IndirectLightingCacheShowFlag" + OpMemberName %type_View 169 "View_EyeToPixelSpreadAngle" + OpName %View "View" + OpName %type_Primitive "type.Primitive" + OpMemberName %type_Primitive 0 "Primitive_LocalToWorld" + OpMemberName %type_Primitive 1 "Primitive_InvNonUniformScaleAndDeterminantSign" + OpMemberName %type_Primitive 2 "Primitive_ObjectWorldPositionAndRadius" + OpMemberName %type_Primitive 3 "Primitive_WorldToLocal" + OpMemberName %type_Primitive 4 "Primitive_PreviousLocalToWorld" + OpMemberName %type_Primitive 5 "Primitive_PreviousWorldToLocal" + OpMemberName %type_Primitive 6 "Primitive_ActorWorldPosition" + OpMemberName %type_Primitive 7 "Primitive_UseSingleSampleShadowFromStationaryLights" + OpMemberName %type_Primitive 8 "Primitive_ObjectBounds" + OpMemberName %type_Primitive 9 "Primitive_LpvBiasMultiplier" + OpMemberName %type_Primitive 10 "Primitive_DecalReceiverMask" + OpMemberName %type_Primitive 11 "Primitive_PerObjectGBufferData" + OpMemberName %type_Primitive 12 "Primitive_UseVolumetricLightmapShadowFromStationaryLights" + OpMemberName %type_Primitive 13 "Primitive_UseEditorDepthTest" + OpMemberName %type_Primitive 14 "Primitive_ObjectOrientation" + OpMemberName %type_Primitive 15 "Primitive_NonUniformScale" + OpMemberName %type_Primitive 16 "Primitive_LocalObjectBoundsMin" + OpMemberName %type_Primitive 17 "PrePadding_Primitive_380" + OpMemberName %type_Primitive 18 "Primitive_LocalObjectBoundsMax" + OpMemberName %type_Primitive 19 "Primitive_LightingChannelMask" + OpMemberName %type_Primitive 20 "Primitive_LightmapDataIndex" + OpMemberName %type_Primitive 21 "Primitive_SingleCaptureIndex" + OpName %Primitive "Primitive" + OpName 
%type_MobileShadowDepthPass "type.MobileShadowDepthPass" + OpMemberName %type_MobileShadowDepthPass 0 "PrePadding_MobileShadowDepthPass_0" + OpMemberName %type_MobileShadowDepthPass 1 "PrePadding_MobileShadowDepthPass_4" + OpMemberName %type_MobileShadowDepthPass 2 "PrePadding_MobileShadowDepthPass_8" + OpMemberName %type_MobileShadowDepthPass 3 "PrePadding_MobileShadowDepthPass_12" + OpMemberName %type_MobileShadowDepthPass 4 "PrePadding_MobileShadowDepthPass_16" + OpMemberName %type_MobileShadowDepthPass 5 "PrePadding_MobileShadowDepthPass_20" + OpMemberName %type_MobileShadowDepthPass 6 "PrePadding_MobileShadowDepthPass_24" + OpMemberName %type_MobileShadowDepthPass 7 "PrePadding_MobileShadowDepthPass_28" + OpMemberName %type_MobileShadowDepthPass 8 "PrePadding_MobileShadowDepthPass_32" + OpMemberName %type_MobileShadowDepthPass 9 "PrePadding_MobileShadowDepthPass_36" + OpMemberName %type_MobileShadowDepthPass 10 "PrePadding_MobileShadowDepthPass_40" + OpMemberName %type_MobileShadowDepthPass 11 "PrePadding_MobileShadowDepthPass_44" + OpMemberName %type_MobileShadowDepthPass 12 "PrePadding_MobileShadowDepthPass_48" + OpMemberName %type_MobileShadowDepthPass 13 "PrePadding_MobileShadowDepthPass_52" + OpMemberName %type_MobileShadowDepthPass 14 "PrePadding_MobileShadowDepthPass_56" + OpMemberName %type_MobileShadowDepthPass 15 "PrePadding_MobileShadowDepthPass_60" + OpMemberName %type_MobileShadowDepthPass 16 "PrePadding_MobileShadowDepthPass_64" + OpMemberName %type_MobileShadowDepthPass 17 "PrePadding_MobileShadowDepthPass_68" + OpMemberName %type_MobileShadowDepthPass 18 "PrePadding_MobileShadowDepthPass_72" + OpMemberName %type_MobileShadowDepthPass 19 "PrePadding_MobileShadowDepthPass_76" + OpMemberName %type_MobileShadowDepthPass 20 "MobileShadowDepthPass_ProjectionMatrix" + OpMemberName %type_MobileShadowDepthPass 21 "MobileShadowDepthPass_ShadowParams" + OpMemberName %type_MobileShadowDepthPass 22 "MobileShadowDepthPass_bClampToNearPlane" + OpMemberName 
%type_MobileShadowDepthPass 23 "PrePadding_MobileShadowDepthPass_156" + OpMemberName %type_MobileShadowDepthPass 24 "MobileShadowDepthPass_ShadowViewProjectionMatrices" + OpName %MobileShadowDepthPass "MobileShadowDepthPass" + OpName %type_EmitterDynamicUniforms "type.EmitterDynamicUniforms" + OpMemberName %type_EmitterDynamicUniforms 0 "EmitterDynamicUniforms_LocalToWorldScale" + OpMemberName %type_EmitterDynamicUniforms 1 "EmitterDynamicUniforms_EmitterInstRandom" + OpMemberName %type_EmitterDynamicUniforms 2 "PrePadding_EmitterDynamicUniforms_12" + OpMemberName %type_EmitterDynamicUniforms 3 "EmitterDynamicUniforms_AxisLockRight" + OpMemberName %type_EmitterDynamicUniforms 4 "EmitterDynamicUniforms_AxisLockUp" + OpMemberName %type_EmitterDynamicUniforms 5 "EmitterDynamicUniforms_DynamicColor" + OpMemberName %type_EmitterDynamicUniforms 6 "EmitterDynamicUniforms_MacroUVParameters" + OpName %EmitterDynamicUniforms "EmitterDynamicUniforms" + OpName %type_EmitterUniforms "type.EmitterUniforms" + OpMemberName %type_EmitterUniforms 0 "EmitterUniforms_ColorCurve" + OpMemberName %type_EmitterUniforms 1 "EmitterUniforms_ColorScale" + OpMemberName %type_EmitterUniforms 2 "EmitterUniforms_ColorBias" + OpMemberName %type_EmitterUniforms 3 "EmitterUniforms_MiscCurve" + OpMemberName %type_EmitterUniforms 4 "EmitterUniforms_MiscScale" + OpMemberName %type_EmitterUniforms 5 "EmitterUniforms_MiscBias" + OpMemberName %type_EmitterUniforms 6 "EmitterUniforms_SizeBySpeed" + OpMemberName %type_EmitterUniforms 7 "EmitterUniforms_SubImageSize" + OpMemberName %type_EmitterUniforms 8 "EmitterUniforms_TangentSelector" + OpMemberName %type_EmitterUniforms 9 "EmitterUniforms_CameraFacingBlend" + OpMemberName %type_EmitterUniforms 10 "EmitterUniforms_RemoveHMDRoll" + OpMemberName %type_EmitterUniforms 11 "EmitterUniforms_RotationRateScale" + OpMemberName %type_EmitterUniforms 12 "EmitterUniforms_RotationBias" + OpMemberName %type_EmitterUniforms 13 "EmitterUniforms_CameraMotionBlurAmount" + 
OpMemberName %type_EmitterUniforms 14 "PrePadding_EmitterUniforms_172" + OpMemberName %type_EmitterUniforms 15 "EmitterUniforms_PivotOffset" + OpName %EmitterUniforms "EmitterUniforms" + OpName %type_buffer_image "type.buffer.image" + OpName %ParticleIndices "ParticleIndices" + OpName %type__Globals "type.$Globals" + OpMemberName %type__Globals 0 "ParticleIndicesOffset" + OpName %_Globals "$Globals" + OpName %type_2d_image "type.2d.image" + OpName %PositionTexture "PositionTexture" + OpName %type_sampler "type.sampler" + OpName %PositionTextureSampler "PositionTextureSampler" + OpName %VelocityTexture "VelocityTexture" + OpName %VelocityTextureSampler "VelocityTextureSampler" + OpName %AttributesTexture "AttributesTexture" + OpName %AttributesTextureSampler "AttributesTextureSampler" + OpName %CurveTexture "CurveTexture" + OpName %CurveTextureSampler "CurveTextureSampler" + OpName %in_var_ATTRIBUTE0 "in.var.ATTRIBUTE0" + OpName %out_var_TEXCOORD6 "out.var.TEXCOORD6" + OpName %Main "Main" + OpName %type_sampled_image "type.sampled.image" + OpDecorate %gl_VertexIndex BuiltIn VertexIndex + OpDecorateString %gl_VertexIndex UserSemantic "SV_VertexID" + OpDecorate %gl_InstanceIndex BuiltIn InstanceIndex + OpDecorateString %gl_InstanceIndex UserSemantic "SV_InstanceID" + OpDecorateString %in_var_ATTRIBUTE0 UserSemantic "ATTRIBUTE0" + OpDecorateString %out_var_TEXCOORD6 UserSemantic "TEXCOORD6" + OpDecorate %gl_Position BuiltIn Position + OpDecorateString %gl_Position UserSemantic "SV_POSITION" + OpDecorate %in_var_ATTRIBUTE0 Location 0 + OpDecorate %out_var_TEXCOORD6 Location 0 + OpDecorate %View DescriptorSet 0 + OpDecorate %View Binding 1 + OpDecorate %Primitive DescriptorSet 0 + OpDecorate %Primitive Binding 2 + OpDecorate %MobileShadowDepthPass DescriptorSet 0 + OpDecorate %MobileShadowDepthPass Binding 3 + OpDecorate %EmitterDynamicUniforms DescriptorSet 0 + OpDecorate %EmitterDynamicUniforms Binding 4 + OpDecorate %EmitterUniforms DescriptorSet 0 + OpDecorate 
%EmitterUniforms Binding 5 + OpDecorate %ParticleIndices DescriptorSet 0 + OpDecorate %ParticleIndices Binding 0 + OpDecorate %_Globals DescriptorSet 0 + OpDecorate %_Globals Binding 6 + OpDecorate %PositionTexture DescriptorSet 0 + OpDecorate %PositionTexture Binding 1 + OpDecorate %PositionTextureSampler DescriptorSet 0 + OpDecorate %PositionTextureSampler Binding 0 + OpDecorate %VelocityTexture DescriptorSet 0 + OpDecorate %VelocityTexture Binding 2 + OpDecorate %VelocityTextureSampler DescriptorSet 0 + OpDecorate %VelocityTextureSampler Binding 1 + OpDecorate %AttributesTexture DescriptorSet 0 + OpDecorate %AttributesTexture Binding 3 + OpDecorate %AttributesTextureSampler DescriptorSet 0 + OpDecorate %AttributesTextureSampler Binding 2 + OpDecorate %CurveTexture DescriptorSet 0 + OpDecorate %CurveTexture Binding 4 + OpDecorate %CurveTextureSampler DescriptorSet 0 + OpDecorate %CurveTextureSampler Binding 3 + OpDecorate %_arr_v4float_uint_2 ArrayStride 16 + OpDecorate %_arr_v4float_uint_7 ArrayStride 16 + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %type_View 0 Offset 0 + OpMemberDecorate %type_View 0 MatrixStride 16 + OpMemberDecorate %type_View 0 ColMajor + OpMemberDecorate %type_View 1 Offset 64 + OpMemberDecorate %type_View 1 MatrixStride 16 + OpMemberDecorate %type_View 1 ColMajor + OpMemberDecorate %type_View 2 Offset 128 + OpMemberDecorate %type_View 2 MatrixStride 16 + OpMemberDecorate %type_View 2 ColMajor + OpMemberDecorate %type_View 3 Offset 192 + OpMemberDecorate %type_View 3 MatrixStride 16 + OpMemberDecorate %type_View 3 ColMajor + OpMemberDecorate %type_View 4 Offset 256 + OpMemberDecorate %type_View 4 MatrixStride 16 + OpMemberDecorate %type_View 4 ColMajor + OpMemberDecorate %type_View 5 Offset 320 + OpMemberDecorate %type_View 5 MatrixStride 16 + OpMemberDecorate %type_View 5 ColMajor + OpMemberDecorate %type_View 6 Offset 384 + OpMemberDecorate %type_View 6 MatrixStride 16 + OpMemberDecorate %type_View 6 ColMajor + 
OpMemberDecorate %type_View 7 Offset 448 + OpMemberDecorate %type_View 7 MatrixStride 16 + OpMemberDecorate %type_View 7 ColMajor + OpMemberDecorate %type_View 8 Offset 512 + OpMemberDecorate %type_View 8 MatrixStride 16 + OpMemberDecorate %type_View 8 ColMajor + OpMemberDecorate %type_View 9 Offset 576 + OpMemberDecorate %type_View 9 MatrixStride 16 + OpMemberDecorate %type_View 9 ColMajor + OpMemberDecorate %type_View 10 Offset 640 + OpMemberDecorate %type_View 10 MatrixStride 16 + OpMemberDecorate %type_View 10 ColMajor + OpMemberDecorate %type_View 11 Offset 704 + OpMemberDecorate %type_View 11 MatrixStride 16 + OpMemberDecorate %type_View 11 ColMajor + OpMemberDecorate %type_View 12 Offset 768 + OpMemberDecorate %type_View 12 MatrixStride 16 + OpMemberDecorate %type_View 12 ColMajor + OpMemberDecorate %type_View 13 Offset 832 + OpMemberDecorate %type_View 14 Offset 844 + OpMemberDecorate %type_View 15 Offset 848 + OpMemberDecorate %type_View 16 Offset 860 + OpMemberDecorate %type_View 17 Offset 864 + OpMemberDecorate %type_View 18 Offset 876 + OpMemberDecorate %type_View 19 Offset 880 + OpMemberDecorate %type_View 20 Offset 892 + OpMemberDecorate %type_View 21 Offset 896 + OpMemberDecorate %type_View 22 Offset 908 + OpMemberDecorate %type_View 23 Offset 912 + OpMemberDecorate %type_View 24 Offset 928 + OpMemberDecorate %type_View 25 Offset 944 + OpMemberDecorate %type_View 26 Offset 956 + OpMemberDecorate %type_View 27 Offset 960 + OpMemberDecorate %type_View 28 Offset 972 + OpMemberDecorate %type_View 29 Offset 976 + OpMemberDecorate %type_View 30 Offset 988 + OpMemberDecorate %type_View 31 Offset 992 + OpMemberDecorate %type_View 32 Offset 1004 + OpMemberDecorate %type_View 33 Offset 1008 + OpMemberDecorate %type_View 33 MatrixStride 16 + OpMemberDecorate %type_View 33 ColMajor + OpMemberDecorate %type_View 34 Offset 1072 + OpMemberDecorate %type_View 34 MatrixStride 16 + OpMemberDecorate %type_View 34 ColMajor + OpMemberDecorate %type_View 35 Offset 1136 + 
OpMemberDecorate %type_View 35 MatrixStride 16 + OpMemberDecorate %type_View 35 ColMajor + OpMemberDecorate %type_View 36 Offset 1200 + OpMemberDecorate %type_View 36 MatrixStride 16 + OpMemberDecorate %type_View 36 ColMajor + OpMemberDecorate %type_View 37 Offset 1264 + OpMemberDecorate %type_View 37 MatrixStride 16 + OpMemberDecorate %type_View 37 ColMajor + OpMemberDecorate %type_View 38 Offset 1328 + OpMemberDecorate %type_View 38 MatrixStride 16 + OpMemberDecorate %type_View 38 ColMajor + OpMemberDecorate %type_View 39 Offset 1392 + OpMemberDecorate %type_View 39 MatrixStride 16 + OpMemberDecorate %type_View 39 ColMajor + OpMemberDecorate %type_View 40 Offset 1456 + OpMemberDecorate %type_View 40 MatrixStride 16 + OpMemberDecorate %type_View 40 ColMajor + OpMemberDecorate %type_View 41 Offset 1520 + OpMemberDecorate %type_View 41 MatrixStride 16 + OpMemberDecorate %type_View 41 ColMajor + OpMemberDecorate %type_View 42 Offset 1584 + OpMemberDecorate %type_View 42 MatrixStride 16 + OpMemberDecorate %type_View 42 ColMajor + OpMemberDecorate %type_View 43 Offset 1648 + OpMemberDecorate %type_View 44 Offset 1660 + OpMemberDecorate %type_View 45 Offset 1664 + OpMemberDecorate %type_View 46 Offset 1676 + OpMemberDecorate %type_View 47 Offset 1680 + OpMemberDecorate %type_View 48 Offset 1692 + OpMemberDecorate %type_View 49 Offset 1696 + OpMemberDecorate %type_View 49 MatrixStride 16 + OpMemberDecorate %type_View 49 ColMajor + OpMemberDecorate %type_View 50 Offset 1760 + OpMemberDecorate %type_View 50 MatrixStride 16 + OpMemberDecorate %type_View 50 ColMajor + OpMemberDecorate %type_View 51 Offset 1824 + OpMemberDecorate %type_View 51 MatrixStride 16 + OpMemberDecorate %type_View 51 ColMajor + OpMemberDecorate %type_View 52 Offset 1888 + OpMemberDecorate %type_View 53 Offset 1904 + OpMemberDecorate %type_View 54 Offset 1920 + OpMemberDecorate %type_View 55 Offset 1928 + OpMemberDecorate %type_View 56 Offset 1936 + OpMemberDecorate %type_View 57 Offset 1952 + 
OpMemberDecorate %type_View 58 Offset 1968 + OpMemberDecorate %type_View 59 Offset 1984 + OpMemberDecorate %type_View 60 Offset 2000 + OpMemberDecorate %type_View 61 Offset 2004 + OpMemberDecorate %type_View 62 Offset 2008 + OpMemberDecorate %type_View 63 Offset 2012 + OpMemberDecorate %type_View 64 Offset 2016 + OpMemberDecorate %type_View 65 Offset 2032 + OpMemberDecorate %type_View 66 Offset 2048 + OpMemberDecorate %type_View 67 Offset 2064 + OpMemberDecorate %type_View 68 Offset 2072 + OpMemberDecorate %type_View 69 Offset 2076 + OpMemberDecorate %type_View 70 Offset 2080 + OpMemberDecorate %type_View 71 Offset 2084 + OpMemberDecorate %type_View 72 Offset 2088 + OpMemberDecorate %type_View 73 Offset 2092 + OpMemberDecorate %type_View 74 Offset 2096 + OpMemberDecorate %type_View 75 Offset 2108 + OpMemberDecorate %type_View 76 Offset 2112 + OpMemberDecorate %type_View 77 Offset 2116 + OpMemberDecorate %type_View 78 Offset 2120 + OpMemberDecorate %type_View 79 Offset 2124 + OpMemberDecorate %type_View 80 Offset 2128 + OpMemberDecorate %type_View 81 Offset 2132 + OpMemberDecorate %type_View 82 Offset 2136 + OpMemberDecorate %type_View 83 Offset 2140 + OpMemberDecorate %type_View 84 Offset 2144 + OpMemberDecorate %type_View 85 Offset 2148 + OpMemberDecorate %type_View 86 Offset 2152 + OpMemberDecorate %type_View 87 Offset 2156 + OpMemberDecorate %type_View 88 Offset 2160 + OpMemberDecorate %type_View 89 Offset 2164 + OpMemberDecorate %type_View 90 Offset 2168 + OpMemberDecorate %type_View 91 Offset 2172 + OpMemberDecorate %type_View 92 Offset 2176 + OpMemberDecorate %type_View 93 Offset 2192 + OpMemberDecorate %type_View 94 Offset 2204 + OpMemberDecorate %type_View 95 Offset 2208 + OpMemberDecorate %type_View 96 Offset 2240 + OpMemberDecorate %type_View 97 Offset 2272 + OpMemberDecorate %type_View 98 Offset 2288 + OpMemberDecorate %type_View 99 Offset 2304 + OpMemberDecorate %type_View 100 Offset 2308 + OpMemberDecorate %type_View 101 Offset 2312 + OpMemberDecorate 
%type_View 102 Offset 2316 + OpMemberDecorate %type_View 103 Offset 2320 + OpMemberDecorate %type_View 104 Offset 2324 + OpMemberDecorate %type_View 105 Offset 2328 + OpMemberDecorate %type_View 106 Offset 2332 + OpMemberDecorate %type_View 107 Offset 2336 + OpMemberDecorate %type_View 108 Offset 2340 + OpMemberDecorate %type_View 109 Offset 2344 + OpMemberDecorate %type_View 110 Offset 2348 + OpMemberDecorate %type_View 111 Offset 2352 + OpMemberDecorate %type_View 112 Offset 2364 + OpMemberDecorate %type_View 113 Offset 2368 + OpMemberDecorate %type_View 114 Offset 2380 + OpMemberDecorate %type_View 115 Offset 2384 + OpMemberDecorate %type_View 116 Offset 2388 + OpMemberDecorate %type_View 117 Offset 2392 + OpMemberDecorate %type_View 118 Offset 2396 + OpMemberDecorate %type_View 119 Offset 2400 + OpMemberDecorate %type_View 120 Offset 2404 + OpMemberDecorate %type_View 121 Offset 2408 + OpMemberDecorate %type_View 122 Offset 2412 + OpMemberDecorate %type_View 123 Offset 2416 + OpMemberDecorate %type_View 124 Offset 2420 + OpMemberDecorate %type_View 125 Offset 2424 + OpMemberDecorate %type_View 126 Offset 2428 + OpMemberDecorate %type_View 127 Offset 2432 + OpMemberDecorate %type_View 128 Offset 2448 + OpMemberDecorate %type_View 129 Offset 2460 + OpMemberDecorate %type_View 130 Offset 2464 + OpMemberDecorate %type_View 131 Offset 2480 + OpMemberDecorate %type_View 132 Offset 2484 + OpMemberDecorate %type_View 133 Offset 2488 + OpMemberDecorate %type_View 134 Offset 2492 + OpMemberDecorate %type_View 135 Offset 2496 + OpMemberDecorate %type_View 136 Offset 2512 + OpMemberDecorate %type_View 137 Offset 2624 + OpMemberDecorate %type_View 138 Offset 2628 + OpMemberDecorate %type_View 139 Offset 2632 + OpMemberDecorate %type_View 140 Offset 2636 + OpMemberDecorate %type_View 141 Offset 2640 + OpMemberDecorate %type_View 142 Offset 2644 + OpMemberDecorate %type_View 143 Offset 2648 + OpMemberDecorate %type_View 144 Offset 2652 + OpMemberDecorate %type_View 145 Offset 
2656 + OpMemberDecorate %type_View 146 Offset 2668 + OpMemberDecorate %type_View 147 Offset 2672 + OpMemberDecorate %type_View 148 Offset 2736 + OpMemberDecorate %type_View 149 Offset 2800 + OpMemberDecorate %type_View 150 Offset 2804 + OpMemberDecorate %type_View 151 Offset 2808 + OpMemberDecorate %type_View 152 Offset 2812 + OpMemberDecorate %type_View 153 Offset 2816 + OpMemberDecorate %type_View 154 Offset 2828 + OpMemberDecorate %type_View 155 Offset 2832 + OpMemberDecorate %type_View 156 Offset 2844 + OpMemberDecorate %type_View 157 Offset 2848 + OpMemberDecorate %type_View 158 Offset 2856 + OpMemberDecorate %type_View 159 Offset 2860 + OpMemberDecorate %type_View 160 Offset 2864 + OpMemberDecorate %type_View 161 Offset 2876 + OpMemberDecorate %type_View 162 Offset 2880 + OpMemberDecorate %type_View 163 Offset 2892 + OpMemberDecorate %type_View 164 Offset 2896 + OpMemberDecorate %type_View 165 Offset 2908 + OpMemberDecorate %type_View 166 Offset 2912 + OpMemberDecorate %type_View 167 Offset 2924 + OpMemberDecorate %type_View 168 Offset 2928 + OpMemberDecorate %type_View 169 Offset 2932 + OpDecorate %type_View Block + OpMemberDecorate %type_Primitive 0 Offset 0 + OpMemberDecorate %type_Primitive 0 MatrixStride 16 + OpMemberDecorate %type_Primitive 0 ColMajor + OpMemberDecorate %type_Primitive 1 Offset 64 + OpMemberDecorate %type_Primitive 2 Offset 80 + OpMemberDecorate %type_Primitive 3 Offset 96 + OpMemberDecorate %type_Primitive 3 MatrixStride 16 + OpMemberDecorate %type_Primitive 3 ColMajor + OpMemberDecorate %type_Primitive 4 Offset 160 + OpMemberDecorate %type_Primitive 4 MatrixStride 16 + OpMemberDecorate %type_Primitive 4 ColMajor + OpMemberDecorate %type_Primitive 5 Offset 224 + OpMemberDecorate %type_Primitive 5 MatrixStride 16 + OpMemberDecorate %type_Primitive 5 ColMajor + OpMemberDecorate %type_Primitive 6 Offset 288 + OpMemberDecorate %type_Primitive 7 Offset 300 + OpMemberDecorate %type_Primitive 8 Offset 304 + OpMemberDecorate %type_Primitive 9 
Offset 316 + OpMemberDecorate %type_Primitive 10 Offset 320 + OpMemberDecorate %type_Primitive 11 Offset 324 + OpMemberDecorate %type_Primitive 12 Offset 328 + OpMemberDecorate %type_Primitive 13 Offset 332 + OpMemberDecorate %type_Primitive 14 Offset 336 + OpMemberDecorate %type_Primitive 15 Offset 352 + OpMemberDecorate %type_Primitive 16 Offset 368 + OpMemberDecorate %type_Primitive 17 Offset 380 + OpMemberDecorate %type_Primitive 18 Offset 384 + OpMemberDecorate %type_Primitive 19 Offset 396 + OpMemberDecorate %type_Primitive 20 Offset 400 + OpMemberDecorate %type_Primitive 21 Offset 404 + OpDecorate %type_Primitive Block + OpDecorate %_arr_mat4v4float_uint_6 ArrayStride 64 + OpMemberDecorate %type_MobileShadowDepthPass 0 Offset 0 + OpMemberDecorate %type_MobileShadowDepthPass 1 Offset 4 + OpMemberDecorate %type_MobileShadowDepthPass 2 Offset 8 + OpMemberDecorate %type_MobileShadowDepthPass 3 Offset 12 + OpMemberDecorate %type_MobileShadowDepthPass 4 Offset 16 + OpMemberDecorate %type_MobileShadowDepthPass 5 Offset 20 + OpMemberDecorate %type_MobileShadowDepthPass 6 Offset 24 + OpMemberDecorate %type_MobileShadowDepthPass 7 Offset 28 + OpMemberDecorate %type_MobileShadowDepthPass 8 Offset 32 + OpMemberDecorate %type_MobileShadowDepthPass 9 Offset 36 + OpMemberDecorate %type_MobileShadowDepthPass 10 Offset 40 + OpMemberDecorate %type_MobileShadowDepthPass 11 Offset 44 + OpMemberDecorate %type_MobileShadowDepthPass 12 Offset 48 + OpMemberDecorate %type_MobileShadowDepthPass 13 Offset 52 + OpMemberDecorate %type_MobileShadowDepthPass 14 Offset 56 + OpMemberDecorate %type_MobileShadowDepthPass 15 Offset 60 + OpMemberDecorate %type_MobileShadowDepthPass 16 Offset 64 + OpMemberDecorate %type_MobileShadowDepthPass 17 Offset 68 + OpMemberDecorate %type_MobileShadowDepthPass 18 Offset 72 + OpMemberDecorate %type_MobileShadowDepthPass 19 Offset 76 + OpMemberDecorate %type_MobileShadowDepthPass 20 Offset 80 + OpMemberDecorate %type_MobileShadowDepthPass 20 MatrixStride 16 
+ OpMemberDecorate %type_MobileShadowDepthPass 20 ColMajor + OpMemberDecorate %type_MobileShadowDepthPass 21 Offset 144 + OpMemberDecorate %type_MobileShadowDepthPass 22 Offset 152 + OpMemberDecorate %type_MobileShadowDepthPass 23 Offset 156 + OpMemberDecorate %type_MobileShadowDepthPass 24 Offset 160 + OpMemberDecorate %type_MobileShadowDepthPass 24 MatrixStride 16 + OpMemberDecorate %type_MobileShadowDepthPass 24 ColMajor + OpDecorate %type_MobileShadowDepthPass Block + OpMemberDecorate %type_EmitterDynamicUniforms 0 Offset 0 + OpMemberDecorate %type_EmitterDynamicUniforms 1 Offset 8 + OpMemberDecorate %type_EmitterDynamicUniforms 2 Offset 12 + OpMemberDecorate %type_EmitterDynamicUniforms 3 Offset 16 + OpMemberDecorate %type_EmitterDynamicUniforms 4 Offset 32 + OpMemberDecorate %type_EmitterDynamicUniforms 5 Offset 48 + OpMemberDecorate %type_EmitterDynamicUniforms 6 Offset 64 + OpDecorate %type_EmitterDynamicUniforms Block + OpMemberDecorate %type_EmitterUniforms 0 Offset 0 + OpMemberDecorate %type_EmitterUniforms 1 Offset 16 + OpMemberDecorate %type_EmitterUniforms 2 Offset 32 + OpMemberDecorate %type_EmitterUniforms 3 Offset 48 + OpMemberDecorate %type_EmitterUniforms 4 Offset 64 + OpMemberDecorate %type_EmitterUniforms 5 Offset 80 + OpMemberDecorate %type_EmitterUniforms 6 Offset 96 + OpMemberDecorate %type_EmitterUniforms 7 Offset 112 + OpMemberDecorate %type_EmitterUniforms 8 Offset 128 + OpMemberDecorate %type_EmitterUniforms 9 Offset 144 + OpMemberDecorate %type_EmitterUniforms 10 Offset 156 + OpMemberDecorate %type_EmitterUniforms 11 Offset 160 + OpMemberDecorate %type_EmitterUniforms 12 Offset 164 + OpMemberDecorate %type_EmitterUniforms 13 Offset 168 + OpMemberDecorate %type_EmitterUniforms 14 Offset 172 + OpMemberDecorate %type_EmitterUniforms 15 Offset 176 + OpDecorate %type_EmitterUniforms Block + OpMemberDecorate %type__Globals 0 Offset 0 + OpDecorate %type__Globals Block + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%mat4v4float = 
OpTypeMatrix %v4float 4 + %v3float = OpTypeVector %float 3 + %v2float = OpTypeVector %float 2 + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_7 = OpConstant %uint 7 + %uint_4 = OpConstant %uint 4 + %float_0 = OpConstant %float 0 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %uint_16 = OpConstant %uint 16 + %int_3 = OpConstant %int 3 + %float_1 = OpConstant %float 1 +%float_9_99999975en05 = OpConstant %float 9.99999975e-05 + %54 = OpConstantComposite %v3float %float_0 %float_0 %float_9_99999975en05 + %int_2 = OpConstant %int 2 + %int_5 = OpConstant %int 5 + %int_4 = OpConstant %int 4 + %float_0_5 = OpConstant %float 0.5 + %float_n0_5 = OpConstant %float -0.5 + %float_2 = OpConstant %float 2 + %61 = OpConstantComposite %v2float %float_2 %float_2 + %int_6 = OpConstant %int 6 + %63 = OpConstantComposite %v2float %float_1 %float_1 + %int_11 = OpConstant %int 11 + %int_15 = OpConstant %int 15 + %int_8 = OpConstant %int 8 + %int_9 = OpConstant %int 9 + %int_10 = OpConstant %int 10 + %int_12 = OpConstant %int 12 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 +%mat3v3float = OpTypeMatrix %v3float 3 + %int_20 = OpConstant %int 20 + %int_22 = OpConstant %int 22 +%float_9_99999997en07 = OpConstant %float 9.99999997e-07 + %int_21 = OpConstant %int 21 + %int_17 = OpConstant %int 17 + %int_19 = OpConstant %int 19 + %int_27 = OpConstant %int 27 + %int_31 = OpConstant %int 31 + %uint_3 = OpConstant %uint 3 + %82 = OpConstantComposite %v3float %float_0 %float_0 %float_1 +%float_0_00999999978 = OpConstant %float 0.00999999978 +%_arr_v4float_uint_2 = OpTypeArray %v4float %uint_2 +%_arr_v4float_uint_7 = OpTypeArray %v4float %uint_7 +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %type_View = OpTypeStruct %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float 
%float %v3float %float %v3float %float %v4float %v4float %v3float %float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %v3float %float %mat4v4float %mat4v4float %mat4v4float %v4float %v4float %v2float %v2float %v4float %v4float %v4float %v4float %int %float %float %float %v4float %v4float %v4float %v2float %float %float %float %float %float %float %v3float %float %float %float %float %float %float %float %float %uint %uint %uint %uint %float %float %float %float %float %v4float %v3float %float %_arr_v4float_uint_2 %_arr_v4float_uint_2 %v4float %v4float %float %float %float %float %float %float %float %float %float %float %float %float %v3float %float %v3float %float %float %float %float %float %float %float %float %float %float %float %uint %uint %v4float %v3float %float %v4float %float %float %float %float %v4float %_arr_v4float_uint_7 %float %float %float %float %uint %float %float %float %v3float %int %_arr_v4float_uint_4 %_arr_v4float_uint_4 %float %float %float %float %v3float %float %v3float %float %v2float %float %float %v3float %float %v3float %float %v3float %float %v3float %float %float %float +%_ptr_Uniform_type_View = OpTypePointer Uniform %type_View +%type_Primitive = OpTypeStruct %mat4v4float %v4float %v4float %mat4v4float %mat4v4float %mat4v4float %v3float %float %v3float %float %float %float %float %float %v4float %v4float %v3float %float %v3float %uint %uint %int +%_ptr_Uniform_type_Primitive = OpTypePointer Uniform %type_Primitive + %uint_6 = OpConstant %uint 6 +%_arr_mat4v4float_uint_6 = OpTypeArray %mat4v4float %uint_6 +%type_MobileShadowDepthPass = OpTypeStruct %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %float %mat4v4float %v2float %float %float %_arr_mat4v4float_uint_6 
+%_ptr_Uniform_type_MobileShadowDepthPass = OpTypePointer Uniform %type_MobileShadowDepthPass +%type_EmitterDynamicUniforms = OpTypeStruct %v2float %float %float %v4float %v4float %v4float %v4float +%_ptr_Uniform_type_EmitterDynamicUniforms = OpTypePointer Uniform %type_EmitterDynamicUniforms +%type_EmitterUniforms = OpTypeStruct %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v4float %v3float %float %float %float %float %float %v2float +%_ptr_Uniform_type_EmitterUniforms = OpTypePointer Uniform %type_EmitterUniforms +%type_buffer_image = OpTypeImage %float Buffer 2 0 0 1 Rg32f +%_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image +%type__Globals = OpTypeStruct %uint +%_ptr_Uniform_type__Globals = OpTypePointer Uniform %type__Globals +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_sampler = OpTypeSampler +%_ptr_UniformConstant_type_sampler = OpTypePointer UniformConstant %type_sampler +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Input_v2float = OpTypePointer Input %v2float +%_ptr_Output_float = OpTypePointer Output %float +%_ptr_Output_v4float = OpTypePointer Output %v4float + %void = OpTypeVoid + %99 = OpTypeFunction %void +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %bool = OpTypeBool +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_mat4v4float = OpTypePointer Uniform %mat4v4float +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float +%type_sampled_image = OpTypeSampledImage %type_2d_image + %View = OpVariable %_ptr_Uniform_type_View Uniform + %Primitive = OpVariable %_ptr_Uniform_type_Primitive Uniform +%MobileShadowDepthPass = OpVariable %_ptr_Uniform_type_MobileShadowDepthPass Uniform +%EmitterDynamicUniforms = OpVariable 
%_ptr_Uniform_type_EmitterDynamicUniforms Uniform +%EmitterUniforms = OpVariable %_ptr_Uniform_type_EmitterUniforms Uniform +%ParticleIndices = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant + %_Globals = OpVariable %_ptr_Uniform_type__Globals Uniform +%PositionTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%PositionTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%VelocityTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%VelocityTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%AttributesTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%AttributesTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%CurveTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%CurveTextureSampler = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +%gl_VertexIndex = OpVariable %_ptr_Input_uint Input +%gl_InstanceIndex = OpVariable %_ptr_Input_uint Input +%in_var_ATTRIBUTE0 = OpVariable %_ptr_Input_v2float Input +%out_var_TEXCOORD6 = OpVariable %_ptr_Output_float Output +%gl_Position = OpVariable %_ptr_Output_v4float Output +%float_6_28318548 = OpConstant %float 6.28318548 + %108 = OpConstantNull %v3float + %Main = OpFunction %void None %99 + %109 = OpLabel + %110 = OpLoad %uint %gl_VertexIndex + %111 = OpLoad %uint %gl_InstanceIndex + %112 = OpLoad %v2float %in_var_ATTRIBUTE0 + %113 = OpAccessChain %_ptr_Uniform_v3float %View %int_15 + %114 = OpLoad %v3float %113 + %115 = OpAccessChain %_ptr_Uniform_v3float %View %int_17 + %116 = OpLoad %v3float %115 + %117 = OpAccessChain %_ptr_Uniform_v3float %View %int_19 + %118 = OpLoad %v3float %117 + %119 = OpAccessChain %_ptr_Uniform_v3float %View %int_21 + %120 = OpLoad %v3float %119 + %121 = OpAccessChain %_ptr_Uniform_v3float %View %int_27 + %122 = OpLoad %v3float %121 + %123 = OpAccessChain %_ptr_Uniform_v3float %View 
%int_31 + %124 = OpLoad %v3float %123 + %125 = OpIMul %uint %111 %uint_16 + %126 = OpUDiv %uint %110 %uint_4 + %127 = OpIAdd %uint %125 %126 + %128 = OpAccessChain %_ptr_Uniform_uint %_Globals %int_0 + %129 = OpLoad %uint %128 + %130 = OpIAdd %uint %129 %127 + %131 = OpLoad %type_buffer_image %ParticleIndices + %132 = OpImageFetch %v4float %131 %130 None + %133 = OpVectorShuffle %v2float %132 %132 0 1 + %134 = OpLoad %type_2d_image %PositionTexture + %135 = OpLoad %type_sampler %PositionTextureSampler + %136 = OpSampledImage %type_sampled_image %134 %135 + %137 = OpImageSampleExplicitLod %v4float %136 %133 Lod %float_0 + %138 = OpLoad %type_2d_image %VelocityTexture + %139 = OpLoad %type_sampler %VelocityTextureSampler + %140 = OpSampledImage %type_sampled_image %138 %139 + %141 = OpImageSampleExplicitLod %v4float %140 %133 Lod %float_0 + %142 = OpLoad %type_2d_image %AttributesTexture + %143 = OpLoad %type_sampler %AttributesTextureSampler + %144 = OpSampledImage %type_sampled_image %142 %143 + %145 = OpImageSampleExplicitLod %v4float %144 %133 Lod %float_0 + %146 = OpCompositeExtract %float %137 3 + %147 = OpExtInst %float %1 Step %146 %float_1 + %148 = OpVectorShuffle %v3float %141 %141 0 1 2 + %149 = OpAccessChain %_ptr_Uniform_mat4v4float %Primitive %int_0 + %150 = OpLoad %mat4v4float %149 + %151 = OpCompositeExtract %v4float %150 0 + %152 = OpVectorShuffle %v3float %151 %151 0 1 2 + %153 = OpCompositeExtract %v4float %150 1 + %154 = OpVectorShuffle %v3float %153 %153 0 1 2 + %155 = OpCompositeExtract %v4float %150 2 + %156 = OpVectorShuffle %v3float %155 %155 0 1 2 + %157 = OpCompositeConstruct %mat3v3float %152 %154 %156 + %158 = OpMatrixTimesVector %v3float %157 %148 + %159 = OpFAdd %v3float %158 %54 + %160 = OpExtInst %v3float %1 Normalize %159 + %161 = OpExtInst %float %1 Length %158 + %162 = OpAccessChain %_ptr_Uniform_v4float %EmitterUniforms %int_3 + %163 = OpLoad %v4float %162 + %164 = OpVectorShuffle %v2float %163 %163 0 1 + %165 = OpVectorShuffle 
%v2float %163 %163 2 3 + %166 = OpCompositeConstruct %v2float %146 %146 + %167 = OpFMul %v2float %165 %166 + %168 = OpFAdd %v2float %164 %167 + %169 = OpLoad %type_2d_image %CurveTexture + %170 = OpLoad %type_sampler %CurveTextureSampler + %171 = OpSampledImage %type_sampled_image %169 %170 + %172 = OpImageSampleExplicitLod %v4float %171 %168 Lod %float_0 + %173 = OpAccessChain %_ptr_Uniform_v4float %EmitterUniforms %int_4 + %174 = OpLoad %v4float %173 + %175 = OpFMul %v4float %172 %174 + %176 = OpAccessChain %_ptr_Uniform_v4float %EmitterUniforms %int_5 + %177 = OpLoad %v4float %176 + %178 = OpFAdd %v4float %175 %177 + %179 = OpCompositeExtract %float %145 0 + %180 = OpFOrdLessThan %bool %179 %float_0_5 + %181 = OpSelect %float %180 %float_0 %float_n0_5 + %182 = OpCompositeExtract %float %145 1 + %183 = OpFOrdLessThan %bool %182 %float_0_5 + %184 = OpSelect %float %183 %float_0 %float_n0_5 + %185 = OpCompositeConstruct %v2float %181 %184 + %186 = OpVectorShuffle %v2float %145 %145 0 1 + %187 = OpFAdd %v2float %186 %185 + %188 = OpFMul %v2float %187 %61 + %189 = OpVectorShuffle %v2float %178 %178 0 1 + %190 = OpAccessChain %_ptr_Uniform_v2float %EmitterDynamicUniforms %int_0 + %191 = OpLoad %v2float %190 + %192 = OpFMul %v2float %189 %191 + %193 = OpAccessChain %_ptr_Uniform_v4float %EmitterUniforms %int_6 + %194 = OpLoad %v4float %193 + %195 = OpVectorShuffle %v2float %194 %194 0 1 + %196 = OpCompositeConstruct %v2float %161 %161 + %197 = OpFMul %v2float %195 %196 + %198 = OpExtInst %v2float %1 FMax %197 %63 + %199 = OpVectorShuffle %v2float %194 %194 2 3 + %200 = OpExtInst %v2float %1 FMin %198 %199 + %201 = OpFMul %v2float %188 %192 + %202 = OpFMul %v2float %201 %200 + %203 = OpCompositeConstruct %v2float %147 %147 + %204 = OpFMul %v2float %202 %203 + %205 = OpCompositeExtract %float %145 3 + %206 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_11 + %207 = OpLoad %float %206 + %208 = OpFMul %float %205 %207 + %209 = OpCompositeExtract %float %145 2 + 
%210 = OpFMul %float %208 %146 + %211 = OpFAdd %float %209 %210 + %212 = OpFMul %float %211 %float_6_28318548 + %213 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_0 + %214 = OpLoad %v4float %213 + %215 = OpVectorShuffle %v3float %214 %214 0 1 2 + %216 = OpVectorShuffle %v3float %137 %108 0 0 0 + %217 = OpFMul %v3float %215 %216 + %218 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_1 + %219 = OpLoad %v4float %218 + %220 = OpVectorShuffle %v3float %219 %219 0 1 2 + %221 = OpVectorShuffle %v3float %137 %108 1 1 1 + %222 = OpFMul %v3float %220 %221 + %223 = OpFAdd %v3float %217 %222 + %224 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_2 + %225 = OpLoad %v4float %224 + %226 = OpVectorShuffle %v3float %225 %225 0 1 2 + %227 = OpVectorShuffle %v3float %137 %108 2 2 2 + %228 = OpFMul %v3float %226 %227 + %229 = OpFAdd %v3float %223 %228 + %230 = OpAccessChain %_ptr_Uniform_v4float %Primitive %int_0 %uint_3 + %231 = OpLoad %v4float %230 + %232 = OpVectorShuffle %v3float %231 %231 0 1 2 + %233 = OpFAdd %v3float %232 %124 + %234 = OpFAdd %v3float %229 %233 + %235 = OpCompositeExtract %float %234 0 + %236 = OpCompositeExtract %float %234 1 + %237 = OpCompositeExtract %float %234 2 + %238 = OpCompositeConstruct %v4float %235 %236 %237 %float_1 + %239 = OpVectorShuffle %v3float %238 %238 0 1 2 + %240 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_10 + %241 = OpLoad %float %240 + %242 = OpCompositeConstruct %v3float %241 %241 %241 + %243 = OpExtInst %v3float %1 FMix %116 %120 %242 + %244 = OpExtInst %v3float %1 FMix %114 %118 %242 + %245 = OpAccessChain %_ptr_Uniform_v4float %EmitterDynamicUniforms %int_3 + %246 = OpLoad %v4float %245 + %247 = OpVectorShuffle %v3float %246 %246 0 1 2 + %248 = OpAccessChain %_ptr_Uniform_float %EmitterDynamicUniforms %int_3 %int_3 + %249 = OpLoad %float %248 + %250 = OpCompositeConstruct %v3float %249 %249 %249 + %251 = OpExtInst %v3float %1 FMix %243 %247 %250 + %252 = OpFNegate %v3float 
%244 + %253 = OpAccessChain %_ptr_Uniform_v4float %EmitterDynamicUniforms %int_4 + %254 = OpLoad %v4float %253 + %255 = OpVectorShuffle %v3float %254 %254 0 1 2 + %256 = OpAccessChain %_ptr_Uniform_float %EmitterDynamicUniforms %int_4 %int_3 + %257 = OpLoad %float %256 + %258 = OpCompositeConstruct %v3float %257 %257 %257 + %259 = OpExtInst %v3float %1 FMix %252 %255 %258 + %260 = OpFSub %v3float %122 %239 + %261 = OpDot %float %260 %260 + %262 = OpExtInst %float %1 FMax %261 %float_0_00999999978 + %263 = OpExtInst %float %1 Sqrt %262 + %264 = OpCompositeConstruct %v3float %263 %263 %263 + %265 = OpFDiv %v3float %260 %264 + %266 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_9 %int_0 + %267 = OpLoad %float %266 + %268 = OpFOrdGreaterThan %bool %267 %float_0 + OpSelectionMerge %269 DontFlatten + OpBranchConditional %268 %270 %271 + %270 = OpLabel + %272 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_9 %int_1 + %273 = OpLoad %float %272 + %274 = OpFMul %float %261 %273 + %275 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_9 %int_2 + %276 = OpLoad %float %275 + %277 = OpFSub %float %274 %276 + %278 = OpExtInst %float %1 FClamp %277 %float_0 %float_1 + %279 = OpExtInst %v3float %1 Cross %265 %82 + %280 = OpDot %float %279 %279 + %281 = OpExtInst %float %1 FMax %280 %float_0_00999999978 + %282 = OpExtInst %float %1 Sqrt %281 + %283 = OpCompositeConstruct %v3float %282 %282 %282 + %284 = OpFDiv %v3float %279 %283 + %285 = OpExtInst %v3float %1 Cross %265 %284 + %286 = OpCompositeConstruct %v3float %278 %278 %278 + %287 = OpExtInst %v3float %1 FMix %251 %284 %286 + %288 = OpExtInst %v3float %1 Normalize %287 + %289 = OpExtInst %v3float %1 FMix %259 %285 %286 + %290 = OpExtInst %v3float %1 Normalize %289 + OpBranch %269 + %271 = OpLabel + %291 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_8 %int_1 + %292 = OpLoad %float %291 + %293 = OpFOrdGreaterThan %bool %292 %float_0 + OpSelectionMerge %294 Flatten + OpBranchConditional %293 
%295 %296 + %295 = OpLabel + %297 = OpExtInst %v3float %1 Cross %265 %160 + %298 = OpDot %float %297 %297 + %299 = OpExtInst %float %1 FMax %298 %float_0_00999999978 + %300 = OpExtInst %float %1 Sqrt %299 + %301 = OpCompositeConstruct %v3float %300 %300 %300 + %302 = OpFDiv %v3float %297 %301 + %303 = OpFNegate %v3float %160 + OpBranch %294 + %296 = OpLabel + %304 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_8 %int_2 + %305 = OpLoad %float %304 + %306 = OpFOrdGreaterThan %bool %305 %float_0 + OpSelectionMerge %307 None + OpBranchConditional %306 %308 %309 + %308 = OpLabel + %310 = OpExtInst %v3float %1 Cross %247 %265 + %311 = OpDot %float %310 %310 + %312 = OpExtInst %float %1 FMax %311 %float_0_00999999978 + %313 = OpExtInst %float %1 Sqrt %312 + %314 = OpCompositeConstruct %v3float %313 %313 %313 + %315 = OpFDiv %v3float %310 %314 + %316 = OpFNegate %v3float %315 + OpBranch %307 + %309 = OpLabel + %317 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_8 %int_3 + %318 = OpLoad %float %317 + %319 = OpFOrdGreaterThan %bool %318 %float_0 + OpSelectionMerge %320 None + OpBranchConditional %319 %321 %320 + %321 = OpLabel + %322 = OpExtInst %v3float %1 Cross %265 %82 + %323 = OpDot %float %322 %322 + %324 = OpExtInst %float %1 FMax %323 %float_0_00999999978 + %325 = OpExtInst %float %1 Sqrt %324 + %326 = OpCompositeConstruct %v3float %325 %325 %325 + %327 = OpFDiv %v3float %322 %326 + %328 = OpExtInst %v3float %1 Cross %265 %327 + OpBranch %320 + %320 = OpLabel + %329 = OpPhi %v3float %251 %309 %327 %321 + %330 = OpPhi %v3float %259 %309 %328 %321 + OpBranch %307 + %307 = OpLabel + %331 = OpPhi %v3float %247 %308 %329 %320 + %332 = OpPhi %v3float %316 %308 %330 %320 + OpBranch %294 + %294 = OpLabel + %333 = OpPhi %v3float %302 %295 %331 %307 + %334 = OpPhi %v3float %303 %295 %332 %307 + OpBranch %269 + %269 = OpLabel + %335 = OpPhi %v3float %288 %270 %333 %294 + %336 = OpPhi %v3float %290 %270 %334 %294 + %337 = OpAccessChain %_ptr_Uniform_float 
%EmitterUniforms %int_12 + %338 = OpLoad %float %337 + %339 = OpFAdd %float %212 %338 + %340 = OpExtInst %float %1 Sin %339 + %341 = OpExtInst %float %1 Cos %339 + %342 = OpCompositeConstruct %v3float %340 %340 %340 + %343 = OpFMul %v3float %342 %336 + %344 = OpCompositeConstruct %v3float %341 %341 %341 + %345 = OpFMul %v3float %344 %335 + %346 = OpFAdd %v3float %343 %345 + %347 = OpFMul %v3float %344 %336 + %348 = OpFMul %v3float %342 %335 + %349 = OpFSub %v3float %347 %348 + %350 = OpCompositeExtract %float %204 0 + %351 = OpCompositeExtract %float %112 0 + %352 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_15 %int_0 + %353 = OpLoad %float %352 + %354 = OpFAdd %float %351 %353 + %355 = OpFMul %float %350 %354 + %356 = OpCompositeConstruct %v3float %355 %355 %355 + %357 = OpFMul %v3float %356 %346 + %358 = OpCompositeExtract %float %204 1 + %359 = OpCompositeExtract %float %112 1 + %360 = OpAccessChain %_ptr_Uniform_float %EmitterUniforms %int_15 %int_1 + %361 = OpLoad %float %360 + %362 = OpFAdd %float %359 %361 + %363 = OpFMul %float %358 %362 + %364 = OpCompositeConstruct %v3float %363 %363 %363 + %365 = OpFMul %v3float %364 %349 + %366 = OpFAdd %v3float %357 %365 + %367 = OpFAdd %v3float %239 %366 + %368 = OpCompositeExtract %float %367 0 + %369 = OpCompositeExtract %float %367 1 + %370 = OpCompositeExtract %float %367 2 + %371 = OpCompositeConstruct %v4float %368 %369 %370 %float_1 + %372 = OpVectorShuffle %v4float %371 %371 4 5 6 3 + %373 = OpAccessChain %_ptr_Uniform_mat4v4float %MobileShadowDepthPass %int_20 + %374 = OpLoad %mat4v4float %373 + %375 = OpMatrixTimesVector %v4float %374 %372 + %376 = OpAccessChain %_ptr_Uniform_float %MobileShadowDepthPass %int_22 + %377 = OpLoad %float %376 + %378 = OpFOrdGreaterThan %bool %377 %float_0 + %379 = OpCompositeExtract %float %375 2 + %380 = OpFOrdLessThan %bool %379 %float_0 + %381 = OpLogicalAnd %bool %378 %380 + OpSelectionMerge %382 None + OpBranchConditional %381 %383 %382 + %383 = OpLabel + %384 
= OpCompositeInsert %v4float %float_9_99999997en07 %375 2 + %385 = OpCompositeInsert %v4float %float_1 %384 3 + OpBranch %382 + %382 = OpLabel + %386 = OpPhi %v4float %375 %269 %385 %383 + %387 = OpAccessChain %_ptr_Uniform_float %MobileShadowDepthPass %int_21 %int_0 + %388 = OpLoad %float %387 + %389 = OpAccessChain %_ptr_Uniform_float %MobileShadowDepthPass %int_21 %int_1 + %390 = OpLoad %float %389 + %391 = OpCompositeExtract %float %386 2 + %392 = OpFMul %float %391 %390 + %393 = OpFAdd %float %392 %388 + %394 = OpCompositeExtract %float %386 3 + %395 = OpFMul %float %393 %394 + %396 = OpCompositeInsert %v4float %395 %386 2 + OpStore %out_var_TEXCOORD6 %float_0 + OpStore %gl_Position %396 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/bitcast_icmp.asm.comp b/shaders/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 00000000000..b7b4e0b2e1e --- /dev/null +++ b/shaders/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = 
OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore 
%output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ugt + + %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/control-flow-hints.asm.comp b/shaders/asm/comp/control-flow-hints.asm.comp new file mode 100644 index 00000000000..74a15955c25 --- /dev/null +++ b/shaders/asm/comp/control-flow-hints.asm.comp @@ -0,0 +1,146 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 85 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource HLSL 500 + OpName %main "main" + OpName %_main_ "@main(" + OpName %i "i" + OpName %bar "bar" + OpMemberName %bar 0 "@data" + OpName %bar_0 "bar" + OpName %foo "foo" + OpName %i_0 "i" + OpName %v "v" + OpName %w "w" + OpName %value "value" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %bar 0 Offset 0 + OpDecorate %bar BufferBlock + OpDecorate %bar_0 DescriptorSet 0 + OpDecorate %bar_0 Binding 0 + OpDecorate %foo DescriptorSet 0 + OpDecorate %foo Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %int_16 = OpConstant %int 16 + %bool = OpTypeBool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %bar = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_bar = OpTypePointer Uniform %bar + %bar_0 = OpVariable %_ptr_Uniform_bar Uniform + %foo = OpVariable %_ptr_Uniform_bar Uniform +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_1 = OpConstant %int 1 + %int_15 = OpConstant %int 15 +%_ptr_Function_float = OpTypePointer Function %float + %int_10 = OpConstant %int 10 + %uint = OpTypeInt 32 0 + %uint_0 = 
OpConstant %uint 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %float_10 = OpConstant %float 10 + %int_20 = OpConstant %int 20 + %float_5 = OpConstant %float 5 + %72 = OpConstantComposite %v4float %float_5 %float_5 %float_5 %float_5 + %float_20 = OpConstant %float 20 + %float_40 = OpConstant %float 40 + %main = OpFunction %void None %3 + %5 = OpLabel + %84 = OpFunctionCall %void %_main_ + OpReturn + OpFunctionEnd + %_main_ = OpFunction %void None %3 + %7 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %i_0 = OpVariable %_ptr_Function_int Function + %v = OpVariable %_ptr_Function_float Function + %w = OpVariable %_ptr_Function_float Function + %value = OpVariable %_ptr_Function_float Function + OpStore %i %int_0 + OpBranch %12 + %12 = OpLabel + OpLoopMerge %14 %15 Unroll + OpBranch %16 + %16 = OpLabel + %17 = OpLoad %int %i + %20 = OpSLessThan %bool %17 %int_16 + OpBranchConditional %20 %13 %14 + %13 = OpLabel + %27 = OpLoad %int %i + %29 = OpLoad %int %i + %31 = OpAccessChain %_ptr_Uniform_v4float %foo %int_0 %29 + %32 = OpLoad %v4float %31 + %33 = OpAccessChain %_ptr_Uniform_v4float %bar_0 %int_0 %27 + OpStore %33 %32 + OpBranch %15 + %15 = OpLabel + %34 = OpLoad %int %i + %36 = OpIAdd %int %34 %int_1 + OpStore %i %36 + OpBranch %12 + %14 = OpLabel + OpStore %i_0 %int_0 + OpBranch %38 + %38 = OpLabel + OpLoopMerge %40 %41 DontUnroll + OpBranch %42 + %42 = OpLabel + %43 = OpLoad %int %i_0 + %44 = OpSLessThan %bool %43 %int_16 + OpBranchConditional %44 %39 %40 + %39 = OpLabel + %46 = OpLoad %int %i_0 + %47 = OpISub %int %int_15 %46 + %48 = OpLoad %int %i_0 + %49 = OpAccessChain %_ptr_Uniform_v4float %foo %int_0 %48 + %50 = OpLoad %v4float %49 + %51 = OpAccessChain %_ptr_Uniform_v4float %bar_0 %int_0 %47 + OpStore %51 %50 + OpBranch %41 + %41 = OpLabel + %52 = OpLoad %int %i_0 + %53 = OpIAdd %int %52 %int_1 + OpStore %i_0 %53 + OpBranch %38 + %40 = OpLabel + %60 = OpAccessChain %_ptr_Uniform_float %bar_0 %int_0 %int_10 %uint_0 + %61 = OpLoad 
%float %60 + OpStore %v %61 + %63 = OpAccessChain %_ptr_Uniform_float %foo %int_0 %int_10 %uint_0 + %64 = OpLoad %float %63 + OpStore %w %64 + %65 = OpLoad %float %v + %67 = OpFOrdGreaterThan %bool %65 %float_10 + OpSelectionMerge %69 DontFlatten + OpBranchConditional %67 %68 %69 + %68 = OpLabel + %73 = OpAccessChain %_ptr_Uniform_v4float %foo %int_0 %int_20 + OpStore %73 %72 + OpBranch %69 + %69 = OpLabel + OpStore %value %float_20 + %76 = OpLoad %float %w + %78 = OpFOrdGreaterThan %bool %76 %float_40 + OpSelectionMerge %80 Flatten + OpBranchConditional %78 %79 %80 + %79 = OpLabel + OpStore %value %float_20 + OpBranch %80 + %80 = OpLabel + %81 = OpLoad %float %value + %82 = OpCompositeConstruct %v4float %81 %81 %81 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %foo %int_0 %int_20 + OpStore %83 %82 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp new file mode 100644 index 00000000000..6c060eedad9 --- /dev/null +++ b/shaders/asm/comp/nmin-max-clamp.relax-nan.asm.comp @@ -0,0 +1,203 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 139 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a1" + OpMemberName %SSBO 1 "a2" + OpMemberName %SSBO 2 "a3" + OpMemberName %SSBO 3 "a4" + OpMemberName %SSBO 4 "b1" + OpMemberName %SSBO 5 "b2" + OpMemberName %SSBO 6 "b3" + OpMemberName %SSBO 7 "b4" + OpMemberName %SSBO 8 "c1" + OpMemberName %SSBO 9 "c2" + OpMemberName %SSBO 10 "c3" + OpMemberName %SSBO 11 "c4" + OpName %_ "" + OpName %i "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 8 + OpMemberDecorate %SSBO 2 Offset 16 + OpMemberDecorate %SSBO 3 Offset 32 + OpMemberDecorate %SSBO 4 Offset 48 + 
OpMemberDecorate %SSBO 5 Offset 56 + OpMemberDecorate %SSBO 6 Offset 64 + OpMemberDecorate %SSBO 7 Offset 80 + OpMemberDecorate %SSBO 8 Offset 96 + OpMemberDecorate %SSBO 9 Offset 104 + OpMemberDecorate %SSBO 10 Offset 112 + OpMemberDecorate %SSBO 11 Offset 128 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %7 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 + %SSBO = OpTypeStruct %float %v2float %v3float %v4float %float %v2float %v3float %v4float %float %v2float %v3float %v4float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_4 = OpConstant %int 4 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_8 = OpConstant %int 8 + %int_1 = OpConstant %int 1 + %int_5 = OpConstant %int 5 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float + %int_9 = OpConstant %int 9 + %int_2 = OpConstant %int 2 + %int_6 = OpConstant %int 6 +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_10 = OpConstant %int 10 + %int_3 = OpConstant %int 3 + %int_7 = OpConstant %int 7 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_11 = OpConstant %int 11 +%_ptr_Function_int = OpTypePointer Function %int + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %main = OpFunction %void None %7 + %35 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %36 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %37 = OpLoad %float %36 + %38 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %39 = OpLoad %float %38 + %40 = OpExtInst %float %1 NMin %37 %39 + %41 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %41 %40 + %42 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %43 = OpLoad %v2float %42 + %44 = OpAccessChain %_ptr_Uniform_v2float %_ 
%int_9 + %45 = OpLoad %v2float %44 + %46 = OpExtInst %v2float %1 NMin %43 %45 + %47 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %47 %46 + %48 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %49 = OpLoad %v3float %48 + %50 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %51 = OpLoad %v3float %50 + %52 = OpExtInst %v3float %1 NMin %49 %51 + %53 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %53 %52 + %54 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %55 = OpLoad %v4float %54 + %56 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %57 = OpLoad %v4float %56 + %58 = OpExtInst %v4float %1 NMin %55 %57 + %59 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %59 %58 + %60 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %61 = OpLoad %float %60 + %62 = OpAccessChain %_ptr_Uniform_float %_ %int_8 + %63 = OpLoad %float %62 + %64 = OpExtInst %float %1 NMax %61 %63 + %65 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %65 %64 + %66 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %67 = OpLoad %v2float %66 + %68 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %69 = OpLoad %v2float %68 + %70 = OpExtInst %v2float %1 NMax %67 %69 + %71 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %71 %70 + %72 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %73 = OpLoad %v3float %72 + %74 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %75 = OpLoad %v3float %74 + %76 = OpExtInst %v3float %1 NMax %73 %75 + %77 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %77 %76 + %78 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %79 = OpLoad %v4float %78 + %80 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %81 = OpLoad %v4float %80 + %82 = OpExtInst %v4float %1 NMax %79 %81 + %83 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %83 %82 + %84 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %85 = OpLoad %float %84 + %86 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %87 = OpLoad %float %86 + %88 = OpAccessChain 
%_ptr_Uniform_float %_ %int_8 + %89 = OpLoad %float %88 + %90 = OpExtInst %float %1 NClamp %85 %87 %89 + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %91 %90 + %92 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + %93 = OpLoad %v2float %92 + %94 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %95 = OpLoad %v2float %94 + %96 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %97 = OpLoad %v2float %96 + %98 = OpExtInst %v2float %1 NClamp %93 %95 %97 + %99 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %99 %98 + %100 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + %101 = OpLoad %v3float %100 + %102 = OpAccessChain %_ptr_Uniform_v3float %_ %int_6 + %103 = OpLoad %v3float %102 + %104 = OpAccessChain %_ptr_Uniform_v3float %_ %int_10 + %105 = OpLoad %v3float %104 + %106 = OpExtInst %v3float %1 NClamp %101 %103 %105 + %107 = OpAccessChain %_ptr_Uniform_v3float %_ %int_2 + OpStore %107 %106 + %108 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + %109 = OpLoad %v4float %108 + %110 = OpAccessChain %_ptr_Uniform_v4float %_ %int_7 + %111 = OpLoad %v4float %110 + %112 = OpAccessChain %_ptr_Uniform_v4float %_ %int_11 + %113 = OpLoad %v4float %112 + %114 = OpExtInst %v4float %1 NClamp %109 %111 %113 + %115 = OpAccessChain %_ptr_Uniform_v4float %_ %int_3 + OpStore %115 %114 + OpStore %i %int_0 + OpBranch %116 + %116 = OpLabel + OpLoopMerge %117 %118 None + OpBranch %119 + %119 = OpLabel + %120 = OpLoad %int %i + %121 = OpSLessThan %bool %120 %int_2 + OpBranchConditional %121 %122 %117 + %122 = OpLabel + %123 = OpAccessChain %_ptr_Uniform_v2float %_ %int_5 + %124 = OpLoad %v2float %123 + %125 = OpAccessChain %_ptr_Uniform_v2float %_ %int_9 + %126 = OpLoad %v2float %125 + %127 = OpExtInst %v2float %1 NMin %124 %126 + %128 = OpAccessChain %_ptr_Uniform_v2float %_ %int_1 + OpStore %128 %127 + OpBranch %118 + %118 = OpLabel + %129 = OpLoad %int %i + %130 = OpIAdd %int %129 %int_1 + OpStore %i %130 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %132 
= OpLoad %float %131 + %133 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_0 + %134 = OpLoad %float %133 + %135 = OpAccessChain %_ptr_Uniform_float %_ %int_5 %uint_1 + %136 = OpLoad %float %135 + %137 = OpExtInst %float %1 NClamp %132 %134 %136 + %138 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %138 %137 + OpBranch %116 + %117 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/switch-break-ladder.asm.comp b/shaders/asm/comp/switch-break-ladder.asm.invalid.comp similarity index 100% rename from shaders/asm/comp/switch-break-ladder.asm.comp rename to shaders/asm/comp/switch-break-ladder.asm.invalid.comp diff --git a/shaders/asm/comp/undefined-constant-composite.asm.comp b/shaders/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 00000000000..9de0501fe21 --- /dev/null +++ b/shaders/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,102 @@ +; +; The shader below is based on the following GLSL shader: +; +; #version 450 +; +; struct Pair { +; int first; +; int second; +; }; +; +; const Pair constant_pair = { 100, 200 }; +; +; layout(set=0, binding=0, std430) buffer InputBlock { +; int array[10]; +; } inputValues; +; +; layout(set=0, binding=1, std430) buffer OutputBlock { +; int array[10]; +; } outputValues; +; +; int add_second (int value, Pair pair) { +; return value + pair.second; +; } +; +; void main() { +; uint idx = gl_GlobalInvocationID.x; +; outputValues.array[idx] = add_second(inputValues.array[idx], constant_pair); +; } +; +; However, the first element of constant_pair has been modified to be undefined. 
+; + OpCapability Shader + %std450 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_int_uint_10 ArrayStride 4 + OpMemberDecorate %OutputBlock 0 Offset 0 + OpDecorate %OutputBlock BufferBlock + OpDecorate %outputValues DescriptorSet 0 + OpDecorate %outputValues Binding 1 + OpMemberDecorate %InputBlock 0 Offset 0 + OpDecorate %InputBlock BufferBlock + OpDecorate %inputValues DescriptorSet 0 + OpDecorate %inputValues Binding 0 + %void = OpTypeVoid + %void_func = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_200 = OpConstant %int 200 + %uint_0 = OpConstant %uint 0 + %uint_10 = OpConstant %uint 10 + %_ptr_Function_int = OpTypePointer Function %int + %Pair = OpTypeStruct %int %int + %_ptr_Function_Pair = OpTypePointer Function %Pair + %add_second_func_type = OpTypeFunction %int %_ptr_Function_int %_ptr_Function_Pair + %_ptr_Function_uint = OpTypePointer Function %uint + %_ptr_Input_v3uint = OpTypePointer Input %v3uint + %_ptr_Input_uint = OpTypePointer Input %uint + %_arr_int_uint_10 = OpTypeArray %int %uint_10 + %OutputBlock = OpTypeStruct %_arr_int_uint_10 +%_ptr_Uniform_OutputBlock = OpTypePointer Uniform %OutputBlock + %outputValues = OpVariable %_ptr_Uniform_OutputBlock Uniform + %InputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_InputBlock = OpTypePointer Uniform %InputBlock + %inputValues = OpVariable %_ptr_Uniform_InputBlock Uniform + ; Replaced %int_100 with an undefined int. + %undef_int = OpUndef %int + ; Composed a constant Pair with the undefined int in the first member. 
+ %const_Pair = OpConstantComposite %Pair %undef_int %int_200 + %_ptr_Uniform_int = OpTypePointer Uniform %int + %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %void_func + %main_label = OpLabel + %param_1 = OpVariable %_ptr_Function_int Function + %param_2 = OpVariable %_ptr_Function_Pair Function + %gidx_ptr = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %gidx = OpLoad %uint %gidx_ptr + %input_value_ptr = OpAccessChain %_ptr_Uniform_int %inputValues %int_0 %gidx + %input_value = OpLoad %int %input_value_ptr + OpStore %param_1 %input_value + OpStore %param_2 %const_Pair + %retval = OpFunctionCall %int %add_second %param_1 %param_2 + %output_value_ptr = OpAccessChain %_ptr_Uniform_int %outputValues %int_0 %gidx + OpStore %output_value_ptr %retval + OpReturn + OpFunctionEnd + %add_second = OpFunction %int None %add_second_func_type + %value_ptr = OpFunctionParameter %_ptr_Function_int + %pair = OpFunctionParameter %_ptr_Function_Pair + %add_second_label = OpLabel + %value = OpLoad %int %value_ptr + ; Access the second struct member, which is defined. 
+ %pair_second_ptr = OpAccessChain %_ptr_Function_int %pair %int_1 + %pair_second = OpLoad %int %pair_second_ptr + %add_result = OpIAdd %int %value %pair_second + OpReturnValue %add_result + OpFunctionEnd diff --git a/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag b/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag new file mode 100644 index 00000000000..a3d64c09d7e --- /dev/null +++ b/shaders/asm/frag/image-fetch-no-sampler.no-samplerless.asm.vk.frag @@ -0,0 +1,163 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 2 +; Bound: 113 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %xIn_1 %_entryPointOutput + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 500 + OpName %main "main" + OpName %sample_fetch_t21_vi3_ "sample_fetch(t21;vi3;" + OpName %tex "tex" + OpName %UV "UV" + OpName %sample_sampler_t21_vf2_ "sample_sampler(t21;vf2;" + OpName %tex_0 "tex" + OpName %UV_0 "UV" + OpName %_main_vf4_ "@main(vf4;" + OpName %xIn "xIn" + OpName %Sampler "Sampler" + OpName %coord "coord" + OpName %value "value" + OpName %SampledImage "SampledImage" + OpName %param "param" + OpName %param_0 "param" + OpName %param_1 "param" + OpName %param_2 "param" + OpName %xIn_0 "xIn" + OpName %xIn_1 "xIn" + OpName %_entryPointOutput "@entryPointOutput" + OpName %param_3 "param" + OpDecorate %Sampler DescriptorSet 0 + OpDecorate %Sampler Binding 0 + OpDecorate %SampledImage DescriptorSet 0 + OpDecorate %SampledImage Binding 0 + OpDecorate %xIn_1 BuiltIn FragCoord + OpDecorate %_entryPointOutput Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %7 = OpTypeImage %float 2D 0 0 0 1 Unknown +%_ptr_Function_7 = OpTypePointer Function %7 + %int = OpTypeInt 32 1 + %v3int = OpTypeVector %int 3 +%_ptr_Function_v3int = OpTypePointer Function %v3int + %v4float = OpTypeVector %float 4 + %13 = 
OpTypeFunction %v4float %_ptr_Function_7 %_ptr_Function_v3int + %v2float = OpTypeVector %float 2 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %20 = OpTypeFunction %v4float %_ptr_Function_7 %_ptr_Function_v2float +%_ptr_Function_v4float = OpTypePointer Function %v4float + %26 = OpTypeFunction %v4float %_ptr_Function_v4float + %v2int = OpTypeVector %int 2 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 +%_ptr_Function_int = OpTypePointer Function %int + %43 = OpTypeSampler +%_ptr_UniformConstant_43 = OpTypePointer UniformConstant %43 + %Sampler = OpVariable %_ptr_UniformConstant_43 UniformConstant + %47 = OpTypeSampledImage %7 + %uint_0 = OpConstant %uint 0 +%_ptr_Function_float = OpTypePointer Function %float + %float_1280 = OpConstant %float 1280 + %uint_1 = OpConstant %uint 1 + %float_720 = OpConstant %float 720 + %int_0 = OpConstant %int 0 +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 +%SampledImage = OpVariable %_ptr_UniformConstant_7 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float + %xIn_1 = OpVariable %_ptr_Input_v4float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %xIn_0 = OpVariable %_ptr_Function_v4float Function + %param_3 = OpVariable %_ptr_Function_v4float Function + %107 = OpLoad %v4float %xIn_1 + OpStore %xIn_0 %107 + %111 = OpLoad %v4float %xIn_0 + OpStore %param_3 %111 + %112 = OpFunctionCall %v4float %_main_vf4_ %param_3 + OpStore %_entryPointOutput %112 + OpReturn + OpFunctionEnd +%sample_fetch_t21_vi3_ = OpFunction %v4float None %13 + %tex = OpFunctionParameter %_ptr_Function_7 + %UV = OpFunctionParameter %_ptr_Function_v3int + %17 = OpLabel + %30 = OpLoad %7 %tex + %32 = OpLoad %v3int %UV + %33 = OpVectorShuffle %v2int %32 %32 0 1 + %37 = OpAccessChain %_ptr_Function_int %UV %uint_2 + %38 = OpLoad %int %37 + %39 = OpImageFetch %v4float %30 %33 Lod %38 + 
OpReturnValue %39 + OpFunctionEnd +%sample_sampler_t21_vf2_ = OpFunction %v4float None %20 + %tex_0 = OpFunctionParameter %_ptr_Function_7 + %UV_0 = OpFunctionParameter %_ptr_Function_v2float + %24 = OpLabel + %42 = OpLoad %7 %tex_0 + %46 = OpLoad %43 %Sampler + %48 = OpSampledImage %47 %42 %46 + %49 = OpLoad %v2float %UV_0 + %50 = OpImageSampleImplicitLod %v4float %48 %49 + OpReturnValue %50 + OpFunctionEnd + %_main_vf4_ = OpFunction %v4float None %26 + %xIn = OpFunctionParameter %_ptr_Function_v4float + %29 = OpLabel + %coord = OpVariable %_ptr_Function_v3int Function + %value = OpVariable %_ptr_Function_v4float Function + %param = OpVariable %_ptr_Function_7 Function + %param_0 = OpVariable %_ptr_Function_v3int Function + %param_1 = OpVariable %_ptr_Function_7 Function + %param_2 = OpVariable %_ptr_Function_v2float Function + %56 = OpAccessChain %_ptr_Function_float %xIn %uint_0 + %57 = OpLoad %float %56 + %59 = OpFMul %float %57 %float_1280 + %60 = OpConvertFToS %int %59 + %62 = OpAccessChain %_ptr_Function_float %xIn %uint_1 + %63 = OpLoad %float %62 + %65 = OpFMul %float %63 %float_720 + %66 = OpConvertFToS %int %65 + %68 = OpCompositeConstruct %v3int %60 %66 %int_0 + OpStore %coord %68 + %73 = OpLoad %7 %SampledImage + OpStore %param %73 + %75 = OpLoad %v3int %coord + OpStore %param_0 %75 + %76 = OpFunctionCall %v4float %sample_fetch_t21_vi3_ %param %param_0 + OpStore %value %76 + %77 = OpLoad %7 %SampledImage + %78 = OpLoad %v3int %coord + %79 = OpVectorShuffle %v2int %78 %78 0 1 + %80 = OpAccessChain %_ptr_Function_int %coord %uint_2 + %81 = OpLoad %int %80 + %82 = OpImageFetch %v4float %77 %79 Lod %81 + %83 = OpLoad %v4float %value + %84 = OpFAdd %v4float %83 %82 + OpStore %value %84 + %86 = OpLoad %7 %SampledImage + OpStore %param_1 %86 + %88 = OpLoad %v4float %xIn + %89 = OpVectorShuffle %v2float %88 %88 0 1 + OpStore %param_2 %89 + %90 = OpFunctionCall %v4float %sample_sampler_t21_vf2_ %param_1 %param_2 + %91 = OpLoad %v4float %value + %92 = OpFAdd 
%v4float %91 %90 + OpStore %value %92 + %93 = OpLoad %7 %SampledImage + %94 = OpLoad %43 %Sampler + %95 = OpSampledImage %47 %93 %94 + %96 = OpLoad %v4float %xIn + %97 = OpVectorShuffle %v2float %96 %96 0 1 + %98 = OpImageSampleImplicitLod %v4float %95 %97 + %99 = OpLoad %v4float %value + %100 = OpFAdd %v4float %99 %98 + OpStore %value %100 + %101 = OpLoad %v4float %value + OpReturnValue %101 + OpFunctionEnd diff --git a/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag b/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag new file mode 100644 index 00000000000..a232bd48987 --- /dev/null +++ b/shaders/asm/frag/image-query-no-sampler.no-samplerless.vk.asm.frag @@ -0,0 +1,57 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 36 +; Schema: 0 + OpCapability Shader + OpCapability ImageQuery + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpName %main "main" + OpName %b "b" + OpName %uSampler2D "uSampler2D" + OpName %c "c" + OpName %uSampler2DMS "uSampler2DMS" + OpName %l1 "l1" + OpName %s0 "s0" + OpDecorate %uSampler2D DescriptorSet 0 + OpDecorate %uSampler2D Binding 0 + OpDecorate %uSampler2DMS DescriptorSet 0 + OpDecorate %uSampler2DMS Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 +%_ptr_Function_v2int = OpTypePointer Function %v2int + %float = OpTypeFloat 32 + %11 = OpTypeImage %float 2D 0 0 0 1 Unknown +%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %11 + %uSampler2D = OpVariable %_ptr_UniformConstant_12 UniformConstant + %int_0 = OpConstant %int 0 + %20 = OpTypeImage %float 2D 0 0 1 1 Unknown +%_ptr_UniformConstant_21 = OpTypePointer UniformConstant %20 +%uSampler2DMS = OpVariable %_ptr_UniformConstant_21 UniformConstant +%_ptr_Function_int = OpTypePointer Function %int + %main = OpFunction %void 
None %3 + %5 = OpLabel + %b = OpVariable %_ptr_Function_v2int Function + %c = OpVariable %_ptr_Function_v2int Function + %l1 = OpVariable %_ptr_Function_int Function + %s0 = OpVariable %_ptr_Function_int Function + %15 = OpLoad %11 %uSampler2D + %18 = OpImageQuerySizeLod %v2int %15 %int_0 + OpStore %b %18 + %24 = OpLoad %20 %uSampler2DMS + %26 = OpImageQuerySize %v2int %24 + OpStore %c %26 + %29 = OpLoad %11 %uSampler2D + %31 = OpImageQueryLevels %int %29 + OpStore %l1 %31 + %33 = OpLoad %20 %uSampler2DMS + %35 = OpImageQuerySamples %int %33 + OpStore %s0 %35 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag b/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag new file mode 100644 index 00000000000..628a9f5ba91 --- /dev/null +++ b/shaders/asm/frag/out-of-bounds-access-opspecconstant.asm.frag @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 10 +; Bound: 21 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %v "v" + OpDecorate %v RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %bool = OpTypeBool + %false = OpConstantFalse %bool + %float_99 = OpConstant %float 99 + %uint = OpTypeInt 32 0 +%uint_spec_3 = OpSpecConstant %uint 3 +%_ptr_Function_float = OpTypePointer Function %float + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_v3float Function + OpStore %v %11 + OpSelectionMerge %15 None + OpBranchConditional %false %14 
%15 + %14 = OpLabel + %20 = OpAccessChain %_ptr_Function_float %v %uint_spec_3 + OpStore %20 %float_99 + OpBranch %15 + %15 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/out-of-bounds-access.asm.frag b/shaders/asm/frag/out-of-bounds-access.asm.frag new file mode 100644 index 00000000000..542b74b2fd9 --- /dev/null +++ b/shaders/asm/frag/out-of-bounds-access.asm.frag @@ -0,0 +1,47 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 10 +; Bound: 21 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource ESSL 320 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %v "v" + OpDecorate %v RelaxedPrecision + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %float_0 = OpConstant %float 0 + %11 = OpConstantComposite %v3float %float_0 %float_0 %float_0 + %bool = OpTypeBool + %false = OpConstantFalse %bool + %float_99 = OpConstant %float 99 + %float_88 = OpConstant %float 88 + %uint = OpTypeInt 32 0 + %uint_3 = OpConstant %uint 3 + %sint = OpTypeInt 32 1 + %sint_3 = OpConstant %sint -1 +%_ptr_Function_float = OpTypePointer Function %float + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_v3float Function + OpStore %v %11 + OpSelectionMerge %15 None + OpBranchConditional %false %14 %15 + %14 = OpLabel + %20 = OpAccessChain %_ptr_Function_float %v %uint_3 + OpStore %20 %float_99 + %99 = OpAccessChain %_ptr_Function_float %v %sint_3 + OpStore %99 %float_88 + OpBranch %15 + %15 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/pack-and-unpack-uint2.asm.frag b/shaders/asm/frag/pack-and-unpack-uint2.asm.frag new file mode 100644 index 
00000000000..43d0970e8d5 --- /dev/null +++ b/shaders/asm/frag/pack-and-unpack-uint2.asm.frag @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 34 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_ARB_gpu_shader_int64" + OpName %main "main" + OpName %packed "packed" + OpName %unpacked "unpacked" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %ulong = OpTypeInt 64 0 +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %uint_18 = OpConstant %uint 18 + %uint_52 = OpConstant %uint 52 + %13 = OpConstantComposite %v2uint %uint_18 %uint_52 +%_ptr_Function_v2uint = OpTypePointer Function %v2uint + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %uint_0 = OpConstant %uint 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_1 = OpConstant %uint 1 + %float_1 = OpConstant %float 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %packed = OpVariable %_ptr_Function_ulong Function + %unpacked = OpVariable %_ptr_Function_v2uint Function + %14 = OpBitcast %ulong %13 + OpStore %packed %14 + %17 = OpLoad %ulong %packed + %18 = OpBitcast %v2uint %17 + OpStore %unpacked %18 + %25 = OpAccessChain %_ptr_Function_uint %unpacked %uint_0 + %26 = OpLoad %uint %25 + %27 = OpConvertUToF %float %26 + %29 = OpAccessChain %_ptr_Function_uint %unpacked %uint_1 + %30 = OpLoad %uint %29 + %31 = OpConvertUToF %float %30 + %33 = OpCompositeConstruct %v4float %27 %31 %float_1 %float_1 + OpStore %FragColor %33 + OpReturn + OpFunctionEnd diff --git 
a/shaders/asm/frag/switch-preserve-sign-extension.asm.frag b/shaders/asm/frag/switch-preserve-sign-extension.asm.frag new file mode 100644 index 00000000000..97140ee5536 --- /dev/null +++ b/shaders/asm/frag/switch-preserve-sign-extension.asm.frag @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Shaderc over Glslang; 10 +; Bound: 19 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 330 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpName %main "main" + OpName %sw "sw" + OpName %result "result" + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_42 = OpConstant %int 42 + %int_0 = OpConstant %int 0 + %int_420 = OpConstant %int 420 + %main = OpFunction %void None %3 + %5 = OpLabel + %sw = OpVariable %_ptr_Function_int Function + %result = OpVariable %_ptr_Function_int Function + OpStore %sw %int_42 + OpStore %result %int_0 + %12 = OpLoad %int %sw + OpSelectionMerge %16 None + OpSwitch %12 %16 -42 %13 420 %14 -1234 %15 + %13 = OpLabel + OpStore %result %int_42 + OpBranch %14 + %14 = OpLabel + OpStore %result %int_420 + OpBranch %15 + %15 = OpLabel + OpStore %result %int_420 + OpBranch %16 + %16 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag index ae7a972d7b2..e7e6f37ea27 100644 --- a/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag +++ b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag @@ -5,6 +5,7 @@ ; Schema: 0 OpCapability Shader OpCapability StorageInputOutput16 + OpCapability Float16 OpExtension "SPV_KHR_16bit_storage" %1 = OpExtInstImport "GLSL.std.450" OpMemoryModel Logical GLSL450 diff --git a/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert 
b/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert index b566a3d1a0f..1ff67798bdd 100644 --- a/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert +++ b/shaders/asm/vert/spec-constant-op-composite.asm.vk.vert @@ -49,8 +49,10 @@ %28 = OpConstant %17 2 %33 = OpConstant %12 20 %34 = OpConstant %12 30 + %int_3 = OpConstant %12 -3 + %bar = OpSpecConstantOp %12 SRem %13 %int_3 %35 = OpTypeVector %12 4 - %36 = OpSpecConstantComposite %35 %33 %34 %15 %15 + %36 = OpSpecConstantComposite %35 %33 %34 %15 %bar %40 = OpTypeVector %12 2 %41 = OpSpecConstantOp %40 VectorShuffle %36 %36 1 0 %foo = OpSpecConstantOp %12 CompositeExtract %36 1 diff --git a/shaders/desktop-only/frag/image-size.frag b/shaders/desktop-only/frag/image-size.frag new file mode 100644 index 00000000000..ffd0bfa20d3 --- /dev/null +++ b/shaders/desktop-only/frag/image-size.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(r32f, set = 0, binding = 0) uniform image2D uImage1; +layout(r32f, set = 0, binding = 1) uniform image2D uImage2; + +void main() +{ + FragColor = vec4(imageSize(uImage1), imageSize(uImage2)); +} diff --git a/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag b/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag new file mode 100644 index 00000000000..ffd0bfa20d3 --- /dev/null +++ b/shaders/desktop-only/frag/image-size.no-qualifier-deduction.frag @@ -0,0 +1,10 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(r32f, set = 0, binding = 0) uniform image2D uImage1; +layout(r32f, set = 0, binding = 1) uniform image2D uImage2; + +void main() +{ + FragColor = vec4(imageSize(uImage1), imageSize(uImage2)); +} diff --git a/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert b/shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert similarity index 100% rename from shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vert rename to 
shaders/desktop-only/vert/shader-draw-parameters-450.desktop.vk.vert diff --git a/shaders/desktop-only/vert/shader-draw-parameters.desktop.vert b/shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert similarity index 100% rename from shaders/desktop-only/vert/shader-draw-parameters.desktop.vert rename to shaders/desktop-only/vert/shader-draw-parameters.desktop.vk.vert diff --git a/shaders/frag/avoid-expression-lowering-to-loop.frag b/shaders/frag/avoid-expression-lowering-to-loop.frag new file mode 100644 index 00000000000..3473875a40a --- /dev/null +++ b/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -0,0 +1,23 @@ +#version 310 es +precision mediump float; +precision mediump int; + +layout(binding = 0) uniform mediump sampler2D tex; +layout(binding = 1) uniform Count +{ + float count; +}; + +layout(location = 0) in highp vec4 vertex; +layout(location = 0) out vec4 fragColor; + +void main() { + + highp float size = 1.0 / float(textureSize(tex, 0).x); + float r = 0.0; + float d = dFdx(vertex.x); + for (float i = 0.0; i < count ; i += 1.0) + r += size * d; + + fragColor = vec4(r); +} diff --git a/shaders/frag/barycentric-khr.frag b/shaders/frag/barycentric-khr.frag new file mode 100644 index 00000000000..fcaca04e23c --- /dev/null +++ b/shaders/frag/barycentric-khr.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_EXT_fragment_shader_barycentric : require + +layout(location = 0) out vec2 value; +layout(location = 0) pervertexEXT in vec2 vUV[3]; +layout(location = 3) pervertexEXT in vec2 vUV2[3]; + +void main () { + value = gl_BaryCoordEXT.x * vUV[0] + gl_BaryCoordEXT.y * vUV[1] + gl_BaryCoordEXT.z * vUV[2]; + value += gl_BaryCoordNoPerspEXT.x * vUV2[0] + gl_BaryCoordNoPerspEXT.y * vUV2[1] + gl_BaryCoordNoPerspEXT.z * vUV2[2]; +} diff --git a/shaders/frag/barycentric-nv.frag b/shaders/frag/barycentric-nv.frag index 340408370b9..38d651a780b 100644 --- a/shaders/frag/barycentric-nv.frag +++ b/shaders/frag/barycentric-nv.frag @@ -2,17 +2,10 @@ 
#extension GL_NV_fragment_shader_barycentric : require layout(location = 0) out vec2 value; - -layout(set = 0, binding = 0) readonly buffer Vertices -{ - vec2 uvs[]; -}; +layout(location = 0) pervertexNV in vec2 vUV[3]; +layout(location = 1) pervertexNV in vec2 vUV2[3]; void main () { - int prim = gl_PrimitiveID; - vec2 uv0 = uvs[3 * prim + 0]; - vec2 uv1 = uvs[3 * prim + 1]; - vec2 uv2 = uvs[3 * prim + 2]; - value = gl_BaryCoordNV.x * uv0 + gl_BaryCoordNV.y * uv1 + gl_BaryCoordNV.z * uv2; - value += gl_BaryCoordNoPerspNV.x * uv0 + gl_BaryCoordNoPerspNV.y * uv1 + gl_BaryCoordNoPerspNV.z * uv2; + value = gl_BaryCoordNV.x * vUV[0] + gl_BaryCoordNV.y * vUV[1] + gl_BaryCoordNV.z * vUV[2]; + value += gl_BaryCoordNoPerspNV.x * vUV2[0] + gl_BaryCoordNoPerspNV.y * vUV2[1] + gl_BaryCoordNoPerspNV.z * vUV2[2]; } diff --git a/shaders/frag/modf-pointer-function-analysis.frag b/shaders/frag/modf-pointer-function-analysis.frag new file mode 100644 index 00000000000..21e51262f0c --- /dev/null +++ b/shaders/frag/modf-pointer-function-analysis.frag @@ -0,0 +1,25 @@ +#version 450 + +layout(location = 0) in vec4 v; +layout(location = 0) out vec4 vo0; +layout(location = 1) out vec4 vo1; + +vec4 modf_inner(out vec4 tmp) +{ + return modf(v, tmp); +} + +float modf_inner_partial(inout vec4 tmp) +{ + return modf(v.x, tmp.x); +} + +void main() +{ + vec4 tmp; + vo0 = modf_inner(tmp); + vo1 = tmp; + + vo0.x += modf_inner_partial(tmp); + vo1.x += tmp.x; +} diff --git a/shaders/frag/pixel-interlock-ordered.frag b/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 00000000000..4439f0672b5 --- /dev/null +++ b/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + 
beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/pixel-interlock-unordered.frag b/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 00000000000..f8fd468c1bd --- /dev/null +++ b/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_unordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/post-depth-coverage-es.frag b/shaders/frag/post-depth-coverage-es.frag new file mode 100644 index 00000000000..ecc57e4a8f2 --- /dev/null +++ b/shaders/frag/post-depth-coverage-es.frag @@ -0,0 +1,13 @@ +#version 310 es +#extension GL_EXT_post_depth_coverage : require +#extension GL_OES_sample_variables : require +precision mediump float; + +layout(early_fragment_tests, post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(gl_SampleMaskIn[0]); +} diff --git a/shaders/frag/post-depth-coverage.frag b/shaders/frag/post-depth-coverage.frag new file mode 100644 index 00000000000..4f134b4f3bb --- /dev/null +++ b/shaders/frag/post-depth-coverage.frag @@ -0,0 +1,11 @@ +#version 450 +#extension GL_ARB_post_depth_coverage : require + +layout(post_depth_coverage) in; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(gl_SampleMaskIn[0]); +} diff --git a/shaders/frag/round-even.frag b/shaders/frag/round-even.frag new file mode 100644 
index 00000000000..594ac162939 --- /dev/null +++ b/shaders/frag/round-even.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = roundEven(vA); + FragColor *= roundEven(vB); +} diff --git a/shaders/frag/round.frag b/shaders/frag/round.frag new file mode 100644 index 00000000000..c87b0abbff5 --- /dev/null +++ b/shaders/frag/round.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = round(vA); + FragColor *= round(vB); +} diff --git a/shaders/frag/sample-interlock-ordered.frag b/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 00000000000..fa80dc9f82b --- /dev/null +++ b/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(sample_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, gl_SampleMaskIn[0]); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/sample-interlock-unordered.frag b/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 00000000000..6fe5437f3aa --- /dev/null +++ b/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(sample_interlock_unordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + 
beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/shaders/frag/struct-type-unrelated-alias.frag b/shaders/frag/struct-type-unrelated-alias.frag new file mode 100644 index 00000000000..d1c7905225d --- /dev/null +++ b/shaders/frag/struct-type-unrelated-alias.frag @@ -0,0 +1,19 @@ +#version 450 + +layout(location = 0) out float FragColor; + +struct T +{ + float a; +}; + +void main() +{ + T foo; + struct T { float b; }; + T bar; + + foo.a = 10.0; + bar.b = 20.0; + FragColor = foo.a + bar.b; +} diff --git a/shaders/frag/switch-unreachable-break.frag b/shaders/frag/switch-unreachable-break.frag new file mode 100644 index 00000000000..b0421e60ef3 --- /dev/null +++ b/shaders/frag/switch-unreachable-break.frag @@ -0,0 +1,32 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in vec4 vInput; + +layout(set = 0, binding = 0) uniform UBO +{ + int cond; + int cond2; +}; + +void main() +{ + bool frog = false; + switch (cond) + { + case 1: + if (cond2 < 50) + break; + else + discard; + + break; + + default: + frog = true; + break; + } + + FragColor = frog ? vec4(10.0) : vec4(20.0); +} + diff --git a/shaders/frag/ubo-load-row-major-workaround.frag b/shaders/frag/ubo-load-row-major-workaround.frag new file mode 100644 index 00000000000..03205ee8276 --- /dev/null +++ b/shaders/frag/ubo-load-row-major-workaround.frag @@ -0,0 +1,44 @@ +#version 450 + +struct RowMajor +{ + mat4 B; +}; + +struct NestedRowMajor +{ + RowMajor rm; +}; + +layout(set = 0, binding = 0, row_major) uniform UBO +{ + mat4 A; + layout(column_major) mat4 C; // This should also be worked around. 
+}; + + +layout(set = 0, binding = 1, row_major) uniform UBO2 +{ + RowMajor rm; +}; + +layout(set = 0, binding = 2, row_major) uniform UBO3 +{ + NestedRowMajor rm2; +}; + +layout(set = 0, binding = 3) uniform UBONoWorkaround +{ + mat4 D; +}; + +layout(location = 0) in vec4 Clip; +layout(location = 0) out vec4 FragColor; + +void main() +{ + NestedRowMajor rm2_loaded = rm2; + FragColor = rm2_loaded.rm.B * rm.B * A * C * Clip; + FragColor += D * Clip; + FragColor += A[1] * Clip; +} diff --git a/shaders/geom/geometry-passthrough.geom b/shaders/geom/geometry-passthrough.geom new file mode 100644 index 00000000000..7f1997c76a0 --- /dev/null +++ b/shaders/geom/geometry-passthrough.geom @@ -0,0 +1,28 @@ +#version 450 +#extension GL_NV_geometry_shader_passthrough : require + +layout(triangles) in; + +layout(passthrough) in gl_PerVertex +{ + vec4 gl_Position; +} gl_in[]; + +layout(passthrough, location = 0) in VertexBlock +{ + int a; + int b; +} v1[]; + +layout(location = 2) in VertexBlock2 +{ + int a; + layout(passthrough) int b; +} v2[]; + +layout(passthrough, location = 4) in vec4 vPoint[]; + +void main() +{ + gl_Layer = gl_InvocationID + v1[0].a + v2[1].b; +} diff --git a/shaders/geom/multi-stream.geom b/shaders/geom/multi-stream.geom new file mode 100644 index 00000000000..19b3bbb9c22 --- /dev/null +++ b/shaders/geom/multi-stream.geom @@ -0,0 +1,15 @@ +#version 450 + +layout(triangles) in; +layout(points, max_vertices = 2) out; + +void main() +{ + gl_Position = gl_in[0].gl_Position; + EmitStreamVertex(0); + EndStreamPrimitive(0); + gl_Position = gl_in[0].gl_Position + 2; + EmitStreamVertex(1); + EndStreamPrimitive(1); +} + diff --git a/shaders/geom/transform-feedback-streams.geom b/shaders/geom/transform-feedback-streams.geom new file mode 100644 index 00000000000..1e628907567 --- /dev/null +++ b/shaders/geom/transform-feedback-streams.geom @@ -0,0 +1,24 @@ +#version 450 +layout(max_vertices = 2, points) out; +layout(points) in; +layout(stream = 1, xfb_stride = 32, 
xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(stream = 1, xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(stream = 2, xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + EmitStreamVertex(1); + vBar = vec4(5.0); + EmitStreamVertex(2); +} diff --git a/shaders/legacy/fragment/explicit-lod.legacy.vert b/shaders/legacy/fragment/explicit-lod.legacy.vert new file mode 100644 index 00000000000..d2cbd5a4f94 --- /dev/null +++ b/shaders/legacy/fragment/explicit-lod.legacy.vert @@ -0,0 +1,12 @@ +#version 310 es + +precision mediump float; + +layout(binding = 0) uniform sampler2D tex; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = textureLod(tex, vec2(0.4, 0.6), 3.0); +} diff --git a/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag b/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag new file mode 100644 index 00000000000..b1e7d1b4f30 --- /dev/null +++ b/shaders/legacy/fragment/multiple-struct-flattening.legacy.frag @@ -0,0 +1,37 @@ +#version 450 + +struct Foo +{ + vec4 a; + vec4 b; +}; + +struct Bar +{ + vec4 a; + vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +layout(location = 0) in VertexIn +{ + Foo a; + Bar b; +}; + +layout(location = 4) in Baz baz; + +layout(location = 0) out vec4 FragColor; + +void main() +{ + Baz bazzy = baz; + Foo bazzy_foo = baz.foo; + Bar bazzy_bar = baz.bar; + FragColor = a.a + b.b + bazzy.foo.b + bazzy_foo.a + bazzy_bar.b; +} diff --git a/shaders/legacy/fragment/round.legacy.frag b/shaders/legacy/fragment/round.legacy.frag new file mode 100644 index 00000000000..c87b0abbff5 --- /dev/null +++ b/shaders/legacy/fragment/round.legacy.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in float vB; +layout(location = 0) out 
vec4 FragColor; + +void main() +{ + FragColor = round(vA); + FragColor *= round(vB); +} diff --git a/shaders/legacy/fragment/switch.legacy.frag b/shaders/legacy/fragment/switch.legacy.frag new file mode 100644 index 00000000000..d5117981731 --- /dev/null +++ b/shaders/legacy/fragment/switch.legacy.frag @@ -0,0 +1,43 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; +layout(location = 0) in float vIndexF; + +void main() +{ + int vIndex = int(vIndexF); + vec4 v = vec4(0.0); + switch (vIndex) + { + case 2: + v = vec4(0, 2, 3, 4); + break; + case 4: + case 5: + v = vec4(1, 2, 3, 4); + break; + case 8: + case 9: + v = vec4(40, 20, 30, 40); + break; + case 10: + v = vec4(10.0); + case 11: + v += 1.0; + case 12: + v += 2.0; + break; + default: + v = vec4(10, 20, 30, 40); + break; + } + + vec4 w = vec4(20.0); + switch (vIndex) + { + case 10: + case 20: + w = vec4(40.0); + } + FragColor = v + w; +} diff --git a/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert b/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert new file mode 100644 index 00000000000..f6ad932eea1 --- /dev/null +++ b/shaders/legacy/vert/struct-flatten-inner-array.legacy.vert @@ -0,0 +1,15 @@ +#version 450 + +struct Foo +{ + float a[4]; +}; + +layout(location = 0) out Foo foo; + +void main() +{ + gl_Position = vec4(1.0); + for (int i = 0; i < 4; i++) + foo.a[i] = float(i + 2); +} diff --git a/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert b/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert new file mode 100644 index 00000000000..57e914c1feb --- /dev/null +++ b/shaders/legacy/vert/struct-flatten-stores-multi-dimension.legacy.vert @@ -0,0 +1,40 @@ +#version 450 + +struct Foo +{ + vec4 a; + vec4 b; +}; + +struct Bar +{ + vec4 a; + vec4 b; +}; + +struct Baz +{ + Foo foo; + Bar bar; +}; + +layout(location = 0) out VertexIn +{ + Foo a; + Bar b; +}; + +layout(location = 4) out Baz baz; + +void main() +{ + a.a = vec4(10.0); + a.b = vec4(20.0); + 
b.a = vec4(30.0); + b.b = vec4(40.0); + a = Foo(vec4(50.0), vec4(60.0)); + b = Bar(vec4(50.0), vec4(60.0)); + baz.foo = Foo(vec4(100.0), vec4(200.0)); + baz.bar = Bar(vec4(300.0), vec4(400.0)); + baz = Baz(Foo(vec4(1000.0), vec4(2000.0)), Bar(vec4(3000.0), vec4(4000.0))); +} diff --git a/shaders/legacy/vert/switch-nested.legacy.vert b/shaders/legacy/vert/switch-nested.legacy.vert new file mode 100644 index 00000000000..6726c1c6930 --- /dev/null +++ b/shaders/legacy/vert/switch-nested.legacy.vert @@ -0,0 +1,28 @@ +#version 450 + +layout(set = 0, binding = 0) uniform UBO +{ + int func_arg; + int inner_func_arg; +}; + +vec4 test_inner_func(bool b) +{ + if (b) + return vec4(1.0); + else + return vec4(0.0); +} + +vec4 test_func(bool b) +{ + if (b) + return test_inner_func(inner_func_arg != 0); + else + return vec4(0.0); +} + +void main() +{ + gl_Position = test_func(func_arg != 0); +} diff --git a/shaders/legacy/vert/transpose.legacy.vert b/shaders/legacy/vert/transpose.legacy.vert index 84f618262ac..588c28d53db 100644 --- a/shaders/legacy/vert/transpose.legacy.vert +++ b/shaders/legacy/vert/transpose.legacy.vert @@ -15,6 +15,18 @@ void main() vec4 c1 = M * (MVPColMajor * Position); vec4 c2 = M * (Position * MVPRowMajor); vec4 c3 = M * (Position * MVPColMajor); - gl_Position = c0 + c1 + c2 + c3; + + vec4 c4 = transpose(MVPRowMajor) * Position; + vec4 c5 = transpose(MVPColMajor) * Position; + vec4 c6 = Position * transpose(MVPRowMajor); + vec4 c7 = Position * transpose(MVPColMajor); + + // Multiplying by scalar forces resolution of the transposition + vec4 c8 = (MVPRowMajor * 2.0) * Position; + vec4 c9 = (transpose(MVPColMajor) * 2.0) * Position; + vec4 c10 = Position * (MVPRowMajor * 2.0); + vec4 c11 = Position * (transpose(MVPColMajor) * 2.0); + + gl_Position = c0 + c1 + c2 + c3 + c4 + c5 + c6 + c7 + c8 + c9 + c10 + c11; } diff --git a/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 
100644 index 00000000000..0f1beef75b9 --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(lines, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = 
int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh b/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..3d037bcd5ea --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(points, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + 
prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 00000000000..944525aa505 --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(triangles, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + 
gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/shaders/tese/load-array-of-array.tese b/shaders/tese/load-array-of-array.tese new file mode 100644 index 00000000000..7383f7086eb --- /dev/null +++ b/shaders/tese/load-array-of-array.tese @@ -0,0 +1,10 @@ +#version 450 +layout(ccw, quads) in; + +layout(location = 0) in vec4 vTexCoord[][1]; + +void main() +{ + vec4 tmp[gl_MaxPatchVertices][1] = vTexCoord; + gl_Position = tmp[0][0] + tmp[2][0] + tmp[3][0]; +} diff --git a/shaders/tese/patch-input-array.tese b/shaders/tese/patch-input-array.tese new file mode 100644 index 00000000000..741b2c3b9bb --- /dev/null +++ b/shaders/tese/patch-input-array.tese @@ -0,0 +1,9 @@ +#version 450 + +layout(quads) in; +layout(location = 0) patch in float P[4]; + +void main() +{ + gl_Position = vec4(P[0], P[1], P[2], P[3]); +} diff --git a/shaders/vert/no-contraction.vert b/shaders/vert/no-contraction.vert new file mode 100644 index 
00000000000..206fbf0de80 --- /dev/null +++ b/shaders/vert/no-contraction.vert @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) in vec4 vA; +layout(location = 1) in vec4 vB; +layout(location = 2) in vec4 vC; + +void main() +{ + precise vec4 mul = vA * vB; + precise vec4 add = vA + vB; + precise vec4 sub = vA - vB; + precise vec4 mad = vA * vB + vC; + precise vec4 summed = mul + add + sub + mad; + gl_Position = summed; +} diff --git a/shaders/vert/row-major-workaround.vert b/shaders/vert/row-major-workaround.vert new file mode 100644 index 00000000000..edb8a842ebc --- /dev/null +++ b/shaders/vert/row-major-workaround.vert @@ -0,0 +1,28 @@ +#version 310 es + +layout(binding = 0) uniform Buffer +{ + layout(row_major) highp mat4 HP; + layout(row_major) mediump mat4 MP; +}; + +layout(binding = 1) uniform Buffer2 +{ + layout(row_major) mediump mat4 MP2; +}; + + +layout(location = 0) in vec4 Hin; +layout(location = 1) in mediump vec4 Min; +layout(location = 0) out vec4 H; +layout(location = 1) out mediump vec4 M; +layout(location = 2) out mediump vec4 M2; + +void main() +{ + gl_Position = vec4(1.0); + H = HP * Hin; + M = MP * Min; + M2 = MP2 * Min; +} + diff --git a/shaders/vert/transform-feedback-decorations.vert b/shaders/vert/transform-feedback-decorations.vert new file mode 100644 index 00000000000..b825dd112d8 --- /dev/null +++ b/shaders/vert/transform-feedback-decorations.vert @@ -0,0 +1,20 @@ +#version 450 +layout(xfb_stride = 32, xfb_offset = 16, xfb_buffer = 2, location = 0) out vec4 vFoo; + +layout(xfb_buffer = 1, xfb_stride = 20) out gl_PerVertex +{ + layout(xfb_offset = 4) vec4 gl_Position; + float gl_PointSize; +}; + +layout(xfb_buffer = 3) out VertOut +{ + layout(xfb_stride = 16, xfb_offset = 0, location = 1) vec4 vBar; +}; + +void main() +{ + gl_Position = vec4(1.0); + vFoo = vec4(3.0); + vBar = vec4(5.0); +} diff --git a/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp b/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp 
index a1da941fdb1..f0421b292f5 100644 --- a/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp +++ b/shaders/vulkan/comp/array-of-buffer-reference.nocompat.vk.comp @@ -2,7 +2,7 @@ #extension GL_EXT_buffer_reference : require layout(local_size_x = 1) in; -layout(buffer_reference) buffer Block +layout(buffer_reference, buffer_reference_align = 4) buffer Block { float v; }; diff --git a/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp b/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp new file mode 100644 index 00000000000..cf0a8e49331 --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-atomic.nocompat.vk.comp @@ -0,0 +1,24 @@ +#version 450 +#extension GL_EXT_buffer_reference : require + +layout(buffer_reference) buffer Foo +{ + uint v; +}; + +layout(buffer_reference, buffer_reference_align = 8) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(push_constant) uniform Push +{ + Bar bar; +}; + +void main() +{ + atomicAdd(bar.b, 1u); +} diff --git a/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp b/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp new file mode 100644 index 00000000000..41b44519dde --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-base-alignment-promote.nocompat.vk.comp @@ -0,0 +1,25 @@ +#version 450 +#extension GL_EXT_buffer_reference : require + +layout(buffer_reference) buffer Foo +{ + uint v; +}; + +layout(buffer_reference, buffer_reference_align = 8) buffer Bar +{ + uint a; + uint b; + Foo foo; +}; + +layout(push_constant) uniform Push +{ + Bar bar; +}; + +void main() +{ + uint v = bar.b; + atomicAdd(bar.a, v); +} diff --git a/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp b/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp new file mode 100644 index 00000000000..1afb6f772e2 --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-bitcast-uvec2-2.nocompat.invalid.vk.comp @@ -0,0 
+1,19 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(buffer_reference) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0) buffer Buf +{ + uvec2 ptr; + PtrInt ptrint; +}; + +void main() +{ + ptr = uvec2(ptrint); +} diff --git a/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp b/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp new file mode 100644 index 00000000000..b3880823682 --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-bitcast-uvec2.nocompat.vk.comp @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(buffer_reference) buffer PtrInt +{ + int value; +}; + +layout(set = 0, binding = 0) buffer Buf +{ + uvec2 ptr; +}; + +void main() +{ + PtrInt(ptr).value = 10; +} diff --git a/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp b/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp new file mode 100644 index 00000000000..c65463168a1 --- /dev/null +++ b/shaders/vulkan/comp/buffer-reference-decorations.nocompat.vk.comp @@ -0,0 +1,31 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 64) in; + +layout(std430, buffer_reference) readonly buffer RO +{ + vec4 v[]; +}; + +layout(std430, buffer_reference) restrict buffer RW +{ + vec4 v[]; +}; + +layout(std430, buffer_reference) coherent writeonly buffer WO +{ + vec4 v[]; +}; + +layout(push_constant, std430) uniform Registers +{ + RO ro; + RW rw; + WO wo; +} registers; + +void main() +{ + registers.rw.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; + registers.wo.v[gl_GlobalInvocationID.x] = registers.ro.v[gl_GlobalInvocationID.x]; +} diff --git a/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp b/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp new file mode 100644 index 00000000000..987cb878e79 --- /dev/null +++ 
b/shaders/vulkan/comp/fp-atomic.nocompat.vk.comp @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_shader_atomic_float : require + +shared float shared_v; +layout(set = 0, binding = 0, r32f) uniform image2D uImage; + +layout(set = 0, binding = 1) buffer SSBO +{ + float v; +}; + +void main() +{ + float value = atomicAdd(shared_v, 2.0); + atomicAdd(v, value); + imageAtomicAdd(uImage, ivec2(gl_GlobalInvocationID.xy), value); + value = imageAtomicExchange(uImage, ivec2(gl_GlobalInvocationID.xy), value); +} diff --git a/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp b/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp new file mode 100644 index 00000000000..c7dc397e35f --- /dev/null +++ b/shaders/vulkan/comp/ray-query.nocompat.spv14.invalid.vk.comp @@ -0,0 +1,58 @@ +#version 460 +#extension GL_EXT_ray_query : require +#extension GL_EXT_ray_tracing : require +#extension GL_EXT_ray_flags_primitive_culling : require +layout(primitive_culling); + +layout(set = 0, binding = 0) uniform accelerationStructureEXT AS; + +layout(set = 0, binding = 1) uniform Params +{ + uint ray_flags; + uint cull_mask; + vec3 origin; + float tmin; + vec3 dir; + float tmax; + float thit; + uvec2 bda; +}; + +rayQueryEXT q2[2]; + +void main() +{ + rayQueryEXT q; + bool res; + uint type; + float fval; + vec3 fvals; + int ival; + mat4x3 matrices; + + rayQueryInitializeEXT(q, AS, ray_flags, cull_mask, origin, tmin, dir, tmax); + rayQueryInitializeEXT(q2[1], accelerationStructureEXT(bda), ray_flags, cull_mask, origin, tmin, dir, tmax); + + res = rayQueryProceedEXT(q); + rayQueryTerminateEXT(q2[0]); + rayQueryGenerateIntersectionEXT(q, thit); + rayQueryConfirmIntersectionEXT(q2[1]); + fval = rayQueryGetRayTMinEXT(q); + type = rayQueryGetRayFlagsEXT(q2[0]); + fvals = rayQueryGetWorldRayDirectionEXT(q); + fvals = rayQueryGetWorldRayOriginEXT(q); + type = rayQueryGetIntersectionTypeEXT(q2[1], true); + res = rayQueryGetIntersectionCandidateAABBOpaqueEXT(q2[1]); + fval = 
rayQueryGetIntersectionTEXT(q2[1], false); + ival = rayQueryGetIntersectionInstanceCustomIndexEXT(q, true); + ival = rayQueryGetIntersectionInstanceIdEXT(q2[0], false); + type = rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(q, true); + ival = rayQueryGetIntersectionGeometryIndexEXT(q2[1], false); + ival = rayQueryGetIntersectionPrimitiveIndexEXT(q, true); + fvals.xy = rayQueryGetIntersectionBarycentricsEXT(q2[0], false); + res = rayQueryGetIntersectionFrontFaceEXT(q, true); + fvals = rayQueryGetIntersectionObjectRayDirectionEXT(q, false); + fvals = rayQueryGetIntersectionObjectRayOriginEXT(q2[0], true); + matrices = rayQueryGetIntersectionObjectToWorldEXT(q, false); + matrices = rayQueryGetIntersectionWorldToObjectEXT(q2[1], true); +} diff --git a/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag b/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag new file mode 100644 index 00000000000..ba57b8c5afa --- /dev/null +++ b/shaders/vulkan/frag/demote-to-helper-forwarding.asm.vk.nocompat.frag @@ -0,0 +1,41 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 19 +; Schema: 0 + OpCapability Shader + OpCapability DemoteToHelperInvocationEXT + OpExtension "SPV_EXT_demote_to_helper_invocation" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_demote_to_helper_invocation" + OpName %main "main" + OpName %FragColor "FragColor" + OpDecorate %FragColor Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %bool = OpTypeBool +%_ptr_Function_bool = OpTypePointer Function %bool + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %float_1 = OpConstant %float 1 + %float_0 = OpConstant %float 0 + %19 = 
OpConstantComposite %v4float %float_1 %float_0 %float_0 %float_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %9 = OpIsHelperInvocationEXT %bool + OpDemoteToHelperInvocationEXT + %10 = OpLogicalNot %bool %9 + OpSelectionMerge %12 None + OpBranchConditional %10 %11 %12 + %11 = OpLabel + OpStore %FragColor %19 + OpBranch %12 + %12 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag b/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag new file mode 100644 index 00000000000..8b8bb61ff7b --- /dev/null +++ b/shaders/vulkan/frag/demote-to-helper.vk.nocompat.frag @@ -0,0 +1,8 @@ +#version 450 +#extension GL_EXT_demote_to_helper_invocation : require + +void main() +{ + demote; + bool helper = helperInvocationEXT(); +} diff --git a/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag b/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag index f59b07c07e7..136133eb300 100644 --- a/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag +++ b/shaders/vulkan/frag/nonuniform-qualifier.vk.nocompat.frag @@ -1,28 +1,52 @@ #version 450 #extension GL_EXT_nonuniform_qualifier : require +#extension GL_EXT_samplerless_texture_functions : require layout(binding = 0) uniform texture2D uSamplers[]; +layout(binding = 0) uniform texture2DMS uSamplersMS[]; layout(binding = 4) uniform sampler2D uCombinedSamplers[]; layout(binding = 1) uniform sampler uSamps[]; layout(location = 0) flat in int vIndex; layout(location = 1) in vec2 vUV; layout(location = 0) out vec4 FragColor; +layout(r32f, binding = 5) uniform image2D uImages[]; +layout(r32ui, binding = 5) uniform uimage2D uImagesU32[]; + layout(set = 0, binding = 2) uniform UBO { vec4 v[64]; } ubos[]; -layout(set = 0, binding = 3) readonly buffer SSBO +layout(set = 0, binding = 3) buffer SSBO { + uint counter; vec4 v[]; } ssbos[]; void main() { int i = vIndex; - FragColor = texture(sampler2D(uSamplers[nonuniformEXT(i + 10)], uSamps[nonuniformEXT(i + 40)]), vUV); + 
FragColor = texture(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); FragColor = texture(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); FragColor += ubos[nonuniformEXT(i + 20)].v[nonuniformEXT(i + 40)]; FragColor += ssbos[nonuniformEXT(i + 50)].v[nonuniformEXT(i + 60)]; + ssbos[nonuniformEXT(i + 60)].v[nonuniformEXT(i + 70)] = vec4(20.0); + + FragColor = texelFetch(uSamplers[nonuniformEXT(i + 10)], ivec2(vUV), 0); + atomicAdd(ssbos[nonuniformEXT(i + 100)].counter, 100u); + + vec2 queried = textureQueryLod(nonuniformEXT(sampler2D(uSamplers[i + 10], uSamps[i + 40])), vUV); + queried += textureQueryLod(uCombinedSamplers[nonuniformEXT(i + 10)], vUV); + FragColor.xy += queried; + + FragColor.x += float(textureQueryLevels(uSamplers[nonuniformEXT(i + 20)])); + FragColor.y += float(textureSamples(uSamplersMS[nonuniformEXT(i + 20)])); + FragColor.xy += vec2(textureSize(uSamplers[nonuniformEXT(i + 20)], 0)); + + FragColor += imageLoad(uImages[nonuniformEXT(i + 50)], ivec2(vUV)); + FragColor.xy += vec2(imageSize(uImages[nonuniformEXT(i + 20)])); + imageStore(uImages[nonuniformEXT(i + 60)], ivec2(vUV), vec4(50.0)); + + imageAtomicAdd(uImagesU32[nonuniformEXT(i + 70)], ivec2(vUV), 40u); } diff --git a/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit b/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit new file mode 100644 index 00000000000..dab437780d0 --- /dev/null +++ b/shaders/vulkan/rahit/terminators.khr.spv14.nocompat.vk.rahit @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +rayPayloadInEXT float payload; + +void in_func() +{ + if (payload > 0.0) + ignoreIntersectionEXT; + else + terminateRayEXT; +} + +void main() +{ + in_func(); +} diff --git a/shaders/vulkan/rahit/terminators.nocompat.vk.rahit b/shaders/vulkan/rahit/terminators.nocompat.vk.rahit new file mode 100644 index 00000000000..943be33c571 --- /dev/null +++ b/shaders/vulkan/rahit/terminators.nocompat.vk.rahit @@ -0,0 +1,17 @@ +#version 460 
+#extension GL_NV_ray_tracing : require + +rayPayloadInNV float payload; + +void in_func() +{ + if (payload > 0.0) + ignoreIntersectionNV(); + else + terminateRayNV(); +} + +void main() +{ + in_func(); +} diff --git a/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall b/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall new file mode 100644 index 00000000000..53c594b6a13 --- /dev/null +++ b/shaders/vulkan/rcall/incoming-callable.khr.spv14.nocompat.vk.rcall @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 4) callableDataInEXT float c; + +void main() +{ + executeCallableEXT(10, 4); +} diff --git a/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..fd82f5bf93e --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_block.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,11 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT Foo { float a; float b; } payload; +hitAttributeEXT Foo2 { float a; float b; } hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} diff --git a/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit new file mode 100644 index 00000000000..8d367e3b87f --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_block.nocompat.vk.rchit @@ -0,0 +1,11 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV Foo { float a; float b; } payload; +hitAttributeNV Foo2 { float a; float b; } hit; + +void main() +{ + payload.a = hit.a; + payload.b = hit.b; +} diff --git a/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..7b8d53dd8bf --- /dev/null +++ 
b/shaders/vulkan/rchit/hit_attribute_block_in_function.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT Foo { float a; float b; } payload; +hitAttributeEXT Foo2 { float a; float b; } hit; + +void in_function() +{ + payload.a = hit.a; + payload.b = hit.b; +} + +void main() +{ + in_function(); +} diff --git a/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit new file mode 100644 index 00000000000..23a5c6519d0 --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_block_in_function.nocompat.vk.rchit @@ -0,0 +1,16 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV Foo { float a; float b; } payload; +hitAttributeNV Foo2 { float a; float b; } hit; + +void in_function() +{ + payload.a = hit.a; + payload.b = hit.b; +} + +void main() +{ + in_function(); +} diff --git a/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..145687739ff --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_plain.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,10 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec2 payload; +hitAttributeEXT vec2 hit; + +void main() +{ + payload = hit; +} diff --git a/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit new file mode 100644 index 00000000000..cdbda9cb8a2 --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_plain.nocompat.vk.rchit @@ -0,0 +1,10 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec2 payload; +hitAttributeNV vec2 hit; + +void main() +{ + payload = hit; +} diff --git 
a/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..f391f1ebfcc --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_struct.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,12 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Foo { float a; float b; }; + +layout(location = 0) rayPayloadInEXT Foo payload; +hitAttributeEXT Foo hit; + +void main() +{ + payload = hit; +} diff --git a/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit new file mode 100644 index 00000000000..625e125bc90 --- /dev/null +++ b/shaders/vulkan/rchit/hit_attribute_struct.nocompat.vk.rchit @@ -0,0 +1,12 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Foo { float a; float b; }; + +layout(location = 0) rayPayloadInNV Foo payload; +hitAttributeNV Foo hit; + +void main() +{ + payload = hit; +} diff --git a/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..1bf150000a2 --- /dev/null +++ b/shaders/vulkan/rchit/hit_kind.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_HitKindEXT; +} diff --git a/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit new file mode 100644 index 00000000000..39a088f46a2 --- /dev/null +++ b/shaders/vulkan/rchit/hit_kind.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_HitKindNV; +} diff --git a/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit new file mode 100644 index 
00000000000..5b4fc4dd506 --- /dev/null +++ b/shaders/vulkan/rchit/hit_t.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_HitTEXT; +} diff --git a/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit b/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit new file mode 100644 index 00000000000..16d6f06ea98 --- /dev/null +++ b/shaders/vulkan/rchit/hit_t.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_HitTNV; +} diff --git a/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..2c286465541 --- /dev/null +++ b/shaders/vulkan/rchit/incoming_ray_flags.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsEXT; +} diff --git a/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit b/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit new file mode 100644 index 00000000000..a1726d0ef45 --- /dev/null +++ b/shaders/vulkan/rchit/incoming_ray_flags.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_IncomingRayFlagsNV; +} diff --git a/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..330dfcebb5e --- /dev/null +++ b/shaders/vulkan/rchit/instance_custom_id.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void 
main() +{ + payload = gl_InstanceCustomIndexEXT; +} diff --git a/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit b/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit new file mode 100644 index 00000000000..02ae3430926 --- /dev/null +++ b/shaders/vulkan/rchit/instance_custom_id.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_InstanceCustomIndexNV; +} diff --git a/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..23d1e02c75a --- /dev/null +++ b/shaders/vulkan/rchit/instance_id.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_InstanceID; +} diff --git a/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit b/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit new file mode 100644 index 00000000000..d6f99668117 --- /dev/null +++ b/shaders/vulkan/rchit/instance_id.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_InstanceID; +} diff --git a/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..4d45134cb97 --- /dev/null +++ b/shaders/vulkan/rchit/object_ray_direction.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionEXT; +} diff --git a/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit b/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit new file mode 100644 index 
00000000000..257175b510f --- /dev/null +++ b/shaders/vulkan/rchit/object_ray_direction.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayDirectionNV; +} diff --git a/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..0964b7b5668 --- /dev/null +++ b/shaders/vulkan/rchit/object_ray_origin.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginEXT; +} diff --git a/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit b/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit new file mode 100644 index 00000000000..8b71e7d9c4b --- /dev/null +++ b/shaders/vulkan/rchit/object_ray_origin.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectRayOriginNV; +} diff --git a/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..4377443c2ba --- /dev/null +++ b/shaders/vulkan/rchit/object_to_world.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldEXT * vec4(payload, 1.0); +} diff --git a/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit b/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit new file mode 100644 index 00000000000..53b1406fe71 --- /dev/null +++ b/shaders/vulkan/rchit/object_to_world.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + 
+layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_ObjectToWorldNV * vec4(payload, 1.0); +} diff --git a/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..12ed5a7d62d --- /dev/null +++ b/shaders/vulkan/rchit/payloads.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,19 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInEXT Payload payload; + +void write_incoming_payload_in_function() +{ + payload.a = vec4(10.0); +} + +void main() +{ + write_incoming_payload_in_function(); +} diff --git a/shaders/vulkan/rchit/payloads.nocompat.vk.rchit b/shaders/vulkan/rchit/payloads.nocompat.vk.rchit new file mode 100644 index 00000000000..61a86663b4a --- /dev/null +++ b/shaders/vulkan/rchit/payloads.nocompat.vk.rchit @@ -0,0 +1,19 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +struct Payload +{ + vec4 a; +}; + +layout(location = 0) rayPayloadInNV Payload payload; + +void write_incoming_payload_in_function() +{ + payload.a = vec4(10.0); +} + +void main() +{ + write_incoming_payload_in_function(); +} diff --git a/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..f3798c1f42c --- /dev/null +++ b/shaders/vulkan/rchit/primitive_id.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT uint payload; + +void main() +{ + payload = gl_PrimitiveID; +} diff --git a/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit b/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit new file mode 100644 index 00000000000..fdfa1ffa32b --- /dev/null +++ b/shaders/vulkan/rchit/primitive_id.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 
0) rayPayloadInNV uint payload; + +void main() +{ + payload = gl_PrimitiveID; +} diff --git a/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..2f688baf9e9 --- /dev/null +++ b/shaders/vulkan/rchit/ray_tmax.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTmaxEXT; +} diff --git a/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit new file mode 100644 index 00000000000..c0e1387b3c8 --- /dev/null +++ b/shaders/vulkan/rchit/ray_tmax.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTmaxNV; +} diff --git a/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..3f8ddf05f6b --- /dev/null +++ b/shaders/vulkan/rchit/ray_tmin.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = gl_RayTminEXT; +} diff --git a/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit new file mode 100644 index 00000000000..896f4ffb4fb --- /dev/null +++ b/shaders/vulkan/rchit/ray_tmin.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV float payload; + +void main() +{ + payload = gl_RayTminNV; +} diff --git a/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..d48995b821e --- /dev/null +++ 
b/shaders/vulkan/rchit/ray_tracing.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 1.0 + float(gl_InstanceID); +} diff --git a/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit b/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit index 107f9751849..44c814d708e 100644 --- a/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit +++ b/shaders/vulkan/rchit/ray_tracing.nocompat.vk.rchit @@ -5,5 +5,5 @@ layout(location = 0) rayPayloadInNV float payload; void main() { - payload = 1.0; + payload = 1.0 + float(gl_InstanceID); } diff --git a/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..191905a312e --- /dev/null +++ b/shaders/vulkan/rchit/world_ray_direction.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionEXT; +} diff --git a/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit b/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit new file mode 100644 index 00000000000..43d14f2825f --- /dev/null +++ b/shaders/vulkan/rchit/world_ray_direction.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayDirectionNV; +} diff --git a/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..2c86a7234c9 --- /dev/null +++ b/shaders/vulkan/rchit/world_ray_origin.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void 
main() +{ + payload = gl_WorldRayOriginEXT; +} diff --git a/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit b/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit new file mode 100644 index 00000000000..8b03e7dedf1 --- /dev/null +++ b/shaders/vulkan/rchit/world_ray_origin.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldRayOriginNV; +} diff --git a/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit b/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit new file mode 100644 index 00000000000..eb64bd0b416 --- /dev/null +++ b/shaders/vulkan/rchit/world_to_object.khr.spv14.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT vec3 payload; + +void main() +{ + payload = gl_WorldToObjectEXT * vec4(payload, 1.0); +} diff --git a/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit b/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit new file mode 100644 index 00000000000..dc67c4a90fa --- /dev/null +++ b/shaders/vulkan/rchit/world_to_object.nocompat.vk.rchit @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(location = 0) rayPayloadInNV vec3 payload; + +void main() +{ + payload = gl_WorldToObjectNV * vec4(payload, 1.0); +} diff --git a/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen b/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen new file mode 100644 index 00000000000..8d6f1f4a933 --- /dev/null +++ b/shaders/vulkan/rgen/convert-u-to-as.spv14.vk.nocompat.rgen @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 3) rayPayloadEXT vec4 payload; + +layout(push_constant) uniform Registers +{ + uvec2 ptr; +}; + +void main() +{ + vec3 origin = vec3(0.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(accelerationStructureEXT(ptr), 
gl_RayFlagsOpaqueEXT, 0xFF, 0u, 0u, 0u, origin, 0.0, direction, 100.0f, 3); +} diff --git a/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen b/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen new file mode 100644 index 00000000000..9a4380e0a6e --- /dev/null +++ b/shaders/vulkan/rgen/execute_callable.nocompat.khr.spv14.vk.rgen @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(set = 0, binding = 1, rgba32f) uniform writeonly image2D image; +layout(location = 3) rayPayloadEXT vec4 payload; +layout(location = 4) callableDataEXT float blend; + +void main() +{ + vec3 origin = vec3(0.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, gl_RayFlagsOpaqueEXT, 0xFF, 0u, 0u, 0u, origin, 0.0, direction, 100.0f, 3); + executeCallableEXT(0u, 4); + imageStore(image, ivec2(gl_LaunchIDEXT.xy), payload + vec4(blend)); +} diff --git a/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..6f9983e97f6 --- /dev/null +++ b/shaders/vulkan/rgen/launch_id.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDEXT.xy), vec4(1.0)); +} diff --git a/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen b/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen new file mode 100644 index 00000000000..b89792e3628 --- /dev/null +++ b/shaders/vulkan/rgen/launch_id.nocompat.vk.rgen @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchIDNV.xy), vec4(1.0)); +} diff --git a/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen 
b/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..955f57dc404 --- /dev/null +++ b/shaders/vulkan/rgen/launch_size.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeEXT.xy) - 1, vec4(1.0)); +} diff --git a/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen b/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen new file mode 100644 index 00000000000..1e1ff55937a --- /dev/null +++ b/shaders/vulkan/rgen/launch_size.nocompat.vk.rgen @@ -0,0 +1,9 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0) uniform writeonly image2D uImage; + +void main() +{ + imageStore(uImage, ivec2(gl_LaunchSizeNV.xy) - 1, vec4(1.0)); +} diff --git a/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..b73a2463dec --- /dev/null +++ b/shaders/vulkan/rgen/payloads.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,49 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0, rgba8) uniform image2D image; +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; + +struct Payload +{ + float a, b; +}; + +// Plain payload +layout(location = 0) rayPayloadEXT float payload1; +// Struct payload +layout(location = 1) rayPayloadEXT Payload payload2; + +// This is syntactic sugar with the struct formulation (pretty sure), spec is kinda vague. +layout(location = 2) rayPayloadEXT Block +{ + float a, b; + Payload c, d; +}; + +vec4 trace_in_function() +{ + vec4 result = vec4(0.0); + // Test that we can write to a payload in a function. 
+ vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 1); + result += payload2.a; + result += payload2.b; + return result; +} + +void main() +{ + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + vec4 result = vec4(payload1); + + result += trace_in_function(); + + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 2); + result += a + b + c.a + c.b + d.a + d.b; + + imageStore(image, ivec2(gl_LaunchIDEXT.xy), result); +} diff --git a/shaders/vulkan/rgen/payloads.nocompat.vk.rgen b/shaders/vulkan/rgen/payloads.nocompat.vk.rgen new file mode 100644 index 00000000000..11c12d44f69 --- /dev/null +++ b/shaders/vulkan/rgen/payloads.nocompat.vk.rgen @@ -0,0 +1,49 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +layout(set = 0, binding = 0, rgba8) uniform image2D image; +layout(set = 0, binding = 1) uniform accelerationStructureNV as; + +struct Payload +{ + float a, b; +}; + +// Plain payload +layout(location = 0) rayPayloadNV float payload1; +// Struct payload +layout(location = 1) rayPayloadNV Payload payload2; + +// This is syntactic sugar with the struct formulation (pretty sure), spec is kinda vague. +layout(location = 2) rayPayloadNV Block +{ + float a, b; + Payload c, d; +}; + +vec4 trace_in_function() +{ + vec4 result = vec4(0.0); + // Test that we can write to a payload in a function. 
+ vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 1); + result += payload2.a; + result += payload2.b; + return result; +} + +void main() +{ + vec3 origin = vec3(1.0, 0.0, 0.0); + vec3 direction = vec3(0.0, 1.0, 0.0); + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + vec4 result = vec4(payload1); + + result += trace_in_function(); + + traceNV(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 2); + result += a + b + c.a + c.b + d.a + d.b; + + imageStore(image, ivec2(gl_LaunchIDNV.xy), result); +} diff --git a/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..6763f55365c --- /dev/null +++ b/shaders/vulkan/rgen/pure_call.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,18 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +float pure_call(vec2 launchID, vec2 launchSize) +{ + vec3 origin = vec3(launchID.x / launchSize.x, launchID.y / launchSize.y, 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + return 0.0; +} + +void main() +{ + pure_call(vec2(gl_LaunchIDEXT.xy), vec2(gl_LaunchSizeEXT.xy)); +} diff --git a/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..40f16489010 --- /dev/null +++ b/shaders/vulkan/rgen/ray_tracing.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0, rgba8) uniform image2D image; +layout(set = 0, binding = 1) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + vec4 col = vec4(0.0, 0.0, 0.0, 
1.0); + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 0); + col.y = payload; + imageStore(image, ivec2(gl_LaunchIDEXT.xy), col); +} diff --git a/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen b/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen new file mode 100644 index 00000000000..39c9bf27867 --- /dev/null +++ b/shaders/vulkan/rgen/shader_record_buffer.khr.spv14.nocompat.vk.rgen @@ -0,0 +1,16 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(shaderRecordEXT, std430) buffer sbt +{ + vec3 direction; + float tmax; +}; + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 0) rayPayloadEXT float payload; + +void main() +{ + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, vec3(0.0), 0.0, direction, tmax, 0); +} diff --git a/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint b/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint new file mode 100644 index 00000000000..b930e5c69fa --- /dev/null +++ b/shaders/vulkan/rint/report-intersection.khr.spv14.nocompat.vk.rint @@ -0,0 +1,12 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +void in_func() +{ + reportIntersectionEXT(0.5, 10); +} + +void main() +{ + in_func(); +} diff --git a/shaders/vulkan/rint/report-intersection.nocompat.vk.rint b/shaders/vulkan/rint/report-intersection.nocompat.vk.rint new file mode 100644 index 00000000000..ee384fc8da0 --- /dev/null +++ b/shaders/vulkan/rint/report-intersection.nocompat.vk.rint @@ -0,0 +1,12 @@ +#version 460 +#extension GL_NV_ray_tracing : require + +void in_func() +{ + reportIntersectionNV(0.5, 10); +} + +void main() +{ + in_func(); +} diff --git a/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss 
b/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss new file mode 100644 index 00000000000..ee873cb96e5 --- /dev/null +++ b/shaders/vulkan/rmiss/ray_tracing.khr.spv14.nocompat.vk.rmiss @@ -0,0 +1,9 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(location = 0) rayPayloadInEXT float payload; + +void main() +{ + payload = 0.0; +} diff --git a/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss b/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss new file mode 100644 index 00000000000..265a8f8b21a --- /dev/null +++ b/shaders/vulkan/rmiss/ray_tracing_trace_incoming.khr.spv14.nocompat.vk.rmiss @@ -0,0 +1,12 @@ +#version 460 +#extension GL_EXT_ray_tracing : require + +layout(set = 0, binding = 0) uniform accelerationStructureEXT as; +layout(location = 3) rayPayloadInEXT float p; + +void main() +{ + vec3 origin = vec3(float(gl_LaunchIDEXT.x) / float(gl_LaunchSizeEXT.x), float(gl_LaunchIDEXT.y) / float(gl_LaunchSizeEXT.y), 1.0); + vec3 direction = vec3(0.0, 0.0, -1.0); + traceRayEXT(as, 0u, 255u, 0u, 1u, 0u, origin, 0.0, direction, 1000.0, 3); +} diff --git a/shaders/vulkan/vert/device-group.nocompat.vk.vert b/shaders/vulkan/vert/device-group.nocompat.vk.vert new file mode 100644 index 00000000000..16ed51b15ef --- /dev/null +++ b/shaders/vulkan/vert/device-group.nocompat.vk.vert @@ -0,0 +1,7 @@ +#version 450 core +#extension GL_EXT_device_group : require + +void main() +{ + gl_Position = vec4(gl_DeviceIndex); +} diff --git a/spirv.h b/spirv.h index 8da27dd2638..5b6e8aaf475 100644 --- a/spirv.h +++ b/spirv.h @@ -1,5 +1,5 @@ /* -** Copyright (c) 2014-2019 The Khronos Group Inc. +** Copyright (c) 2014-2020 The Khronos Group Inc. 
** ** Permission is hereby granted, free of charge, to any person obtaining a copy ** of this software and/or associated documentation files (the "Materials"), @@ -31,7 +31,7 @@ /* ** Enumeration tokens for SPIR-V, in various styles: -** C, C++, C++11, JSON, Lua, Python, C#, D +** C, C++, C++11, JSON, Lua, Python, C#, D, Beef ** ** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL ** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL @@ -41,6 +41,8 @@ ** - C# will use enum classes in the Specification class located in the "Spv" namespace, ** e.g.: Spv.Specification.SourceLanguage.GLSL ** - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +** - Beef will use enum classes in the Specification class located in the "Spv" namespace, +** e.g.: Spv.Specification.SourceLanguage.GLSL ** ** Some tokens act like mask values, which can be OR'd together, ** while others are mutually exclusive. The mask-like ones have @@ -53,12 +55,12 @@ typedef unsigned int SpvId; -#define SPV_VERSION 0x10300 -#define SPV_REVISION 6 +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 static const unsigned int SpvMagicNumber = 0x07230203; -static const unsigned int SpvVersion = 0x00010300; -static const unsigned int SpvRevision = 6; +static const unsigned int SpvVersion = 0x00010600; +static const unsigned int SpvRevision = 1; static const unsigned int SpvOpCodeMask = 0xffff; static const unsigned int SpvWordCountShift = 16; @@ -69,6 +71,8 @@ typedef enum SpvSourceLanguage_ { SpvSourceLanguageOpenCL_C = 3, SpvSourceLanguageOpenCL_CPP = 4, SpvSourceLanguageHLSL = 5, + SpvSourceLanguageCPP_for_OpenCL = 6, + SpvSourceLanguageSYCL = 7, SpvSourceLanguageMax = 0x7fffffff, } SpvSourceLanguage; @@ -82,12 +86,20 @@ typedef enum SpvExecutionModel_ { SpvExecutionModelKernel = 6, SpvExecutionModelTaskNV = 5267, SpvExecutionModelMeshNV = 5268, + SpvExecutionModelRayGenerationKHR = 5313, SpvExecutionModelRayGenerationNV = 5313, + 
SpvExecutionModelIntersectionKHR = 5314, SpvExecutionModelIntersectionNV = 5314, + SpvExecutionModelAnyHitKHR = 5315, SpvExecutionModelAnyHitNV = 5315, + SpvExecutionModelClosestHitKHR = 5316, SpvExecutionModelClosestHitNV = 5316, + SpvExecutionModelMissKHR = 5317, SpvExecutionModelMissNV = 5317, + SpvExecutionModelCallableKHR = 5318, SpvExecutionModelCallableNV = 5318, + SpvExecutionModelTaskEXT = 5364, + SpvExecutionModelMeshEXT = 5365, SpvExecutionModelMax = 0x7fffffff, } SpvExecutionModel; @@ -95,6 +107,7 @@ typedef enum SpvAddressingModel_ { SpvAddressingModelLogical = 0, SpvAddressingModelPhysical32 = 1, SpvAddressingModelPhysical64 = 2, + SpvAddressingModelPhysicalStorageBuffer64 = 5348, SpvAddressingModelPhysicalStorageBuffer64EXT = 5348, SpvAddressingModelMax = 0x7fffffff, } SpvAddressingModel; @@ -103,6 +116,7 @@ typedef enum SpvMemoryModel_ { SpvMemoryModelSimple = 0, SpvMemoryModelGLSL450 = 1, SpvMemoryModelOpenCL = 2, + SpvMemoryModelVulkan = 3, SpvMemoryModelVulkanKHR = 3, SpvMemoryModelMax = 0x7fffffff, } SpvMemoryModel; @@ -146,18 +160,46 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeSubgroupsPerWorkgroupId = 37, SpvExecutionModeLocalSizeId = 38, SpvExecutionModeLocalSizeHintId = 39, + SpvExecutionModeSubgroupUniformControlFlowKHR = 4421, SpvExecutionModePostDepthCoverage = 4446, SpvExecutionModeDenormPreserve = 4459, SpvExecutionModeDenormFlushToZero = 4460, SpvExecutionModeSignedZeroInfNanPreserve = 4461, SpvExecutionModeRoundingModeRTE = 4462, SpvExecutionModeRoundingModeRTZ = 4463, + SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017, SpvExecutionModeStencilRefReplacingEXT = 5027, + SpvExecutionModeStencilRefUnchangedFrontAMD = 5079, + SpvExecutionModeStencilRefGreaterFrontAMD = 5080, + SpvExecutionModeStencilRefLessFrontAMD = 5081, + SpvExecutionModeStencilRefUnchangedBackAMD = 5082, + SpvExecutionModeStencilRefGreaterBackAMD = 5083, + SpvExecutionModeStencilRefLessBackAMD = 5084, + SpvExecutionModeOutputLinesEXT = 5269, 
SpvExecutionModeOutputLinesNV = 5269, + SpvExecutionModeOutputPrimitivesEXT = 5270, SpvExecutionModeOutputPrimitivesNV = 5270, SpvExecutionModeDerivativeGroupQuadsNV = 5289, SpvExecutionModeDerivativeGroupLinearNV = 5290, + SpvExecutionModeOutputTrianglesEXT = 5298, SpvExecutionModeOutputTrianglesNV = 5298, + SpvExecutionModePixelInterlockOrderedEXT = 5366, + SpvExecutionModePixelInterlockUnorderedEXT = 5367, + SpvExecutionModeSampleInterlockOrderedEXT = 5368, + SpvExecutionModeSampleInterlockUnorderedEXT = 5369, + SpvExecutionModeShadingRateInterlockOrderedEXT = 5370, + SpvExecutionModeShadingRateInterlockUnorderedEXT = 5371, + SpvExecutionModeSharedLocalMemorySizeINTEL = 5618, + SpvExecutionModeRoundingModeRTPINTEL = 5620, + SpvExecutionModeRoundingModeRTNINTEL = 5621, + SpvExecutionModeFloatingPointModeALTINTEL = 5622, + SpvExecutionModeFloatingPointModeIEEEINTEL = 5623, + SpvExecutionModeMaxWorkgroupSizeINTEL = 5893, + SpvExecutionModeMaxWorkDimINTEL = 5894, + SpvExecutionModeNoGlobalOffsetINTEL = 5895, + SpvExecutionModeNumSIMDWorkitemsINTEL = 5896, + SpvExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + SpvExecutionModeNamedBarrierCountINTEL = 6417, SpvExecutionModeMax = 0x7fffffff, } SpvExecutionMode; @@ -175,13 +217,24 @@ typedef enum SpvStorageClass_ { SpvStorageClassAtomicCounter = 10, SpvStorageClassImage = 11, SpvStorageClassStorageBuffer = 12, + SpvStorageClassCallableDataKHR = 5328, SpvStorageClassCallableDataNV = 5328, + SpvStorageClassIncomingCallableDataKHR = 5329, SpvStorageClassIncomingCallableDataNV = 5329, + SpvStorageClassRayPayloadKHR = 5338, SpvStorageClassRayPayloadNV = 5338, + SpvStorageClassHitAttributeKHR = 5339, SpvStorageClassHitAttributeNV = 5339, + SpvStorageClassIncomingRayPayloadKHR = 5342, SpvStorageClassIncomingRayPayloadNV = 5342, + SpvStorageClassShaderRecordBufferKHR = 5343, SpvStorageClassShaderRecordBufferNV = 5343, + SpvStorageClassPhysicalStorageBuffer = 5349, SpvStorageClassPhysicalStorageBufferEXT = 5349, + 
SpvStorageClassTaskPayloadWorkgroupEXT = 5402, + SpvStorageClassCodeSectionINTEL = 5605, + SpvStorageClassDeviceOnlyINTEL = 5936, + SpvStorageClassHostOnlyINTEL = 5937, SpvStorageClassMax = 0x7fffffff, } SpvStorageClass; @@ -252,6 +305,8 @@ typedef enum SpvImageFormat_ { SpvImageFormatRg8ui = 37, SpvImageFormatR16ui = 38, SpvImageFormatR8ui = 39, + SpvImageFormatR64ui = 40, + SpvImageFormatR64i = 41, SpvImageFormatMax = 0x7fffffff, } SpvImageFormat; @@ -309,10 +364,18 @@ typedef enum SpvImageOperandsShift_ { SpvImageOperandsConstOffsetsShift = 5, SpvImageOperandsSampleShift = 6, SpvImageOperandsMinLodShift = 7, + SpvImageOperandsMakeTexelAvailableShift = 8, SpvImageOperandsMakeTexelAvailableKHRShift = 8, + SpvImageOperandsMakeTexelVisibleShift = 9, SpvImageOperandsMakeTexelVisibleKHRShift = 9, + SpvImageOperandsNonPrivateTexelShift = 10, SpvImageOperandsNonPrivateTexelKHRShift = 10, + SpvImageOperandsVolatileTexelShift = 11, SpvImageOperandsVolatileTexelKHRShift = 11, + SpvImageOperandsSignExtendShift = 12, + SpvImageOperandsZeroExtendShift = 13, + SpvImageOperandsNontemporalShift = 14, + SpvImageOperandsOffsetsShift = 16, SpvImageOperandsMax = 0x7fffffff, } SpvImageOperandsShift; @@ -326,10 +389,18 @@ typedef enum SpvImageOperandsMask_ { SpvImageOperandsConstOffsetsMask = 0x00000020, SpvImageOperandsSampleMask = 0x00000040, SpvImageOperandsMinLodMask = 0x00000080, + SpvImageOperandsMakeTexelAvailableMask = 0x00000100, SpvImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + SpvImageOperandsMakeTexelVisibleMask = 0x00000200, SpvImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + SpvImageOperandsNonPrivateTexelMask = 0x00000400, SpvImageOperandsNonPrivateTexelKHRMask = 0x00000400, + SpvImageOperandsVolatileTexelMask = 0x00000800, SpvImageOperandsVolatileTexelKHRMask = 0x00000800, + SpvImageOperandsSignExtendMask = 0x00001000, + SpvImageOperandsZeroExtendMask = 0x00002000, + SpvImageOperandsNontemporalMask = 0x00004000, + SpvImageOperandsOffsetsMask = 0x00010000, } 
SpvImageOperandsMask; typedef enum SpvFPFastMathModeShift_ { @@ -338,6 +409,8 @@ typedef enum SpvFPFastMathModeShift_ { SpvFPFastMathModeNSZShift = 2, SpvFPFastMathModeAllowRecipShift = 3, SpvFPFastMathModeFastShift = 4, + SpvFPFastMathModeAllowContractFastINTELShift = 16, + SpvFPFastMathModeAllowReassocINTELShift = 17, SpvFPFastMathModeMax = 0x7fffffff, } SpvFPFastMathModeShift; @@ -348,6 +421,8 @@ typedef enum SpvFPFastMathModeMask_ { SpvFPFastMathModeNSZMask = 0x00000004, SpvFPFastMathModeAllowRecipMask = 0x00000008, SpvFPFastMathModeFastMask = 0x00000010, + SpvFPFastMathModeAllowContractFastINTELMask = 0x00010000, + SpvFPFastMathModeAllowReassocINTELMask = 0x00020000, } SpvFPFastMathModeMask; typedef enum SpvFPRoundingMode_ { @@ -361,6 +436,7 @@ typedef enum SpvFPRoundingMode_ { typedef enum SpvLinkageType_ { SpvLinkageTypeExport = 0, SpvLinkageTypeImport = 1, + SpvLinkageTypeLinkOnceODR = 2, SpvLinkageTypeMax = 0x7fffffff, } SpvLinkageType; @@ -410,6 +486,7 @@ typedef enum SpvDecoration_ { SpvDecorationNonWritable = 24, SpvDecorationNonReadable = 25, SpvDecorationUniform = 26, + SpvDecorationUniformId = 27, SpvDecorationSaturatedConversion = 28, SpvDecorationStream = 29, SpvDecorationLocation = 30, @@ -437,15 +514,64 @@ typedef enum SpvDecoration_ { SpvDecorationPassthroughNV = 5250, SpvDecorationViewportRelativeNV = 5252, SpvDecorationSecondaryViewportRelativeNV = 5256, + SpvDecorationPerPrimitiveEXT = 5271, SpvDecorationPerPrimitiveNV = 5271, SpvDecorationPerViewNV = 5272, SpvDecorationPerTaskNV = 5273, + SpvDecorationPerVertexKHR = 5285, SpvDecorationPerVertexNV = 5285, + SpvDecorationNonUniform = 5300, SpvDecorationNonUniformEXT = 5300, + SpvDecorationRestrictPointer = 5355, SpvDecorationRestrictPointerEXT = 5355, + SpvDecorationAliasedPointer = 5356, SpvDecorationAliasedPointerEXT = 5356, + SpvDecorationBindlessSamplerNV = 5398, + SpvDecorationBindlessImageNV = 5399, + SpvDecorationBoundSamplerNV = 5400, + SpvDecorationBoundImageNV = 5401, + 
SpvDecorationSIMTCallINTEL = 5599, + SpvDecorationReferencedIndirectlyINTEL = 5602, + SpvDecorationClobberINTEL = 5607, + SpvDecorationSideEffectsINTEL = 5608, + SpvDecorationVectorComputeVariableINTEL = 5624, + SpvDecorationFuncParamIOKindINTEL = 5625, + SpvDecorationVectorComputeFunctionINTEL = 5626, + SpvDecorationStackCallINTEL = 5627, + SpvDecorationGlobalVariableOffsetINTEL = 5628, + SpvDecorationCounterBuffer = 5634, SpvDecorationHlslCounterBufferGOOGLE = 5634, SpvDecorationHlslSemanticGOOGLE = 5635, + SpvDecorationUserSemantic = 5635, + SpvDecorationUserTypeGOOGLE = 5636, + SpvDecorationFunctionRoundingModeINTEL = 5822, + SpvDecorationFunctionDenormModeINTEL = 5823, + SpvDecorationRegisterINTEL = 5825, + SpvDecorationMemoryINTEL = 5826, + SpvDecorationNumbanksINTEL = 5827, + SpvDecorationBankwidthINTEL = 5828, + SpvDecorationMaxPrivateCopiesINTEL = 5829, + SpvDecorationSinglepumpINTEL = 5830, + SpvDecorationDoublepumpINTEL = 5831, + SpvDecorationMaxReplicatesINTEL = 5832, + SpvDecorationSimpleDualPortINTEL = 5833, + SpvDecorationMergeINTEL = 5834, + SpvDecorationBankBitsINTEL = 5835, + SpvDecorationForcePow2DepthINTEL = 5836, + SpvDecorationBurstCoalesceINTEL = 5899, + SpvDecorationCacheSizeINTEL = 5900, + SpvDecorationDontStaticallyCoalesceINTEL = 5901, + SpvDecorationPrefetchINTEL = 5902, + SpvDecorationStallEnableINTEL = 5905, + SpvDecorationFuseLoopsInFunctionINTEL = 5907, + SpvDecorationAliasScopeINTEL = 5914, + SpvDecorationNoAliasINTEL = 5915, + SpvDecorationBufferLocationINTEL = 5921, + SpvDecorationIOPipeStorageINTEL = 5944, + SpvDecorationFunctionFloatingPointModeINTEL = 6080, + SpvDecorationSingleElementVectorINTEL = 6085, + SpvDecorationVectorComputeCallableFunctionINTEL = 6087, + SpvDecorationMediaBlockIOINTEL = 6140, SpvDecorationMax = 0x7fffffff, } SpvDecoration; @@ -504,8 +630,10 @@ typedef enum SpvBuiltIn_ { SpvBuiltInBaseVertex = 4424, SpvBuiltInBaseInstance = 4425, SpvBuiltInDrawIndex = 4426, + SpvBuiltInPrimitiveShadingRateKHR = 4432, 
SpvBuiltInDeviceIndex = 4438, SpvBuiltInViewIndex = 4440, + SpvBuiltInShadingRateKHR = 4444, SpvBuiltInBaryCoordNoPerspAMD = 4992, SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993, SpvBuiltInBaryCoordNoPerspSampleAMD = 4994, @@ -528,26 +656,52 @@ typedef enum SpvBuiltIn_ { SpvBuiltInLayerPerViewNV = 5279, SpvBuiltInMeshViewCountNV = 5280, SpvBuiltInMeshViewIndicesNV = 5281, + SpvBuiltInBaryCoordKHR = 5286, SpvBuiltInBaryCoordNV = 5286, + SpvBuiltInBaryCoordNoPerspKHR = 5287, SpvBuiltInBaryCoordNoPerspNV = 5287, SpvBuiltInFragSizeEXT = 5292, SpvBuiltInFragmentSizeNV = 5292, SpvBuiltInFragInvocationCountEXT = 5293, SpvBuiltInInvocationsPerPixelNV = 5293, + SpvBuiltInPrimitivePointIndicesEXT = 5294, + SpvBuiltInPrimitiveLineIndicesEXT = 5295, + SpvBuiltInPrimitiveTriangleIndicesEXT = 5296, + SpvBuiltInCullPrimitiveEXT = 5299, + SpvBuiltInLaunchIdKHR = 5319, SpvBuiltInLaunchIdNV = 5319, + SpvBuiltInLaunchSizeKHR = 5320, SpvBuiltInLaunchSizeNV = 5320, + SpvBuiltInWorldRayOriginKHR = 5321, SpvBuiltInWorldRayOriginNV = 5321, + SpvBuiltInWorldRayDirectionKHR = 5322, SpvBuiltInWorldRayDirectionNV = 5322, + SpvBuiltInObjectRayOriginKHR = 5323, SpvBuiltInObjectRayOriginNV = 5323, + SpvBuiltInObjectRayDirectionKHR = 5324, SpvBuiltInObjectRayDirectionNV = 5324, + SpvBuiltInRayTminKHR = 5325, SpvBuiltInRayTminNV = 5325, + SpvBuiltInRayTmaxKHR = 5326, SpvBuiltInRayTmaxNV = 5326, + SpvBuiltInInstanceCustomIndexKHR = 5327, SpvBuiltInInstanceCustomIndexNV = 5327, + SpvBuiltInObjectToWorldKHR = 5330, SpvBuiltInObjectToWorldNV = 5330, + SpvBuiltInWorldToObjectKHR = 5331, SpvBuiltInWorldToObjectNV = 5331, SpvBuiltInHitTNV = 5332, + SpvBuiltInHitKindKHR = 5333, SpvBuiltInHitKindNV = 5333, + SpvBuiltInCurrentRayTimeNV = 5334, + SpvBuiltInIncomingRayFlagsKHR = 5351, SpvBuiltInIncomingRayFlagsNV = 5351, + SpvBuiltInRayGeometryIndexKHR = 5352, + SpvBuiltInWarpsPerSMNV = 5374, + SpvBuiltInSMCountNV = 5375, + SpvBuiltInWarpIDNV = 5376, + SpvBuiltInSMIDNV = 5377, + SpvBuiltInCullMaskKHR = 6021, 
SpvBuiltInMax = 0x7fffffff, } SpvBuiltIn; @@ -568,6 +722,19 @@ typedef enum SpvLoopControlShift_ { SpvLoopControlDontUnrollShift = 1, SpvLoopControlDependencyInfiniteShift = 2, SpvLoopControlDependencyLengthShift = 3, + SpvLoopControlMinIterationsShift = 4, + SpvLoopControlMaxIterationsShift = 5, + SpvLoopControlIterationMultipleShift = 6, + SpvLoopControlPeelCountShift = 7, + SpvLoopControlPartialCountShift = 8, + SpvLoopControlInitiationIntervalINTELShift = 16, + SpvLoopControlMaxConcurrencyINTELShift = 17, + SpvLoopControlDependencyArrayINTELShift = 18, + SpvLoopControlPipelineEnableINTELShift = 19, + SpvLoopControlLoopCoalesceINTELShift = 20, + SpvLoopControlMaxInterleavingINTELShift = 21, + SpvLoopControlSpeculatedIterationsINTELShift = 22, + SpvLoopControlNoFusionINTELShift = 23, SpvLoopControlMax = 0x7fffffff, } SpvLoopControlShift; @@ -577,6 +744,19 @@ typedef enum SpvLoopControlMask_ { SpvLoopControlDontUnrollMask = 0x00000002, SpvLoopControlDependencyInfiniteMask = 0x00000004, SpvLoopControlDependencyLengthMask = 0x00000008, + SpvLoopControlMinIterationsMask = 0x00000010, + SpvLoopControlMaxIterationsMask = 0x00000020, + SpvLoopControlIterationMultipleMask = 0x00000040, + SpvLoopControlPeelCountMask = 0x00000080, + SpvLoopControlPartialCountMask = 0x00000100, + SpvLoopControlInitiationIntervalINTELMask = 0x00010000, + SpvLoopControlMaxConcurrencyINTELMask = 0x00020000, + SpvLoopControlDependencyArrayINTELMask = 0x00040000, + SpvLoopControlPipelineEnableINTELMask = 0x00080000, + SpvLoopControlLoopCoalesceINTELMask = 0x00100000, + SpvLoopControlMaxInterleavingINTELMask = 0x00200000, + SpvLoopControlSpeculatedIterationsINTELMask = 0x00400000, + SpvLoopControlNoFusionINTELMask = 0x00800000, } SpvLoopControlMask; typedef enum SpvFunctionControlShift_ { @@ -584,6 +764,7 @@ typedef enum SpvFunctionControlShift_ { SpvFunctionControlDontInlineShift = 1, SpvFunctionControlPureShift = 2, SpvFunctionControlConstShift = 3, + SpvFunctionControlOptNoneINTELShift = 16, 
SpvFunctionControlMax = 0x7fffffff, } SpvFunctionControlShift; @@ -593,6 +774,7 @@ typedef enum SpvFunctionControlMask_ { SpvFunctionControlDontInlineMask = 0x00000002, SpvFunctionControlPureMask = 0x00000004, SpvFunctionControlConstMask = 0x00000008, + SpvFunctionControlOptNoneINTELMask = 0x00010000, } SpvFunctionControlMask; typedef enum SpvMemorySemanticsShift_ { @@ -606,9 +788,13 @@ typedef enum SpvMemorySemanticsShift_ { SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, SpvMemorySemanticsAtomicCounterMemoryShift = 10, SpvMemorySemanticsImageMemoryShift = 11, + SpvMemorySemanticsOutputMemoryShift = 12, SpvMemorySemanticsOutputMemoryKHRShift = 12, + SpvMemorySemanticsMakeAvailableShift = 13, SpvMemorySemanticsMakeAvailableKHRShift = 13, + SpvMemorySemanticsMakeVisibleShift = 14, SpvMemorySemanticsMakeVisibleKHRShift = 14, + SpvMemorySemanticsVolatileShift = 15, SpvMemorySemanticsMax = 0x7fffffff, } SpvMemorySemanticsShift; @@ -624,18 +810,27 @@ typedef enum SpvMemorySemanticsMask_ { SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, SpvMemorySemanticsImageMemoryMask = 0x00000800, + SpvMemorySemanticsOutputMemoryMask = 0x00001000, SpvMemorySemanticsOutputMemoryKHRMask = 0x00001000, + SpvMemorySemanticsMakeAvailableMask = 0x00002000, SpvMemorySemanticsMakeAvailableKHRMask = 0x00002000, + SpvMemorySemanticsMakeVisibleMask = 0x00004000, SpvMemorySemanticsMakeVisibleKHRMask = 0x00004000, + SpvMemorySemanticsVolatileMask = 0x00008000, } SpvMemorySemanticsMask; typedef enum SpvMemoryAccessShift_ { SpvMemoryAccessVolatileShift = 0, SpvMemoryAccessAlignedShift = 1, SpvMemoryAccessNontemporalShift = 2, + SpvMemoryAccessMakePointerAvailableShift = 3, SpvMemoryAccessMakePointerAvailableKHRShift = 3, + SpvMemoryAccessMakePointerVisibleShift = 4, SpvMemoryAccessMakePointerVisibleKHRShift = 4, + SpvMemoryAccessNonPrivatePointerShift = 5, SpvMemoryAccessNonPrivatePointerKHRShift = 5, + 
SpvMemoryAccessAliasScopeINTELMaskShift = 16, + SpvMemoryAccessNoAliasINTELMaskShift = 17, SpvMemoryAccessMax = 0x7fffffff, } SpvMemoryAccessShift; @@ -644,9 +839,14 @@ typedef enum SpvMemoryAccessMask_ { SpvMemoryAccessVolatileMask = 0x00000001, SpvMemoryAccessAlignedMask = 0x00000002, SpvMemoryAccessNontemporalMask = 0x00000004, + SpvMemoryAccessMakePointerAvailableMask = 0x00000008, SpvMemoryAccessMakePointerAvailableKHRMask = 0x00000008, + SpvMemoryAccessMakePointerVisibleMask = 0x00000010, SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010, + SpvMemoryAccessNonPrivatePointerMask = 0x00000020, SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020, + SpvMemoryAccessAliasScopeINTELMaskMask = 0x00010000, + SpvMemoryAccessNoAliasINTELMaskMask = 0x00020000, } SpvMemoryAccessMask; typedef enum SpvScope_ { @@ -655,7 +855,9 @@ typedef enum SpvScope_ { SpvScopeWorkgroup = 2, SpvScopeSubgroup = 3, SpvScopeInvocation = 4, + SpvScopeQueueFamily = 5, SpvScopeQueueFamilyKHR = 5, + SpvScopeShaderCallKHR = 6, SpvScopeMax = 0x7fffffff, } SpvScope; @@ -755,8 +957,15 @@ typedef enum SpvCapability_ { SpvCapabilityGroupNonUniformShuffleRelative = 66, SpvCapabilityGroupNonUniformClustered = 67, SpvCapabilityGroupNonUniformQuad = 68, + SpvCapabilityShaderLayer = 69, + SpvCapabilityShaderViewportIndex = 70, + SpvCapabilityUniformDecoration = 71, + SpvCapabilityFragmentShadingRateKHR = 4422, SpvCapabilitySubgroupBallotKHR = 4423, SpvCapabilityDrawParameters = 4427, + SpvCapabilityWorkgroupMemoryExplicitLayoutKHR = 4428, + SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, SpvCapabilitySubgroupVoteKHR = 4431, SpvCapabilityStorageBuffer16BitAccess = 4433, SpvCapabilityStorageUniformBufferBlock16 = 4433, @@ -778,11 +987,17 @@ typedef enum SpvCapability_ { SpvCapabilitySignedZeroInfNanPreserve = 4466, SpvCapabilityRoundingModeRTE = 4467, SpvCapabilityRoundingModeRTZ = 4468, + 
SpvCapabilityRayQueryProvisionalKHR = 4471, + SpvCapabilityRayQueryKHR = 4472, + SpvCapabilityRayTraversalPrimitiveCullingKHR = 4478, + SpvCapabilityRayTracingKHR = 4479, SpvCapabilityFloat16ImageAMD = 5008, SpvCapabilityImageGatherBiasLodAMD = 5009, SpvCapabilityFragmentMaskAMD = 5010, SpvCapabilityStencilExportEXT = 5013, SpvCapabilityImageReadWriteLodAMD = 5015, + SpvCapabilityInt64ImageEXT = 5016, + SpvCapabilityShaderClockKHR = 5055, SpvCapabilitySampleMaskOverrideCoverageNV = 5249, SpvCapabilityGeometryShaderPassthroughNV = 5251, SpvCapabilityShaderViewportIndexLayerEXT = 5254, @@ -793,36 +1008,217 @@ typedef enum SpvCapability_ { SpvCapabilityFragmentFullyCoveredEXT = 5265, SpvCapabilityMeshShadingNV = 5266, SpvCapabilityImageFootprintNV = 5282, + SpvCapabilityMeshShadingEXT = 5283, + SpvCapabilityFragmentBarycentricKHR = 5284, SpvCapabilityFragmentBarycentricNV = 5284, SpvCapabilityComputeDerivativeGroupQuadsNV = 5288, SpvCapabilityFragmentDensityEXT = 5291, SpvCapabilityShadingRateNV = 5291, SpvCapabilityGroupNonUniformPartitionedNV = 5297, + SpvCapabilityShaderNonUniform = 5301, SpvCapabilityShaderNonUniformEXT = 5301, + SpvCapabilityRuntimeDescriptorArray = 5302, SpvCapabilityRuntimeDescriptorArrayEXT = 5302, + SpvCapabilityInputAttachmentArrayDynamicIndexing = 5303, SpvCapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + SpvCapabilityUniformTexelBufferArrayDynamicIndexing = 5304, SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + SpvCapabilityStorageTexelBufferArrayDynamicIndexing = 5305, SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + SpvCapabilityUniformBufferArrayNonUniformIndexing = 5306, SpvCapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + SpvCapabilitySampledImageArrayNonUniformIndexing = 5307, SpvCapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + SpvCapabilityStorageBufferArrayNonUniformIndexing = 5308, SpvCapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + 
SpvCapabilityStorageImageArrayNonUniformIndexing = 5309, SpvCapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + SpvCapabilityInputAttachmentArrayNonUniformIndexing = 5310, SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + SpvCapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, SpvCapabilityRayTracingNV = 5340, + SpvCapabilityRayTracingMotionBlurNV = 5341, + SpvCapabilityVulkanMemoryModel = 5345, SpvCapabilityVulkanMemoryModelKHR = 5345, + SpvCapabilityVulkanMemoryModelDeviceScope = 5346, SpvCapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + SpvCapabilityPhysicalStorageBufferAddresses = 5347, SpvCapabilityPhysicalStorageBufferAddressesEXT = 5347, SpvCapabilityComputeDerivativeGroupLinearNV = 5350, + SpvCapabilityRayTracingProvisionalKHR = 5353, SpvCapabilityCooperativeMatrixNV = 5357, + SpvCapabilityFragmentShaderSampleInterlockEXT = 5363, + SpvCapabilityFragmentShaderShadingRateInterlockEXT = 5372, + SpvCapabilityShaderSMBuiltinsNV = 5373, + SpvCapabilityFragmentShaderPixelInterlockEXT = 5378, + SpvCapabilityDemoteToHelperInvocation = 5379, + SpvCapabilityDemoteToHelperInvocationEXT = 5379, + SpvCapabilityBindlessTextureNV = 5390, SpvCapabilitySubgroupShuffleINTEL = 5568, SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, SpvCapabilitySubgroupImageBlockIOINTEL = 5570, SpvCapabilitySubgroupImageMediaBlockIOINTEL = 5579, + SpvCapabilityRoundToInfinityINTEL = 5582, + SpvCapabilityFloatingPointModeINTEL = 5583, + SpvCapabilityIntegerFunctions2INTEL = 5584, + SpvCapabilityFunctionPointersINTEL = 5603, + SpvCapabilityIndirectReferencesINTEL = 5604, + SpvCapabilityAsmINTEL = 5606, + SpvCapabilityAtomicFloat32MinMaxEXT = 5612, + SpvCapabilityAtomicFloat64MinMaxEXT = 5613, + SpvCapabilityAtomicFloat16MinMaxEXT = 5616, + SpvCapabilityVectorComputeINTEL = 
5617, + SpvCapabilityVectorAnyINTEL = 5619, + SpvCapabilityExpectAssumeKHR = 5629, + SpvCapabilitySubgroupAvcMotionEstimationINTEL = 5696, + SpvCapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + SpvCapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + SpvCapabilityVariableLengthArrayINTEL = 5817, + SpvCapabilityFunctionFloatControlINTEL = 5821, + SpvCapabilityFPGAMemoryAttributesINTEL = 5824, + SpvCapabilityFPFastMathModeINTEL = 5837, + SpvCapabilityArbitraryPrecisionIntegersINTEL = 5844, + SpvCapabilityArbitraryPrecisionFloatingPointINTEL = 5845, + SpvCapabilityUnstructuredLoopControlsINTEL = 5886, + SpvCapabilityFPGALoopControlsINTEL = 5888, + SpvCapabilityKernelAttributesINTEL = 5892, + SpvCapabilityFPGAKernelAttributesINTEL = 5897, + SpvCapabilityFPGAMemoryAccessesINTEL = 5898, + SpvCapabilityFPGAClusterAttributesINTEL = 5904, + SpvCapabilityLoopFuseINTEL = 5906, + SpvCapabilityMemoryAccessAliasingINTEL = 5910, + SpvCapabilityFPGABufferLocationINTEL = 5920, + SpvCapabilityArbitraryPrecisionFixedPointINTEL = 5922, + SpvCapabilityUSMStorageClassesINTEL = 5935, + SpvCapabilityIOPipesINTEL = 5943, + SpvCapabilityBlockingPipesINTEL = 5945, + SpvCapabilityFPGARegINTEL = 5948, + SpvCapabilityDotProductInputAll = 6016, + SpvCapabilityDotProductInputAllKHR = 6016, + SpvCapabilityDotProductInput4x8Bit = 6017, + SpvCapabilityDotProductInput4x8BitKHR = 6017, + SpvCapabilityDotProductInput4x8BitPacked = 6018, + SpvCapabilityDotProductInput4x8BitPackedKHR = 6018, + SpvCapabilityDotProduct = 6019, + SpvCapabilityDotProductKHR = 6019, + SpvCapabilityRayCullMaskKHR = 6020, + SpvCapabilityBitInstructions = 6025, + SpvCapabilityGroupNonUniformRotateKHR = 6026, + SpvCapabilityAtomicFloat32AddEXT = 6033, + SpvCapabilityAtomicFloat64AddEXT = 6034, + SpvCapabilityLongConstantCompositeINTEL = 6089, + SpvCapabilityOptNoneINTEL = 6094, + SpvCapabilityAtomicFloat16AddEXT = 6095, + SpvCapabilityDebugInfoModuleINTEL = 6114, + SpvCapabilitySplitBarrierINTEL = 6141, + 
SpvCapabilityGroupUniformArithmeticKHR = 6400, SpvCapabilityMax = 0x7fffffff, } SpvCapability; +typedef enum SpvRayFlagsShift_ { + SpvRayFlagsOpaqueKHRShift = 0, + SpvRayFlagsNoOpaqueKHRShift = 1, + SpvRayFlagsTerminateOnFirstHitKHRShift = 2, + SpvRayFlagsSkipClosestHitShaderKHRShift = 3, + SpvRayFlagsCullBackFacingTrianglesKHRShift = 4, + SpvRayFlagsCullFrontFacingTrianglesKHRShift = 5, + SpvRayFlagsCullOpaqueKHRShift = 6, + SpvRayFlagsCullNoOpaqueKHRShift = 7, + SpvRayFlagsSkipTrianglesKHRShift = 8, + SpvRayFlagsSkipAABBsKHRShift = 9, + SpvRayFlagsMax = 0x7fffffff, +} SpvRayFlagsShift; + +typedef enum SpvRayFlagsMask_ { + SpvRayFlagsMaskNone = 0, + SpvRayFlagsOpaqueKHRMask = 0x00000001, + SpvRayFlagsNoOpaqueKHRMask = 0x00000002, + SpvRayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + SpvRayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + SpvRayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + SpvRayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + SpvRayFlagsCullOpaqueKHRMask = 0x00000040, + SpvRayFlagsCullNoOpaqueKHRMask = 0x00000080, + SpvRayFlagsSkipTrianglesKHRMask = 0x00000100, + SpvRayFlagsSkipAABBsKHRMask = 0x00000200, +} SpvRayFlagsMask; + +typedef enum SpvRayQueryIntersection_ { + SpvRayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + SpvRayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + SpvRayQueryIntersectionMax = 0x7fffffff, +} SpvRayQueryIntersection; + +typedef enum SpvRayQueryCommittedIntersectionType_ { + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + SpvRayQueryCommittedIntersectionTypeMax = 0x7fffffff, +} SpvRayQueryCommittedIntersectionType; + +typedef enum SpvRayQueryCandidateIntersectionType_ { + SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + 
SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + SpvRayQueryCandidateIntersectionTypeMax = 0x7fffffff, +} SpvRayQueryCandidateIntersectionType; + +typedef enum SpvFragmentShadingRateShift_ { + SpvFragmentShadingRateVertical2PixelsShift = 0, + SpvFragmentShadingRateVertical4PixelsShift = 1, + SpvFragmentShadingRateHorizontal2PixelsShift = 2, + SpvFragmentShadingRateHorizontal4PixelsShift = 3, + SpvFragmentShadingRateMax = 0x7fffffff, +} SpvFragmentShadingRateShift; + +typedef enum SpvFragmentShadingRateMask_ { + SpvFragmentShadingRateMaskNone = 0, + SpvFragmentShadingRateVertical2PixelsMask = 0x00000001, + SpvFragmentShadingRateVertical4PixelsMask = 0x00000002, + SpvFragmentShadingRateHorizontal2PixelsMask = 0x00000004, + SpvFragmentShadingRateHorizontal4PixelsMask = 0x00000008, +} SpvFragmentShadingRateMask; + +typedef enum SpvFPDenormMode_ { + SpvFPDenormModePreserve = 0, + SpvFPDenormModeFlushToZero = 1, + SpvFPDenormModeMax = 0x7fffffff, +} SpvFPDenormMode; + +typedef enum SpvFPOperationMode_ { + SpvFPOperationModeIEEE = 0, + SpvFPOperationModeALT = 1, + SpvFPOperationModeMax = 0x7fffffff, +} SpvFPOperationMode; + +typedef enum SpvQuantizationModes_ { + SpvQuantizationModesTRN = 0, + SpvQuantizationModesTRN_ZERO = 1, + SpvQuantizationModesRND = 2, + SpvQuantizationModesRND_ZERO = 3, + SpvQuantizationModesRND_INF = 4, + SpvQuantizationModesRND_MIN_INF = 5, + SpvQuantizationModesRND_CONV = 6, + SpvQuantizationModesRND_CONV_ODD = 7, + SpvQuantizationModesMax = 0x7fffffff, +} SpvQuantizationModes; + +typedef enum SpvOverflowModes_ { + SpvOverflowModesWRAP = 0, + SpvOverflowModesSAT = 1, + SpvOverflowModesSAT_ZERO = 2, + SpvOverflowModesSAT_SYM = 3, + SpvOverflowModesMax = 0x7fffffff, +} SpvOverflowModes; + +typedef enum SpvPackedVectorFormat_ { + SpvPackedVectorFormatPackedVectorFormat4x8Bit = 0, + SpvPackedVectorFormatPackedVectorFormat4x8BitKHR = 0, + SpvPackedVectorFormatMax = 0x7fffffff, +} SpvPackedVectorFormat; + typedef enum 
SpvOp_ { SpvOpNop = 0, SpvOpUndef = 1, @@ -1164,12 +1560,42 @@ typedef enum SpvOp_ { SpvOpGroupNonUniformLogicalXor = 364, SpvOpGroupNonUniformQuadBroadcast = 365, SpvOpGroupNonUniformQuadSwap = 366, + SpvOpCopyLogical = 400, + SpvOpPtrEqual = 401, + SpvOpPtrNotEqual = 402, + SpvOpPtrDiff = 403, + SpvOpTerminateInvocation = 4416, SpvOpSubgroupBallotKHR = 4421, SpvOpSubgroupFirstInvocationKHR = 4422, SpvOpSubgroupAllKHR = 4428, SpvOpSubgroupAnyKHR = 4429, SpvOpSubgroupAllEqualKHR = 4430, + SpvOpGroupNonUniformRotateKHR = 4431, SpvOpSubgroupReadInvocationKHR = 4432, + SpvOpTraceRayKHR = 4445, + SpvOpExecuteCallableKHR = 4446, + SpvOpConvertUToAccelerationStructureKHR = 4447, + SpvOpIgnoreIntersectionKHR = 4448, + SpvOpTerminateRayKHR = 4449, + SpvOpSDot = 4450, + SpvOpSDotKHR = 4450, + SpvOpUDot = 4451, + SpvOpUDotKHR = 4451, + SpvOpSUDot = 4452, + SpvOpSUDotKHR = 4452, + SpvOpSDotAccSat = 4453, + SpvOpSDotAccSatKHR = 4453, + SpvOpUDotAccSat = 4454, + SpvOpUDotAccSatKHR = 4454, + SpvOpSUDotAccSat = 4455, + SpvOpSUDotAccSatKHR = 4455, + SpvOpTypeRayQueryKHR = 4472, + SpvOpRayQueryInitializeKHR = 4473, + SpvOpRayQueryTerminateKHR = 4474, + SpvOpRayQueryGenerateIntersectionKHR = 4475, + SpvOpRayQueryConfirmIntersectionKHR = 4476, + SpvOpRayQueryProceedKHR = 4477, + SpvOpRayQueryGetIntersectionTypeKHR = 4479, SpvOpGroupIAddNonUniformAMD = 5000, SpvOpGroupFAddNonUniformAMD = 5001, SpvOpGroupFMinNonUniformAMD = 5002, @@ -1180,13 +1606,20 @@ typedef enum SpvOp_ { SpvOpGroupSMaxNonUniformAMD = 5007, SpvOpFragmentMaskFetchAMD = 5011, SpvOpFragmentFetchAMD = 5012, + SpvOpReadClockKHR = 5056, SpvOpImageSampleFootprintNV = 5283, + SpvOpEmitMeshTasksEXT = 5294, + SpvOpSetMeshOutputsEXT = 5295, SpvOpGroupNonUniformPartitionNV = 5296, SpvOpWritePackedPrimitiveIndices4x8NV = 5299, + SpvOpReportIntersectionKHR = 5334, SpvOpReportIntersectionNV = 5334, SpvOpIgnoreIntersectionNV = 5335, SpvOpTerminateRayNV = 5336, SpvOpTraceNV = 5337, + SpvOpTraceMotionNV = 5338, + 
SpvOpTraceRayMotionNV = 5339, + SpvOpTypeAccelerationStructureKHR = 5341, SpvOpTypeAccelerationStructureNV = 5341, SpvOpExecuteCallableNV = 5344, SpvOpTypeCooperativeMatrixNV = 5358, @@ -1194,6 +1627,18 @@ typedef enum SpvOp_ { SpvOpCooperativeMatrixStoreNV = 5360, SpvOpCooperativeMatrixMulAddNV = 5361, SpvOpCooperativeMatrixLengthNV = 5362, + SpvOpBeginInvocationInterlockEXT = 5364, + SpvOpEndInvocationInterlockEXT = 5365, + SpvOpDemoteToHelperInvocation = 5380, + SpvOpDemoteToHelperInvocationEXT = 5380, + SpvOpIsHelperInvocationEXT = 5381, + SpvOpConvertUToImageNV = 5391, + SpvOpConvertUToSamplerNV = 5392, + SpvOpConvertImageToUNV = 5393, + SpvOpConvertSamplerToUNV = 5394, + SpvOpConvertUToSampledImageNV = 5395, + SpvOpConvertSampledImageToUNV = 5396, + SpvOpSamplerImageAddressingModeNV = 5397, SpvOpSubgroupShuffleINTEL = 5571, SpvOpSubgroupShuffleDownINTEL = 5572, SpvOpSubgroupShuffleUpINTEL = 5573, @@ -1204,10 +1649,920 @@ typedef enum SpvOp_ { SpvOpSubgroupImageBlockWriteINTEL = 5578, SpvOpSubgroupImageMediaBlockReadINTEL = 5580, SpvOpSubgroupImageMediaBlockWriteINTEL = 5581, + SpvOpUCountLeadingZerosINTEL = 5585, + SpvOpUCountTrailingZerosINTEL = 5586, + SpvOpAbsISubINTEL = 5587, + SpvOpAbsUSubINTEL = 5588, + SpvOpIAddSatINTEL = 5589, + SpvOpUAddSatINTEL = 5590, + SpvOpIAverageINTEL = 5591, + SpvOpUAverageINTEL = 5592, + SpvOpIAverageRoundedINTEL = 5593, + SpvOpUAverageRoundedINTEL = 5594, + SpvOpISubSatINTEL = 5595, + SpvOpUSubSatINTEL = 5596, + SpvOpIMul32x16INTEL = 5597, + SpvOpUMul32x16INTEL = 5598, + SpvOpConstantFunctionPointerINTEL = 5600, + SpvOpFunctionPointerCallINTEL = 5601, + SpvOpAsmTargetINTEL = 5609, + SpvOpAsmINTEL = 5610, + SpvOpAsmCallINTEL = 5611, + SpvOpAtomicFMinEXT = 5614, + SpvOpAtomicFMaxEXT = 5615, + SpvOpAssumeTrueKHR = 5630, + SpvOpExpectKHR = 5631, + SpvOpDecorateString = 5632, SpvOpDecorateStringGOOGLE = 5632, + SpvOpMemberDecorateString = 5633, SpvOpMemberDecorateStringGOOGLE = 5633, + SpvOpVmeImageINTEL = 5699, + 
SpvOpTypeVmeImageINTEL = 5700, + SpvOpTypeAvcImePayloadINTEL = 5701, + SpvOpTypeAvcRefPayloadINTEL = 5702, + SpvOpTypeAvcSicPayloadINTEL = 5703, + SpvOpTypeAvcMcePayloadINTEL = 5704, + SpvOpTypeAvcMceResultINTEL = 5705, + SpvOpTypeAvcImeResultINTEL = 5706, + SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + SpvOpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + SpvOpTypeAvcImeDualReferenceStreaminINTEL = 5710, + SpvOpTypeAvcRefResultINTEL = 5711, + SpvOpTypeAvcSicResultINTEL = 5712, + SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + SpvOpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + SpvOpSubgroupAvcMceConvertToImeResultINTEL = 5733, + SpvOpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + 
SpvOpSubgroupAvcMceConvertToRefResultINTEL = 5735, + SpvOpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + SpvOpSubgroupAvcMceConvertToSicResultINTEL = 5737, + SpvOpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + SpvOpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + SpvOpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + SpvOpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + SpvOpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + SpvOpSubgroupAvcImeInitializeINTEL = 5747, + SpvOpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + SpvOpSubgroupAvcImeSetDualReferenceINTEL = 5749, + SpvOpSubgroupAvcImeRefWindowSizeINTEL = 5750, + SpvOpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + SpvOpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + SpvOpSubgroupAvcImeSetWeightedSadINTEL = 5756, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + SpvOpSubgroupAvcImeConvertToMceResultINTEL = 5765, + SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + 
SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + SpvOpSubgroupAvcImeGetBorderReachedINTEL = 5776, + SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + SpvOpSubgroupAvcFmeInitializeINTEL = 5781, + SpvOpSubgroupAvcBmeInitializeINTEL = 5782, + SpvOpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + SpvOpSubgroupAvcRefConvertToMceResultINTEL = 5790, + SpvOpSubgroupAvcSicInitializeINTEL = 5791, + SpvOpSubgroupAvcSicConfigureSkcINTEL = 5792, + SpvOpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + SpvOpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + 
SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + SpvOpSubgroupAvcSicEvaluateIpeINTEL = 5803, + SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + SpvOpSubgroupAvcSicConvertToMceResultINTEL = 5808, + SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + SpvOpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + SpvOpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + SpvOpVariableLengthArrayINTEL = 5818, + SpvOpSaveMemoryINTEL = 5819, + SpvOpRestoreMemoryINTEL = 5820, + SpvOpArbitraryFloatSinCosPiINTEL = 5840, + SpvOpArbitraryFloatCastINTEL = 5841, + SpvOpArbitraryFloatCastFromIntINTEL = 5842, + SpvOpArbitraryFloatCastToIntINTEL = 5843, + SpvOpArbitraryFloatAddINTEL = 5846, + SpvOpArbitraryFloatSubINTEL = 5847, + SpvOpArbitraryFloatMulINTEL = 5848, + SpvOpArbitraryFloatDivINTEL = 5849, + SpvOpArbitraryFloatGTINTEL = 5850, + SpvOpArbitraryFloatGEINTEL = 5851, + SpvOpArbitraryFloatLTINTEL = 5852, + SpvOpArbitraryFloatLEINTEL = 5853, + SpvOpArbitraryFloatEQINTEL = 5854, + SpvOpArbitraryFloatRecipINTEL = 5855, + SpvOpArbitraryFloatRSqrtINTEL = 5856, + SpvOpArbitraryFloatCbrtINTEL = 5857, + SpvOpArbitraryFloatHypotINTEL = 5858, + SpvOpArbitraryFloatSqrtINTEL = 5859, + SpvOpArbitraryFloatLogINTEL = 5860, + SpvOpArbitraryFloatLog2INTEL = 5861, + SpvOpArbitraryFloatLog10INTEL = 5862, + SpvOpArbitraryFloatLog1pINTEL = 5863, + SpvOpArbitraryFloatExpINTEL = 5864, + 
SpvOpArbitraryFloatExp2INTEL = 5865, + SpvOpArbitraryFloatExp10INTEL = 5866, + SpvOpArbitraryFloatExpm1INTEL = 5867, + SpvOpArbitraryFloatSinINTEL = 5868, + SpvOpArbitraryFloatCosINTEL = 5869, + SpvOpArbitraryFloatSinCosINTEL = 5870, + SpvOpArbitraryFloatSinPiINTEL = 5871, + SpvOpArbitraryFloatCosPiINTEL = 5872, + SpvOpArbitraryFloatASinINTEL = 5873, + SpvOpArbitraryFloatASinPiINTEL = 5874, + SpvOpArbitraryFloatACosINTEL = 5875, + SpvOpArbitraryFloatACosPiINTEL = 5876, + SpvOpArbitraryFloatATanINTEL = 5877, + SpvOpArbitraryFloatATanPiINTEL = 5878, + SpvOpArbitraryFloatATan2INTEL = 5879, + SpvOpArbitraryFloatPowINTEL = 5880, + SpvOpArbitraryFloatPowRINTEL = 5881, + SpvOpArbitraryFloatPowNINTEL = 5882, + SpvOpLoopControlINTEL = 5887, + SpvOpAliasDomainDeclINTEL = 5911, + SpvOpAliasScopeDeclINTEL = 5912, + SpvOpAliasScopeListDeclINTEL = 5913, + SpvOpFixedSqrtINTEL = 5923, + SpvOpFixedRecipINTEL = 5924, + SpvOpFixedRsqrtINTEL = 5925, + SpvOpFixedSinINTEL = 5926, + SpvOpFixedCosINTEL = 5927, + SpvOpFixedSinCosINTEL = 5928, + SpvOpFixedSinPiINTEL = 5929, + SpvOpFixedCosPiINTEL = 5930, + SpvOpFixedSinCosPiINTEL = 5931, + SpvOpFixedLogINTEL = 5932, + SpvOpFixedExpINTEL = 5933, + SpvOpPtrCastToCrossWorkgroupINTEL = 5934, + SpvOpCrossWorkgroupCastToPtrINTEL = 5938, + SpvOpReadPipeBlockingINTEL = 5946, + SpvOpWritePipeBlockingINTEL = 5947, + SpvOpFPGARegINTEL = 5949, + SpvOpRayQueryGetRayTMinKHR = 6016, + SpvOpRayQueryGetRayFlagsKHR = 6017, + SpvOpRayQueryGetIntersectionTKHR = 6018, + SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + SpvOpRayQueryGetIntersectionInstanceIdKHR = 6020, + SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + SpvOpRayQueryGetIntersectionGeometryIndexKHR = 6022, + SpvOpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + SpvOpRayQueryGetIntersectionBarycentricsKHR = 6024, + SpvOpRayQueryGetIntersectionFrontFaceKHR = 6025, + SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + 
SpvOpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + SpvOpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + SpvOpRayQueryGetWorldRayDirectionKHR = 6029, + SpvOpRayQueryGetWorldRayOriginKHR = 6030, + SpvOpRayQueryGetIntersectionObjectToWorldKHR = 6031, + SpvOpRayQueryGetIntersectionWorldToObjectKHR = 6032, + SpvOpAtomicFAddEXT = 6035, + SpvOpTypeBufferSurfaceINTEL = 6086, + SpvOpTypeStructContinuedINTEL = 6090, + SpvOpConstantCompositeContinuedINTEL = 6091, + SpvOpSpecConstantCompositeContinuedINTEL = 6092, + SpvOpControlBarrierArriveINTEL = 6142, + SpvOpControlBarrierWaitINTEL = 6143, + SpvOpGroupIMulKHR = 6401, + SpvOpGroupFMulKHR = 6402, + SpvOpGroupBitwiseAndKHR = 6403, + SpvOpGroupBitwiseOrKHR = 6404, + SpvOpGroupBitwiseXorKHR = 6405, + SpvOpGroupLogicalAndKHR = 6406, + SpvOpGroupLogicalOrKHR = 6407, + SpvOpGroupLogicalXorKHR = 6408, SpvOpMax = 0x7fffffff, } SpvOp; +#ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include +#endif +inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case SpvOpNop: *hasResult = false; *hasResultType = false; break; + case SpvOpUndef: *hasResult = true; *hasResultType = true; break; + case SpvOpSourceContinued: *hasResult = false; *hasResultType = false; break; + case SpvOpSource: *hasResult = false; *hasResultType = false; break; + case SpvOpSourceExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpName: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberName: *hasResult = false; *hasResultType = false; break; + case SpvOpString: *hasResult = true; *hasResultType = false; break; + case SpvOpLine: *hasResult = false; *hasResultType = false; break; + case SpvOpExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpExtInstImport: *hasResult = true; *hasResultType = false; break; + case SpvOpExtInst: *hasResult = true; 
*hasResultType = true; break; + case SpvOpMemoryModel: *hasResult = false; *hasResultType = false; break; + case SpvOpEntryPoint: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionMode: *hasResult = false; *hasResultType = false; break; + case SpvOpCapability: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeVoid: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeBool: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeInt: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFloat: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeVector: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampler: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeStruct: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePointer: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFunction: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeQueue: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePipe: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case SpvOpConstantTrue: *hasResult = true; *hasResultType = true; break; + case 
SpvOpConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantSampler: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantNull: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case SpvOpFunction: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case SpvOpFunctionCall: *hasResult = true; *hasResultType = true; break; + case SpvOpVariable: *hasResult = true; *hasResultType = true; break; + case SpvOpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case SpvOpLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpStore: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemory: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case SpvOpAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpArrayLength: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case 
SpvOpDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case SpvOpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyObject: *hasResult = true; *hasResultType = true; break; + case SpvOpTranspose: *hasResult = true; *hasResultType = true; break; + case SpvOpSampledImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageDrefGather: *hasResult = true; 
*hasResultType = true; break; + case SpvOpImageRead: *hasResult = true; *hasResultType = true; break; + case SpvOpImageWrite: *hasResult = false; *hasResultType = false; break; + case SpvOpImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToU: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSToF: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToF: *hasResult = true; *hasResultType = true; break; + case SpvOpUConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpSConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpFConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case SpvOpBitcast: *hasResult = true; *hasResultType = true; break; 
+ case SpvOpSNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpFNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpISub: *hasResult = true; *hasResultType = true; break; + case SpvOpFSub: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpUDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpSDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpFDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpUMod: *hasResult = true; *hasResultType = true; break; + case SpvOpSRem: *hasResult = true; *hasResultType = true; break; + case SpvOpSMod: *hasResult = true; *hasResultType = true; break; + case SpvOpFRem: *hasResult = true; *hasResultType = true; break; + case SpvOpFMod: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpOuterProduct: *hasResult = true; *hasResultType = true; break; + case SpvOpDot: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddCarry: *hasResult = true; *hasResultType = true; break; + case SpvOpISubBorrow: *hasResult = true; *hasResultType = true; break; + case SpvOpUMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpSMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpAny: *hasResult = true; *hasResultType = true; break; + case SpvOpAll: 
*hasResult = true; *hasResultType = true; break; + case SpvOpIsNan: *hasResult = true; *hasResultType = true; break; + case SpvOpIsInf: *hasResult = true; *hasResultType = true; break; + case SpvOpIsFinite: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNormal: *hasResult = true; *hasResultType = true; break; + case SpvOpSignBitSet: *hasResult = true; *hasResultType = true; break; + case SpvOpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case SpvOpOrdered: *hasResult = true; *hasResultType = true; break; + case SpvOpUnordered: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNot: *hasResult = true; *hasResultType = true; break; + case SpvOpSelect: *hasResult = true; *hasResultType = true; break; + case SpvOpIEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpINotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdNotEqual: *hasResult = true; 
*hasResultType = true; break; + case SpvOpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpNot: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitReverse: *hasResult = true; *hasResultType = true; break; + case SpvOpBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdx: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdy: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidth: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyFine: *hasResult = true; *hasResultType = true; break; + 
case SpvOpFwidthFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicStore: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicISub: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicOr: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicXor: *hasResult = true; *hasResultType = true; break; + case SpvOpPhi: 
*hasResult = true; *hasResultType = true; break; + case SpvOpLoopMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpLabel: *hasResult = true; *hasResultType = false; break; + case SpvOpBranch: *hasResult = false; *hasResultType = false; break; + case SpvOpBranchConditional: *hasResult = false; *hasResultType = false; break; + case SpvOpSwitch: *hasResult = false; *hasResultType = false; break; + case SpvOpKill: *hasResult = false; *hasResultType = false; break; + case SpvOpReturn: *hasResult = false; *hasResultType = false; break; + case SpvOpReturnValue: *hasResult = false; *hasResultType = false; break; + case SpvOpUnreachable: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipe: *hasResult 
= true; *hasResultType = true; break; + case SpvOpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case SpvOpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case SpvOpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case SpvOpRetainEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpSetUserEventStatus: *hasResult = false; 
*hasResultType = false; break; + case SpvOpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case SpvOpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case SpvOpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case SpvOpNoLine: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case SpvOpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case SpvOpSizeOf: *hasResult = true; *hasResultType = true; break; + case SpvOpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case SpvOpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + 
case SpvOpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case SpvOpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorateId: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case 
SpvOpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrDiff: *hasResult = true; *hasResultType = true; break; + case SpvOpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case 
SpvOpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpSDot: *hasResult = true; *hasResultType = true; break; + case SpvOpUDot: *hasResult = true; *hasResultType = true; break; + case SpvOpSUDot: *hasResult = true; *hasResultType = true; break; + case SpvOpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break; + case SpvOpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTypeKHR: *hasResult = 
true; *hasResultType = true; break; + case SpvOpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case SpvOpExecuteCallableNV: *hasResult = 
false; *hasResultType = false; break; + case SpvOpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case SpvOpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case SpvOpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case SpvOpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case SpvOpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageBlockReadINTEL: *hasResult = true; 
*hasResultType = true; break; + case SpvOpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpAssumeTrueKHR: 
*hasResult = false; *hasResultType = false; break; + case SpvOpExpectKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + 
case SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + 
case SpvOpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = 
true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; 
*hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; 
break; + case SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLTINTEL: *hasResult 
= true; *hasResultType = true; break; + case SpvOpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatACosINTEL: *hasResult = true; 
*hasResultType = true; break; + case SpvOpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; 
break; + case SpvOpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFAddEXT: *hasResult = 
true; *hasResultType = true; break; + case SpvOpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + #endif diff --git a/spirv.hpp b/spirv.hpp index adc13de3f34..e25264af28f 100644 --- a/spirv.hpp +++ b/spirv.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2014-2019 The Khronos Group Inc. +// Copyright (c) 2014-2020 The Khronos Group Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and/or associated documentation files (the "Materials"), @@ -26,7 +26,7 @@ // the Binary Section of the SPIR-V specification. 
// Enumeration tokens for SPIR-V, in various styles: -// C, C++, C++11, JSON, Lua, Python, C#, D +// C, C++, C++11, JSON, Lua, Python, C#, D, Beef // // - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL // - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL @@ -36,6 +36,8 @@ // - C# will use enum classes in the Specification class located in the "Spv" namespace, // e.g.: Spv.Specification.SourceLanguage.GLSL // - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// - Beef will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL // // Some tokens act like mask values, which can be OR'd together, // while others are mutually exclusive. The mask-like ones have @@ -49,12 +51,12 @@ namespace spv { typedef unsigned int Id; -#define SPV_VERSION 0x10300 -#define SPV_REVISION 6 +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 static const unsigned int MagicNumber = 0x07230203; -static const unsigned int Version = 0x00010300; -static const unsigned int Revision = 6; +static const unsigned int Version = 0x00010600; +static const unsigned int Revision = 1; static const unsigned int OpCodeMask = 0xffff; static const unsigned int WordCountShift = 16; @@ -65,6 +67,8 @@ enum SourceLanguage { SourceLanguageOpenCL_C = 3, SourceLanguageOpenCL_CPP = 4, SourceLanguageHLSL = 5, + SourceLanguageCPP_for_OpenCL = 6, + SourceLanguageSYCL = 7, SourceLanguageMax = 0x7fffffff, }; @@ -78,12 +82,20 @@ enum ExecutionModel { ExecutionModelKernel = 6, ExecutionModelTaskNV = 5267, ExecutionModelMeshNV = 5268, + ExecutionModelRayGenerationKHR = 5313, ExecutionModelRayGenerationNV = 5313, + ExecutionModelIntersectionKHR = 5314, ExecutionModelIntersectionNV = 5314, + ExecutionModelAnyHitKHR = 5315, ExecutionModelAnyHitNV = 5315, + ExecutionModelClosestHitKHR = 5316, ExecutionModelClosestHitNV = 5316, + ExecutionModelMissKHR = 5317, ExecutionModelMissNV = 5317, 
+ ExecutionModelCallableKHR = 5318, ExecutionModelCallableNV = 5318, + ExecutionModelTaskEXT = 5364, + ExecutionModelMeshEXT = 5365, ExecutionModelMax = 0x7fffffff, }; @@ -91,6 +103,7 @@ enum AddressingModel { AddressingModelLogical = 0, AddressingModelPhysical32 = 1, AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64 = 5348, AddressingModelPhysicalStorageBuffer64EXT = 5348, AddressingModelMax = 0x7fffffff, }; @@ -99,6 +112,7 @@ enum MemoryModel { MemoryModelSimple = 0, MemoryModelGLSL450 = 1, MemoryModelOpenCL = 2, + MemoryModelVulkan = 3, MemoryModelVulkanKHR = 3, MemoryModelMax = 0x7fffffff, }; @@ -142,18 +156,46 @@ enum ExecutionMode { ExecutionModeSubgroupsPerWorkgroupId = 37, ExecutionModeLocalSizeId = 38, ExecutionModeLocalSizeHintId = 39, + ExecutionModeSubgroupUniformControlFlowKHR = 4421, ExecutionModePostDepthCoverage = 4446, ExecutionModeDenormPreserve = 4459, ExecutionModeDenormFlushToZero = 4460, ExecutionModeSignedZeroInfNanPreserve = 4461, ExecutionModeRoundingModeRTE = 4462, ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeEarlyAndLateFragmentTestsAMD = 5017, ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeStencilRefUnchangedFrontAMD = 5079, + ExecutionModeStencilRefGreaterFrontAMD = 5080, + ExecutionModeStencilRefLessFrontAMD = 5081, + ExecutionModeStencilRefUnchangedBackAMD = 5082, + ExecutionModeStencilRefGreaterBackAMD = 5083, + ExecutionModeStencilRefLessBackAMD = 5084, + ExecutionModeOutputLinesEXT = 5269, ExecutionModeOutputLinesNV = 5269, + ExecutionModeOutputPrimitivesEXT = 5270, ExecutionModeOutputPrimitivesNV = 5270, ExecutionModeDerivativeGroupQuadsNV = 5289, ExecutionModeDerivativeGroupLinearNV = 5290, + ExecutionModeOutputTrianglesEXT = 5298, ExecutionModeOutputTrianglesNV = 5298, + ExecutionModePixelInterlockOrderedEXT = 5366, + ExecutionModePixelInterlockUnorderedEXT = 5367, + ExecutionModeSampleInterlockOrderedEXT = 5368, + ExecutionModeSampleInterlockUnorderedEXT = 5369, + 
ExecutionModeShadingRateInterlockOrderedEXT = 5370, + ExecutionModeShadingRateInterlockUnorderedEXT = 5371, + ExecutionModeSharedLocalMemorySizeINTEL = 5618, + ExecutionModeRoundingModeRTPINTEL = 5620, + ExecutionModeRoundingModeRTNINTEL = 5621, + ExecutionModeFloatingPointModeALTINTEL = 5622, + ExecutionModeFloatingPointModeIEEEINTEL = 5623, + ExecutionModeMaxWorkgroupSizeINTEL = 5893, + ExecutionModeMaxWorkDimINTEL = 5894, + ExecutionModeNoGlobalOffsetINTEL = 5895, + ExecutionModeNumSIMDWorkitemsINTEL = 5896, + ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeNamedBarrierCountINTEL = 6417, ExecutionModeMax = 0x7fffffff, }; @@ -171,13 +213,24 @@ enum StorageClass { StorageClassAtomicCounter = 10, StorageClassImage = 11, StorageClassStorageBuffer = 12, + StorageClassCallableDataKHR = 5328, StorageClassCallableDataNV = 5328, + StorageClassIncomingCallableDataKHR = 5329, StorageClassIncomingCallableDataNV = 5329, + StorageClassRayPayloadKHR = 5338, StorageClassRayPayloadNV = 5338, + StorageClassHitAttributeKHR = 5339, StorageClassHitAttributeNV = 5339, + StorageClassIncomingRayPayloadKHR = 5342, StorageClassIncomingRayPayloadNV = 5342, + StorageClassShaderRecordBufferKHR = 5343, StorageClassShaderRecordBufferNV = 5343, + StorageClassPhysicalStorageBuffer = 5349, StorageClassPhysicalStorageBufferEXT = 5349, + StorageClassTaskPayloadWorkgroupEXT = 5402, + StorageClassCodeSectionINTEL = 5605, + StorageClassDeviceOnlyINTEL = 5936, + StorageClassHostOnlyINTEL = 5937, StorageClassMax = 0x7fffffff, }; @@ -248,6 +301,8 @@ enum ImageFormat { ImageFormatRg8ui = 37, ImageFormatR16ui = 38, ImageFormatR8ui = 39, + ImageFormatR64ui = 40, + ImageFormatR64i = 41, ImageFormatMax = 0x7fffffff, }; @@ -305,10 +360,18 @@ enum ImageOperandsShift { ImageOperandsConstOffsetsShift = 5, ImageOperandsSampleShift = 6, ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableShift = 8, ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleShift = 9, 
ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelShift = 10, ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelShift = 11, ImageOperandsVolatileTexelKHRShift = 11, + ImageOperandsSignExtendShift = 12, + ImageOperandsZeroExtendShift = 13, + ImageOperandsNontemporalShift = 14, + ImageOperandsOffsetsShift = 16, ImageOperandsMax = 0x7fffffff, }; @@ -322,10 +385,18 @@ enum ImageOperandsMask { ImageOperandsConstOffsetsMask = 0x00000020, ImageOperandsSampleMask = 0x00000040, ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableMask = 0x00000100, ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleMask = 0x00000200, ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelMask = 0x00000400, ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelMask = 0x00000800, ImageOperandsVolatileTexelKHRMask = 0x00000800, + ImageOperandsSignExtendMask = 0x00001000, + ImageOperandsZeroExtendMask = 0x00002000, + ImageOperandsNontemporalMask = 0x00004000, + ImageOperandsOffsetsMask = 0x00010000, }; enum FPFastMathModeShift { @@ -334,6 +405,8 @@ enum FPFastMathModeShift { FPFastMathModeNSZShift = 2, FPFastMathModeAllowRecipShift = 3, FPFastMathModeFastShift = 4, + FPFastMathModeAllowContractFastINTELShift = 16, + FPFastMathModeAllowReassocINTELShift = 17, FPFastMathModeMax = 0x7fffffff, }; @@ -344,6 +417,8 @@ enum FPFastMathModeMask { FPFastMathModeNSZMask = 0x00000004, FPFastMathModeAllowRecipMask = 0x00000008, FPFastMathModeFastMask = 0x00000010, + FPFastMathModeAllowContractFastINTELMask = 0x00010000, + FPFastMathModeAllowReassocINTELMask = 0x00020000, }; enum FPRoundingMode { @@ -357,6 +432,7 @@ enum FPRoundingMode { enum LinkageType { LinkageTypeExport = 0, LinkageTypeImport = 1, + LinkageTypeLinkOnceODR = 2, LinkageTypeMax = 0x7fffffff, }; @@ -406,6 +482,7 @@ enum Decoration { DecorationNonWritable = 24, DecorationNonReadable = 25, 
DecorationUniform = 26, + DecorationUniformId = 27, DecorationSaturatedConversion = 28, DecorationStream = 29, DecorationLocation = 30, @@ -433,15 +510,64 @@ enum Decoration { DecorationPassthroughNV = 5250, DecorationViewportRelativeNV = 5252, DecorationSecondaryViewportRelativeNV = 5256, + DecorationPerPrimitiveEXT = 5271, DecorationPerPrimitiveNV = 5271, DecorationPerViewNV = 5272, DecorationPerTaskNV = 5273, + DecorationPerVertexKHR = 5285, DecorationPerVertexNV = 5285, + DecorationNonUniform = 5300, DecorationNonUniformEXT = 5300, + DecorationRestrictPointer = 5355, DecorationRestrictPointerEXT = 5355, + DecorationAliasedPointer = 5356, DecorationAliasedPointerEXT = 5356, + DecorationBindlessSamplerNV = 5398, + DecorationBindlessImageNV = 5399, + DecorationBoundSamplerNV = 5400, + DecorationBoundImageNV = 5401, + DecorationSIMTCallINTEL = 5599, + DecorationReferencedIndirectlyINTEL = 5602, + DecorationClobberINTEL = 5607, + DecorationSideEffectsINTEL = 5608, + DecorationVectorComputeVariableINTEL = 5624, + DecorationFuncParamIOKindINTEL = 5625, + DecorationVectorComputeFunctionINTEL = 5626, + DecorationStackCallINTEL = 5627, + DecorationGlobalVariableOffsetINTEL = 5628, + DecorationCounterBuffer = 5634, DecorationHlslCounterBufferGOOGLE = 5634, DecorationHlslSemanticGOOGLE = 5635, + DecorationUserSemantic = 5635, + DecorationUserTypeGOOGLE = 5636, + DecorationFunctionRoundingModeINTEL = 5822, + DecorationFunctionDenormModeINTEL = 5823, + DecorationRegisterINTEL = 5825, + DecorationMemoryINTEL = 5826, + DecorationNumbanksINTEL = 5827, + DecorationBankwidthINTEL = 5828, + DecorationMaxPrivateCopiesINTEL = 5829, + DecorationSinglepumpINTEL = 5830, + DecorationDoublepumpINTEL = 5831, + DecorationMaxReplicatesINTEL = 5832, + DecorationSimpleDualPortINTEL = 5833, + DecorationMergeINTEL = 5834, + DecorationBankBitsINTEL = 5835, + DecorationForcePow2DepthINTEL = 5836, + DecorationBurstCoalesceINTEL = 5899, + DecorationCacheSizeINTEL = 5900, + 
DecorationDontStaticallyCoalesceINTEL = 5901, + DecorationPrefetchINTEL = 5902, + DecorationStallEnableINTEL = 5905, + DecorationFuseLoopsInFunctionINTEL = 5907, + DecorationAliasScopeINTEL = 5914, + DecorationNoAliasINTEL = 5915, + DecorationBufferLocationINTEL = 5921, + DecorationIOPipeStorageINTEL = 5944, + DecorationFunctionFloatingPointModeINTEL = 6080, + DecorationSingleElementVectorINTEL = 6085, + DecorationVectorComputeCallableFunctionINTEL = 6087, + DecorationMediaBlockIOINTEL = 6140, DecorationMax = 0x7fffffff, }; @@ -500,8 +626,10 @@ enum BuiltIn { BuiltInBaseVertex = 4424, BuiltInBaseInstance = 4425, BuiltInDrawIndex = 4426, + BuiltInPrimitiveShadingRateKHR = 4432, BuiltInDeviceIndex = 4438, BuiltInViewIndex = 4440, + BuiltInShadingRateKHR = 4444, BuiltInBaryCoordNoPerspAMD = 4992, BuiltInBaryCoordNoPerspCentroidAMD = 4993, BuiltInBaryCoordNoPerspSampleAMD = 4994, @@ -524,26 +652,52 @@ enum BuiltIn { BuiltInLayerPerViewNV = 5279, BuiltInMeshViewCountNV = 5280, BuiltInMeshViewIndicesNV = 5281, + BuiltInBaryCoordKHR = 5286, BuiltInBaryCoordNV = 5286, + BuiltInBaryCoordNoPerspKHR = 5287, BuiltInBaryCoordNoPerspNV = 5287, BuiltInFragSizeEXT = 5292, BuiltInFragmentSizeNV = 5292, BuiltInFragInvocationCountEXT = 5293, BuiltInInvocationsPerPixelNV = 5293, + BuiltInPrimitivePointIndicesEXT = 5294, + BuiltInPrimitiveLineIndicesEXT = 5295, + BuiltInPrimitiveTriangleIndicesEXT = 5296, + BuiltInCullPrimitiveEXT = 5299, + BuiltInLaunchIdKHR = 5319, BuiltInLaunchIdNV = 5319, + BuiltInLaunchSizeKHR = 5320, BuiltInLaunchSizeNV = 5320, + BuiltInWorldRayOriginKHR = 5321, BuiltInWorldRayOriginNV = 5321, + BuiltInWorldRayDirectionKHR = 5322, BuiltInWorldRayDirectionNV = 5322, + BuiltInObjectRayOriginKHR = 5323, BuiltInObjectRayOriginNV = 5323, + BuiltInObjectRayDirectionKHR = 5324, BuiltInObjectRayDirectionNV = 5324, + BuiltInRayTminKHR = 5325, BuiltInRayTminNV = 5325, + BuiltInRayTmaxKHR = 5326, BuiltInRayTmaxNV = 5326, + BuiltInInstanceCustomIndexKHR = 5327, 
BuiltInInstanceCustomIndexNV = 5327, + BuiltInObjectToWorldKHR = 5330, BuiltInObjectToWorldNV = 5330, + BuiltInWorldToObjectKHR = 5331, BuiltInWorldToObjectNV = 5331, BuiltInHitTNV = 5332, + BuiltInHitKindKHR = 5333, BuiltInHitKindNV = 5333, + BuiltInCurrentRayTimeNV = 5334, + BuiltInIncomingRayFlagsKHR = 5351, BuiltInIncomingRayFlagsNV = 5351, + BuiltInRayGeometryIndexKHR = 5352, + BuiltInWarpsPerSMNV = 5374, + BuiltInSMCountNV = 5375, + BuiltInWarpIDNV = 5376, + BuiltInSMIDNV = 5377, + BuiltInCullMaskKHR = 6021, BuiltInMax = 0x7fffffff, }; @@ -564,6 +718,19 @@ enum LoopControlShift { LoopControlDontUnrollShift = 1, LoopControlDependencyInfiniteShift = 2, LoopControlDependencyLengthShift = 3, + LoopControlMinIterationsShift = 4, + LoopControlMaxIterationsShift = 5, + LoopControlIterationMultipleShift = 6, + LoopControlPeelCountShift = 7, + LoopControlPartialCountShift = 8, + LoopControlInitiationIntervalINTELShift = 16, + LoopControlMaxConcurrencyINTELShift = 17, + LoopControlDependencyArrayINTELShift = 18, + LoopControlPipelineEnableINTELShift = 19, + LoopControlLoopCoalesceINTELShift = 20, + LoopControlMaxInterleavingINTELShift = 21, + LoopControlSpeculatedIterationsINTELShift = 22, + LoopControlNoFusionINTELShift = 23, LoopControlMax = 0x7fffffff, }; @@ -573,6 +740,19 @@ enum LoopControlMask { LoopControlDontUnrollMask = 0x00000002, LoopControlDependencyInfiniteMask = 0x00000004, LoopControlDependencyLengthMask = 0x00000008, + LoopControlMinIterationsMask = 0x00000010, + LoopControlMaxIterationsMask = 0x00000020, + LoopControlIterationMultipleMask = 0x00000040, + LoopControlPeelCountMask = 0x00000080, + LoopControlPartialCountMask = 0x00000100, + LoopControlInitiationIntervalINTELMask = 0x00010000, + LoopControlMaxConcurrencyINTELMask = 0x00020000, + LoopControlDependencyArrayINTELMask = 0x00040000, + LoopControlPipelineEnableINTELMask = 0x00080000, + LoopControlLoopCoalesceINTELMask = 0x00100000, + LoopControlMaxInterleavingINTELMask = 0x00200000, + 
LoopControlSpeculatedIterationsINTELMask = 0x00400000, + LoopControlNoFusionINTELMask = 0x00800000, }; enum FunctionControlShift { @@ -580,6 +760,7 @@ enum FunctionControlShift { FunctionControlDontInlineShift = 1, FunctionControlPureShift = 2, FunctionControlConstShift = 3, + FunctionControlOptNoneINTELShift = 16, FunctionControlMax = 0x7fffffff, }; @@ -589,6 +770,7 @@ enum FunctionControlMask { FunctionControlDontInlineMask = 0x00000002, FunctionControlPureMask = 0x00000004, FunctionControlConstMask = 0x00000008, + FunctionControlOptNoneINTELMask = 0x00010000, }; enum MemorySemanticsShift { @@ -602,9 +784,13 @@ enum MemorySemanticsShift { MemorySemanticsCrossWorkgroupMemoryShift = 9, MemorySemanticsAtomicCounterMemoryShift = 10, MemorySemanticsImageMemoryShift = 11, + MemorySemanticsOutputMemoryShift = 12, MemorySemanticsOutputMemoryKHRShift = 12, + MemorySemanticsMakeAvailableShift = 13, MemorySemanticsMakeAvailableKHRShift = 13, + MemorySemanticsMakeVisibleShift = 14, MemorySemanticsMakeVisibleKHRShift = 14, + MemorySemanticsVolatileShift = 15, MemorySemanticsMax = 0x7fffffff, }; @@ -620,18 +806,27 @@ enum MemorySemanticsMask { MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, MemorySemanticsAtomicCounterMemoryMask = 0x00000400, MemorySemanticsImageMemoryMask = 0x00000800, + MemorySemanticsOutputMemoryMask = 0x00001000, MemorySemanticsOutputMemoryKHRMask = 0x00001000, + MemorySemanticsMakeAvailableMask = 0x00002000, MemorySemanticsMakeAvailableKHRMask = 0x00002000, + MemorySemanticsMakeVisibleMask = 0x00004000, MemorySemanticsMakeVisibleKHRMask = 0x00004000, + MemorySemanticsVolatileMask = 0x00008000, }; enum MemoryAccessShift { MemoryAccessVolatileShift = 0, MemoryAccessAlignedShift = 1, MemoryAccessNontemporalShift = 2, + MemoryAccessMakePointerAvailableShift = 3, MemoryAccessMakePointerAvailableKHRShift = 3, + MemoryAccessMakePointerVisibleShift = 4, MemoryAccessMakePointerVisibleKHRShift = 4, + MemoryAccessNonPrivatePointerShift = 5, 
MemoryAccessNonPrivatePointerKHRShift = 5, + MemoryAccessAliasScopeINTELMaskShift = 16, + MemoryAccessNoAliasINTELMaskShift = 17, MemoryAccessMax = 0x7fffffff, }; @@ -640,9 +835,14 @@ enum MemoryAccessMask { MemoryAccessVolatileMask = 0x00000001, MemoryAccessAlignedMask = 0x00000002, MemoryAccessNontemporalMask = 0x00000004, + MemoryAccessMakePointerAvailableMask = 0x00000008, MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleMask = 0x00000010, MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerMask = 0x00000020, MemoryAccessNonPrivatePointerKHRMask = 0x00000020, + MemoryAccessAliasScopeINTELMaskMask = 0x00010000, + MemoryAccessNoAliasINTELMaskMask = 0x00020000, }; enum Scope { @@ -651,7 +851,9 @@ enum Scope { ScopeWorkgroup = 2, ScopeSubgroup = 3, ScopeInvocation = 4, + ScopeQueueFamily = 5, ScopeQueueFamilyKHR = 5, + ScopeShaderCallKHR = 6, ScopeMax = 0x7fffffff, }; @@ -751,8 +953,15 @@ enum Capability { CapabilityGroupNonUniformShuffleRelative = 66, CapabilityGroupNonUniformClustered = 67, CapabilityGroupNonUniformQuad = 68, + CapabilityShaderLayer = 69, + CapabilityShaderViewportIndex = 70, + CapabilityUniformDecoration = 71, + CapabilityFragmentShadingRateKHR = 4422, CapabilitySubgroupBallotKHR = 4423, CapabilityDrawParameters = 4427, + CapabilityWorkgroupMemoryExplicitLayoutKHR = 4428, + CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, CapabilitySubgroupVoteKHR = 4431, CapabilityStorageBuffer16BitAccess = 4433, CapabilityStorageUniformBufferBlock16 = 4433, @@ -774,11 +983,17 @@ enum Capability { CapabilitySignedZeroInfNanPreserve = 4466, CapabilityRoundingModeRTE = 4467, CapabilityRoundingModeRTZ = 4468, + CapabilityRayQueryProvisionalKHR = 4471, + CapabilityRayQueryKHR = 4472, + CapabilityRayTraversalPrimitiveCullingKHR = 4478, + CapabilityRayTracingKHR = 4479, CapabilityFloat16ImageAMD = 5008, 
CapabilityImageGatherBiasLodAMD = 5009, CapabilityFragmentMaskAMD = 5010, CapabilityStencilExportEXT = 5013, CapabilityImageReadWriteLodAMD = 5015, + CapabilityInt64ImageEXT = 5016, + CapabilityShaderClockKHR = 5055, CapabilitySampleMaskOverrideCoverageNV = 5249, CapabilityGeometryShaderPassthroughNV = 5251, CapabilityShaderViewportIndexLayerEXT = 5254, @@ -789,35 +1004,217 @@ enum Capability { CapabilityFragmentFullyCoveredEXT = 5265, CapabilityMeshShadingNV = 5266, CapabilityImageFootprintNV = 5282, + CapabilityMeshShadingEXT = 5283, + CapabilityFragmentBarycentricKHR = 5284, CapabilityFragmentBarycentricNV = 5284, CapabilityComputeDerivativeGroupQuadsNV = 5288, CapabilityFragmentDensityEXT = 5291, CapabilityShadingRateNV = 5291, CapabilityGroupNonUniformPartitionedNV = 5297, + CapabilityShaderNonUniform = 5301, CapabilityShaderNonUniformEXT = 5301, + CapabilityRuntimeDescriptorArray = 5302, CapabilityRuntimeDescriptorArrayEXT = 5302, + CapabilityInputAttachmentArrayDynamicIndexing = 5303, CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + CapabilityUniformTexelBufferArrayDynamicIndexing = 5304, CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + CapabilityStorageTexelBufferArrayDynamicIndexing = 5305, CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + CapabilityUniformBufferArrayNonUniformIndexing = 5306, CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + CapabilitySampledImageArrayNonUniformIndexing = 5307, CapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + CapabilityStorageBufferArrayNonUniformIndexing = 5308, CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + CapabilityStorageImageArrayNonUniformIndexing = 5309, CapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + CapabilityInputAttachmentArrayNonUniformIndexing = 5310, CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 
5311, + CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, CapabilityRayTracingNV = 5340, + CapabilityRayTracingMotionBlurNV = 5341, + CapabilityVulkanMemoryModel = 5345, CapabilityVulkanMemoryModelKHR = 5345, + CapabilityVulkanMemoryModelDeviceScope = 5346, CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + CapabilityPhysicalStorageBufferAddresses = 5347, CapabilityPhysicalStorageBufferAddressesEXT = 5347, CapabilityComputeDerivativeGroupLinearNV = 5350, + CapabilityRayTracingProvisionalKHR = 5353, + CapabilityCooperativeMatrixNV = 5357, + CapabilityFragmentShaderSampleInterlockEXT = 5363, + CapabilityFragmentShaderShadingRateInterlockEXT = 5372, + CapabilityShaderSMBuiltinsNV = 5373, + CapabilityFragmentShaderPixelInterlockEXT = 5378, + CapabilityDemoteToHelperInvocation = 5379, + CapabilityDemoteToHelperInvocationEXT = 5379, + CapabilityBindlessTextureNV = 5390, CapabilitySubgroupShuffleINTEL = 5568, CapabilitySubgroupBufferBlockIOINTEL = 5569, CapabilitySubgroupImageBlockIOINTEL = 5570, CapabilitySubgroupImageMediaBlockIOINTEL = 5579, + CapabilityRoundToInfinityINTEL = 5582, + CapabilityFloatingPointModeINTEL = 5583, + CapabilityIntegerFunctions2INTEL = 5584, + CapabilityFunctionPointersINTEL = 5603, + CapabilityIndirectReferencesINTEL = 5604, + CapabilityAsmINTEL = 5606, + CapabilityAtomicFloat32MinMaxEXT = 5612, + CapabilityAtomicFloat64MinMaxEXT = 5613, + CapabilityAtomicFloat16MinMaxEXT = 5616, + CapabilityVectorComputeINTEL = 5617, + CapabilityVectorAnyINTEL = 5619, + CapabilityExpectAssumeKHR = 5629, + CapabilitySubgroupAvcMotionEstimationINTEL = 5696, + CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + CapabilityVariableLengthArrayINTEL = 5817, + CapabilityFunctionFloatControlINTEL = 5821, + CapabilityFPGAMemoryAttributesINTEL = 5824, + CapabilityFPFastMathModeINTEL = 5837, + CapabilityArbitraryPrecisionIntegersINTEL 
= 5844, + CapabilityArbitraryPrecisionFloatingPointINTEL = 5845, + CapabilityUnstructuredLoopControlsINTEL = 5886, + CapabilityFPGALoopControlsINTEL = 5888, + CapabilityKernelAttributesINTEL = 5892, + CapabilityFPGAKernelAttributesINTEL = 5897, + CapabilityFPGAMemoryAccessesINTEL = 5898, + CapabilityFPGAClusterAttributesINTEL = 5904, + CapabilityLoopFuseINTEL = 5906, + CapabilityMemoryAccessAliasingINTEL = 5910, + CapabilityFPGABufferLocationINTEL = 5920, + CapabilityArbitraryPrecisionFixedPointINTEL = 5922, + CapabilityUSMStorageClassesINTEL = 5935, + CapabilityIOPipesINTEL = 5943, + CapabilityBlockingPipesINTEL = 5945, + CapabilityFPGARegINTEL = 5948, + CapabilityDotProductInputAll = 6016, + CapabilityDotProductInputAllKHR = 6016, + CapabilityDotProductInput4x8Bit = 6017, + CapabilityDotProductInput4x8BitKHR = 6017, + CapabilityDotProductInput4x8BitPacked = 6018, + CapabilityDotProductInput4x8BitPackedKHR = 6018, + CapabilityDotProduct = 6019, + CapabilityDotProductKHR = 6019, + CapabilityRayCullMaskKHR = 6020, + CapabilityBitInstructions = 6025, + CapabilityGroupNonUniformRotateKHR = 6026, + CapabilityAtomicFloat32AddEXT = 6033, + CapabilityAtomicFloat64AddEXT = 6034, + CapabilityLongConstantCompositeINTEL = 6089, + CapabilityOptNoneINTEL = 6094, + CapabilityAtomicFloat16AddEXT = 6095, + CapabilityDebugInfoModuleINTEL = 6114, + CapabilitySplitBarrierINTEL = 6141, + CapabilityGroupUniformArithmeticKHR = 6400, CapabilityMax = 0x7fffffff, }; +enum RayFlagsShift { + RayFlagsOpaqueKHRShift = 0, + RayFlagsNoOpaqueKHRShift = 1, + RayFlagsTerminateOnFirstHitKHRShift = 2, + RayFlagsSkipClosestHitShaderKHRShift = 3, + RayFlagsCullBackFacingTrianglesKHRShift = 4, + RayFlagsCullFrontFacingTrianglesKHRShift = 5, + RayFlagsCullOpaqueKHRShift = 6, + RayFlagsCullNoOpaqueKHRShift = 7, + RayFlagsSkipTrianglesKHRShift = 8, + RayFlagsSkipAABBsKHRShift = 9, + RayFlagsMax = 0x7fffffff, +}; + +enum RayFlagsMask { + RayFlagsMaskNone = 0, + RayFlagsOpaqueKHRMask = 0x00000001, + 
RayFlagsNoOpaqueKHRMask = 0x00000002, + RayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + RayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + RayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + RayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + RayFlagsCullOpaqueKHRMask = 0x00000040, + RayFlagsCullNoOpaqueKHRMask = 0x00000080, + RayFlagsSkipTrianglesKHRMask = 0x00000100, + RayFlagsSkipAABBsKHRMask = 0x00000200, +}; + +enum RayQueryIntersection { + RayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + RayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + RayQueryIntersectionMax = 0x7fffffff, +}; + +enum RayQueryCommittedIntersectionType { + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + RayQueryCommittedIntersectionTypeMax = 0x7fffffff, +}; + +enum RayQueryCandidateIntersectionType { + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + RayQueryCandidateIntersectionTypeMax = 0x7fffffff, +}; + +enum FragmentShadingRateShift { + FragmentShadingRateVertical2PixelsShift = 0, + FragmentShadingRateVertical4PixelsShift = 1, + FragmentShadingRateHorizontal2PixelsShift = 2, + FragmentShadingRateHorizontal4PixelsShift = 3, + FragmentShadingRateMax = 0x7fffffff, +}; + +enum FragmentShadingRateMask { + FragmentShadingRateMaskNone = 0, + FragmentShadingRateVertical2PixelsMask = 0x00000001, + FragmentShadingRateVertical4PixelsMask = 0x00000002, + FragmentShadingRateHorizontal2PixelsMask = 0x00000004, + FragmentShadingRateHorizontal4PixelsMask = 0x00000008, +}; + +enum FPDenormMode { + FPDenormModePreserve = 0, + FPDenormModeFlushToZero = 1, + FPDenormModeMax = 0x7fffffff, +}; + +enum FPOperationMode { + FPOperationModeIEEE = 0, + FPOperationModeALT = 1, + 
FPOperationModeMax = 0x7fffffff, +}; + +enum QuantizationModes { + QuantizationModesTRN = 0, + QuantizationModesTRN_ZERO = 1, + QuantizationModesRND = 2, + QuantizationModesRND_ZERO = 3, + QuantizationModesRND_INF = 4, + QuantizationModesRND_MIN_INF = 5, + QuantizationModesRND_CONV = 6, + QuantizationModesRND_CONV_ODD = 7, + QuantizationModesMax = 0x7fffffff, +}; + +enum OverflowModes { + OverflowModesWRAP = 0, + OverflowModesSAT = 1, + OverflowModesSAT_ZERO = 2, + OverflowModesSAT_SYM = 3, + OverflowModesMax = 0x7fffffff, +}; + +enum PackedVectorFormat { + PackedVectorFormatPackedVectorFormat4x8Bit = 0, + PackedVectorFormatPackedVectorFormat4x8BitKHR = 0, + PackedVectorFormatMax = 0x7fffffff, +}; + enum Op { OpNop = 0, OpUndef = 1, @@ -1159,12 +1556,42 @@ enum Op { OpGroupNonUniformLogicalXor = 364, OpGroupNonUniformQuadBroadcast = 365, OpGroupNonUniformQuadSwap = 366, + OpCopyLogical = 400, + OpPtrEqual = 401, + OpPtrNotEqual = 402, + OpPtrDiff = 403, + OpTerminateInvocation = 4416, OpSubgroupBallotKHR = 4421, OpSubgroupFirstInvocationKHR = 4422, OpSubgroupAllKHR = 4428, OpSubgroupAnyKHR = 4429, OpSubgroupAllEqualKHR = 4430, + OpGroupNonUniformRotateKHR = 4431, OpSubgroupReadInvocationKHR = 4432, + OpTraceRayKHR = 4445, + OpExecuteCallableKHR = 4446, + OpConvertUToAccelerationStructureKHR = 4447, + OpIgnoreIntersectionKHR = 4448, + OpTerminateRayKHR = 4449, + OpSDot = 4450, + OpSDotKHR = 4450, + OpUDot = 4451, + OpUDotKHR = 4451, + OpSUDot = 4452, + OpSUDotKHR = 4452, + OpSDotAccSat = 4453, + OpSDotAccSatKHR = 4453, + OpUDotAccSat = 4454, + OpUDotAccSatKHR = 4454, + OpSUDotAccSat = 4455, + OpSUDotAccSatKHR = 4455, + OpTypeRayQueryKHR = 4472, + OpRayQueryInitializeKHR = 4473, + OpRayQueryTerminateKHR = 4474, + OpRayQueryGenerateIntersectionKHR = 4475, + OpRayQueryConfirmIntersectionKHR = 4476, + OpRayQueryProceedKHR = 4477, + OpRayQueryGetIntersectionTypeKHR = 4479, OpGroupIAddNonUniformAMD = 5000, OpGroupFAddNonUniformAMD = 5001, OpGroupFMinNonUniformAMD = 5002, 
@@ -1175,15 +1602,39 @@ enum Op { OpGroupSMaxNonUniformAMD = 5007, OpFragmentMaskFetchAMD = 5011, OpFragmentFetchAMD = 5012, + OpReadClockKHR = 5056, OpImageSampleFootprintNV = 5283, + OpEmitMeshTasksEXT = 5294, + OpSetMeshOutputsEXT = 5295, OpGroupNonUniformPartitionNV = 5296, OpWritePackedPrimitiveIndices4x8NV = 5299, + OpReportIntersectionKHR = 5334, OpReportIntersectionNV = 5334, OpIgnoreIntersectionNV = 5335, OpTerminateRayNV = 5336, OpTraceNV = 5337, + OpTraceMotionNV = 5338, + OpTraceRayMotionNV = 5339, + OpTypeAccelerationStructureKHR = 5341, OpTypeAccelerationStructureNV = 5341, OpExecuteCallableNV = 5344, + OpTypeCooperativeMatrixNV = 5358, + OpCooperativeMatrixLoadNV = 5359, + OpCooperativeMatrixStoreNV = 5360, + OpCooperativeMatrixMulAddNV = 5361, + OpCooperativeMatrixLengthNV = 5362, + OpBeginInvocationInterlockEXT = 5364, + OpEndInvocationInterlockEXT = 5365, + OpDemoteToHelperInvocation = 5380, + OpDemoteToHelperInvocationEXT = 5380, + OpIsHelperInvocationEXT = 5381, + OpConvertUToImageNV = 5391, + OpConvertUToSamplerNV = 5392, + OpConvertImageToUNV = 5393, + OpConvertSamplerToUNV = 5394, + OpConvertUToSampledImageNV = 5395, + OpConvertSampledImageToUNV = 5396, + OpSamplerImageAddressingModeNV = 5397, OpSubgroupShuffleINTEL = 5571, OpSubgroupShuffleDownINTEL = 5572, OpSubgroupShuffleUpINTEL = 5573, @@ -1194,11 +1645,921 @@ enum Op { OpSubgroupImageBlockWriteINTEL = 5578, OpSubgroupImageMediaBlockReadINTEL = 5580, OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpUCountLeadingZerosINTEL = 5585, + OpUCountTrailingZerosINTEL = 5586, + OpAbsISubINTEL = 5587, + OpAbsUSubINTEL = 5588, + OpIAddSatINTEL = 5589, + OpUAddSatINTEL = 5590, + OpIAverageINTEL = 5591, + OpUAverageINTEL = 5592, + OpIAverageRoundedINTEL = 5593, + OpUAverageRoundedINTEL = 5594, + OpISubSatINTEL = 5595, + OpUSubSatINTEL = 5596, + OpIMul32x16INTEL = 5597, + OpUMul32x16INTEL = 5598, + OpConstantFunctionPointerINTEL = 5600, + OpFunctionPointerCallINTEL = 5601, + OpAsmTargetINTEL = 5609, + 
OpAsmINTEL = 5610, + OpAsmCallINTEL = 5611, + OpAtomicFMinEXT = 5614, + OpAtomicFMaxEXT = 5615, + OpAssumeTrueKHR = 5630, + OpExpectKHR = 5631, + OpDecorateString = 5632, OpDecorateStringGOOGLE = 5632, + OpMemberDecorateString = 5633, OpMemberDecorateStringGOOGLE = 5633, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, + OpTypeAvcRefPayloadINTEL = 5702, + OpTypeAvcSicPayloadINTEL = 5703, + OpTypeAvcMcePayloadINTEL = 5704, + OpTypeAvcMceResultINTEL = 5705, + OpTypeAvcImeResultINTEL = 5706, + OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + OpTypeAvcImeDualReferenceStreaminINTEL = 5710, + OpTypeAvcRefResultINTEL = 5711, + OpTypeAvcSicResultINTEL = 5712, + OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + 
OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + OpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + OpSubgroupAvcMceConvertToImeResultINTEL = 5733, + OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + OpSubgroupAvcMceConvertToRefResultINTEL = 5735, + OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + OpSubgroupAvcMceConvertToSicResultINTEL = 5737, + OpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + OpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + OpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + OpSubgroupAvcImeInitializeINTEL = 5747, + OpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + OpSubgroupAvcImeSetDualReferenceINTEL = 5749, + OpSubgroupAvcImeRefWindowSizeINTEL = 5750, + OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + OpSubgroupAvcImeSetWeightedSadINTEL = 5756, + OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + OpSubgroupAvcImeConvertToMceResultINTEL = 5765, + OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + 
OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + OpSubgroupAvcImeGetBorderReachedINTEL = 5776, + OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + OpSubgroupAvcFmeInitializeINTEL = 5781, + OpSubgroupAvcBmeInitializeINTEL = 5782, + OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + OpSubgroupAvcRefConvertToMceResultINTEL = 5790, + OpSubgroupAvcSicInitializeINTEL = 5791, + OpSubgroupAvcSicConfigureSkcINTEL = 5792, + OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + 
OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + OpSubgroupAvcSicEvaluateIpeINTEL = 5803, + OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + OpSubgroupAvcSicConvertToMceResultINTEL = 5808, + OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + OpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + OpVariableLengthArrayINTEL = 5818, + OpSaveMemoryINTEL = 5819, + OpRestoreMemoryINTEL = 5820, + OpArbitraryFloatSinCosPiINTEL = 5840, + OpArbitraryFloatCastINTEL = 5841, + OpArbitraryFloatCastFromIntINTEL = 5842, + OpArbitraryFloatCastToIntINTEL = 5843, + OpArbitraryFloatAddINTEL = 5846, + OpArbitraryFloatSubINTEL = 5847, + OpArbitraryFloatMulINTEL = 5848, + OpArbitraryFloatDivINTEL = 5849, + OpArbitraryFloatGTINTEL = 5850, + OpArbitraryFloatGEINTEL = 5851, + OpArbitraryFloatLTINTEL = 5852, + OpArbitraryFloatLEINTEL = 5853, + OpArbitraryFloatEQINTEL = 5854, + OpArbitraryFloatRecipINTEL = 5855, + OpArbitraryFloatRSqrtINTEL = 5856, + OpArbitraryFloatCbrtINTEL = 5857, + OpArbitraryFloatHypotINTEL = 5858, + OpArbitraryFloatSqrtINTEL = 5859, + OpArbitraryFloatLogINTEL = 5860, + OpArbitraryFloatLog2INTEL = 5861, + OpArbitraryFloatLog10INTEL = 5862, + OpArbitraryFloatLog1pINTEL = 5863, + OpArbitraryFloatExpINTEL = 5864, + OpArbitraryFloatExp2INTEL = 5865, + OpArbitraryFloatExp10INTEL = 5866, + OpArbitraryFloatExpm1INTEL = 5867, + OpArbitraryFloatSinINTEL 
= 5868, + OpArbitraryFloatCosINTEL = 5869, + OpArbitraryFloatSinCosINTEL = 5870, + OpArbitraryFloatSinPiINTEL = 5871, + OpArbitraryFloatCosPiINTEL = 5872, + OpArbitraryFloatASinINTEL = 5873, + OpArbitraryFloatASinPiINTEL = 5874, + OpArbitraryFloatACosINTEL = 5875, + OpArbitraryFloatACosPiINTEL = 5876, + OpArbitraryFloatATanINTEL = 5877, + OpArbitraryFloatATanPiINTEL = 5878, + OpArbitraryFloatATan2INTEL = 5879, + OpArbitraryFloatPowINTEL = 5880, + OpArbitraryFloatPowRINTEL = 5881, + OpArbitraryFloatPowNINTEL = 5882, + OpLoopControlINTEL = 5887, + OpAliasDomainDeclINTEL = 5911, + OpAliasScopeDeclINTEL = 5912, + OpAliasScopeListDeclINTEL = 5913, + OpFixedSqrtINTEL = 5923, + OpFixedRecipINTEL = 5924, + OpFixedRsqrtINTEL = 5925, + OpFixedSinINTEL = 5926, + OpFixedCosINTEL = 5927, + OpFixedSinCosINTEL = 5928, + OpFixedSinPiINTEL = 5929, + OpFixedCosPiINTEL = 5930, + OpFixedSinCosPiINTEL = 5931, + OpFixedLogINTEL = 5932, + OpFixedExpINTEL = 5933, + OpPtrCastToCrossWorkgroupINTEL = 5934, + OpCrossWorkgroupCastToPtrINTEL = 5938, + OpReadPipeBlockingINTEL = 5946, + OpWritePipeBlockingINTEL = 5947, + OpFPGARegINTEL = 5949, + OpRayQueryGetRayTMinKHR = 6016, + OpRayQueryGetRayFlagsKHR = 6017, + OpRayQueryGetIntersectionTKHR = 6018, + OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + OpRayQueryGetIntersectionInstanceIdKHR = 6020, + OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + OpRayQueryGetIntersectionGeometryIndexKHR = 6022, + OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + OpRayQueryGetIntersectionBarycentricsKHR = 6024, + OpRayQueryGetIntersectionFrontFaceKHR = 6025, + OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + OpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + OpRayQueryGetWorldRayDirectionKHR = 6029, + OpRayQueryGetWorldRayOriginKHR = 6030, + OpRayQueryGetIntersectionObjectToWorldKHR = 6031, + OpRayQueryGetIntersectionWorldToObjectKHR = 6032, + 
OpAtomicFAddEXT = 6035, + OpTypeBufferSurfaceINTEL = 6086, + OpTypeStructContinuedINTEL = 6090, + OpConstantCompositeContinuedINTEL = 6091, + OpSpecConstantCompositeContinuedINTEL = 6092, + OpControlBarrierArriveINTEL = 6142, + OpControlBarrierWaitINTEL = 6143, + OpGroupIMulKHR = 6401, + OpGroupFMulKHR = 6402, + OpGroupBitwiseAndKHR = 6403, + OpGroupBitwiseOrKHR = 6404, + OpGroupBitwiseXorKHR = 6405, + OpGroupLogicalAndKHR = 6406, + OpGroupLogicalOrKHR = 6407, + OpGroupLogicalXorKHR = 6408, OpMax = 0x7fffffff, }; +#ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include +#endif +inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case OpNop: *hasResult = false; *hasResultType = false; break; + case OpUndef: *hasResult = true; *hasResultType = true; break; + case OpSourceContinued: *hasResult = false; *hasResultType = false; break; + case OpSource: *hasResult = false; *hasResultType = false; break; + case OpSourceExtension: *hasResult = false; *hasResultType = false; break; + case OpName: *hasResult = false; *hasResultType = false; break; + case OpMemberName: *hasResult = false; *hasResultType = false; break; + case OpString: *hasResult = true; *hasResultType = false; break; + case OpLine: *hasResult = false; *hasResultType = false; break; + case OpExtension: *hasResult = false; *hasResultType = false; break; + case OpExtInstImport: *hasResult = true; *hasResultType = false; break; + case OpExtInst: *hasResult = true; *hasResultType = true; break; + case OpMemoryModel: *hasResult = false; *hasResultType = false; break; + case OpEntryPoint: *hasResult = false; *hasResultType = false; break; + case OpExecutionMode: *hasResult = false; *hasResultType = false; break; + case OpCapability: *hasResult = false; *hasResultType = false; break; + case OpTypeVoid: *hasResult = true; *hasResultType = false; break; + case OpTypeBool: *hasResult = 
true; *hasResultType = false; break; + case OpTypeInt: *hasResult = true; *hasResultType = false; break; + case OpTypeFloat: *hasResult = true; *hasResultType = false; break; + case OpTypeVector: *hasResult = true; *hasResultType = false; break; + case OpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case OpTypeImage: *hasResult = true; *hasResultType = false; break; + case OpTypeSampler: *hasResult = true; *hasResultType = false; break; + case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case OpTypeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeStruct: *hasResult = true; *hasResultType = false; break; + case OpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case OpTypePointer: *hasResult = true; *hasResultType = false; break; + case OpTypeFunction: *hasResult = true; *hasResultType = false; break; + case OpTypeEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case OpTypeQueue: *hasResult = true; *hasResultType = false; break; + case OpTypePipe: *hasResult = true; *hasResultType = false; break; + case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case OpConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpConstantFalse: *hasResult = true; *hasResultType = true; break; + case OpConstant: *hasResult = true; *hasResultType = true; break; + case OpConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpConstantSampler: *hasResult = true; *hasResultType = true; break; + case OpConstantNull: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; 
+ case OpSpecConstant: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case OpFunction: *hasResult = true; *hasResultType = true; break; + case OpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case OpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case OpFunctionCall: *hasResult = true; *hasResultType = true; break; + case OpVariable: *hasResult = true; *hasResultType = true; break; + case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case OpLoad: *hasResult = true; *hasResultType = true; break; + case OpStore: *hasResult = false; *hasResultType = false; break; + case OpCopyMemory: *hasResult = false; *hasResultType = false; break; + case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case OpAccessChain: *hasResult = true; *hasResultType = true; break; + case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpArrayLength: *hasResult = true; *hasResultType = true; break; + case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpDecorate: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case OpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case OpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorShuffle: *hasResult = true; *hasResultType = 
true; break; + case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case OpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case OpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case OpCopyObject: *hasResult = true; *hasResultType = true; break; + case OpTranspose: *hasResult = true; *hasResultType = true; break; + case OpSampledImage: *hasResult = true; *hasResultType = true; break; + case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageFetch: *hasResult = true; *hasResultType = true; break; + case OpImageGather: *hasResult = true; *hasResultType = true; break; + case OpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageRead: *hasResult = true; *hasResultType = true; break; + case OpImageWrite: *hasResult = false; *hasResultType = false; break; + case OpImage: *hasResult = true; *hasResultType = true; break; + case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLevels: *hasResult = true; *hasResultType 
= true; break; + case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case OpConvertFToU: *hasResult = true; *hasResultType = true; break; + case OpConvertFToS: *hasResult = true; *hasResultType = true; break; + case OpConvertSToF: *hasResult = true; *hasResultType = true; break; + case OpConvertUToF: *hasResult = true; *hasResultType = true; break; + case OpUConvert: *hasResult = true; *hasResultType = true; break; + case OpSConvert: *hasResult = true; *hasResultType = true; break; + case OpFConvert: *hasResult = true; *hasResultType = true; break; + case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case OpBitcast: *hasResult = true; *hasResultType = true; break; + case OpSNegate: *hasResult = true; *hasResultType = true; break; + case OpFNegate: *hasResult = true; *hasResultType = true; break; + case OpIAdd: *hasResult = true; *hasResultType = true; break; + case OpFAdd: *hasResult = true; *hasResultType = true; break; + case OpISub: *hasResult = true; *hasResultType = true; break; + case OpFSub: *hasResult = true; *hasResultType = true; break; + case OpIMul: *hasResult = true; *hasResultType = true; break; + case OpFMul: *hasResult = true; *hasResultType = true; break; + case OpUDiv: *hasResult = true; *hasResultType = true; break; + case OpSDiv: *hasResult = true; *hasResultType = true; break; + case OpFDiv: *hasResult = true; *hasResultType = true; break; + case OpUMod: *hasResult = true; 
*hasResultType = true; break; + case OpSRem: *hasResult = true; *hasResultType = true; break; + case OpSMod: *hasResult = true; *hasResultType = true; break; + case OpFRem: *hasResult = true; *hasResultType = true; break; + case OpFMod: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpOuterProduct: *hasResult = true; *hasResultType = true; break; + case OpDot: *hasResult = true; *hasResultType = true; break; + case OpIAddCarry: *hasResult = true; *hasResultType = true; break; + case OpISubBorrow: *hasResult = true; *hasResultType = true; break; + case OpUMulExtended: *hasResult = true; *hasResultType = true; break; + case OpSMulExtended: *hasResult = true; *hasResultType = true; break; + case OpAny: *hasResult = true; *hasResultType = true; break; + case OpAll: *hasResult = true; *hasResultType = true; break; + case OpIsNan: *hasResult = true; *hasResultType = true; break; + case OpIsInf: *hasResult = true; *hasResultType = true; break; + case OpIsFinite: *hasResult = true; *hasResultType = true; break; + case OpIsNormal: *hasResult = true; *hasResultType = true; break; + case OpSignBitSet: *hasResult = true; *hasResultType = true; break; + case OpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case OpOrdered: *hasResult = true; *hasResultType = true; break; + case OpUnordered: *hasResult = true; *hasResultType = true; break; + case OpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpLogicalAnd: 
*hasResult = true; *hasResultType = true; break; + case OpLogicalNot: *hasResult = true; *hasResultType = true; break; + case OpSelect: *hasResult = true; *hasResultType = true; break; + case OpIEqual: *hasResult = true; *hasResultType = true; break; + case OpINotEqual: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpULessThan: *hasResult = true; *hasResultType = true; break; + case OpSLessThan: *hasResult = true; *hasResultType = true; break; + case OpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case OpShiftLeftLogical: 
*hasResult = true; *hasResultType = true; break; + case OpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpNot: *hasResult = true; *hasResultType = true; break; + case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case OpBitReverse: *hasResult = true; *hasResultType = true; break; + case OpBitCount: *hasResult = true; *hasResultType = true; break; + case OpDPdx: *hasResult = true; *hasResultType = true; break; + case OpDPdy: *hasResult = true; *hasResultType = true; break; + case OpFwidth: *hasResult = true; *hasResultType = true; break; + case OpDPdxFine: *hasResult = true; *hasResultType = true; break; + case OpDPdyFine: *hasResult = true; *hasResultType = true; break; + case OpFwidthFine: *hasResult = true; *hasResultType = true; break; + case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case OpEmitVertex: *hasResult = false; *hasResultType = false; break; + case OpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case OpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case OpControlBarrier: *hasResult = false; *hasResultType = false; break; + case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case OpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case OpAtomicStore: *hasResult = false; *hasResultType = false; break; + case OpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case 
OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case OpAtomicISub: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case OpAtomicOr: *hasResult = true; *hasResultType = true; break; + case OpAtomicXor: *hasResult = true; *hasResultType = true; break; + case OpPhi: *hasResult = true; *hasResultType = true; break; + case OpLoopMerge: *hasResult = false; *hasResultType = false; break; + case OpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case OpLabel: *hasResult = true; *hasResultType = false; break; + case OpBranch: *hasResult = false; *hasResultType = false; break; + case OpBranchConditional: *hasResult = false; *hasResultType = false; break; + case OpSwitch: *hasResult = false; *hasResultType = false; break; + case OpKill: *hasResult = false; *hasResultType = false; break; + case OpReturn: *hasResult = false; *hasResultType = false; break; + case OpReturnValue: *hasResult = false; *hasResultType = false; break; + case OpUnreachable: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case OpGroupWaitEvents: *hasResult = false; *hasResultType = 
false; break; + case OpGroupAll: *hasResult = true; *hasResultType = true; break; + case OpGroupAny: *hasResult = true; *hasResultType = true; break; + case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupSMax: *hasResult = true; *hasResultType = true; break; + case OpReadPipe: *hasResult = true; *hasResultType = true; break; + case OpWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpEnqueueMarker: *hasResult = true; 
*hasResultType = true; break; + case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case OpRetainEvent: *hasResult = false; *hasResultType = false; break; + case OpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case OpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case OpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case OpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseDrefGather: 
*hasResult = true; *hasResultType = true; break; + case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case OpNoLine: *hasResult = false; *hasResultType = false; break; + case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case OpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case OpSizeOf: *hasResult = true; *hasResultType = true; break; + case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case OpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case OpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case OpDecorateId: *hasResult = false; *hasResultType = false; break; + case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformInverseBallot: 
*hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalXor: 
*hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case OpCopyLogical: *hasResult = true; *hasResultType = true; break; + case OpPtrEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrDiff: *hasResult = true; *hasResultType = true; break; + case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case OpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case OpSDot: *hasResult = true; *hasResultType = true; break; + case OpUDot: *hasResult = true; *hasResultType = true; break; + case OpSUDot: *hasResult = true; *hasResultType = true; break; + case OpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpTypeRayQueryKHR: *hasResult = true; 
*hasResultType = false; break; + case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; + case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = 
false; break; + case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case OpTraceNV: *hasResult = false; *hasResultType = false; break; + case OpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case OpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case OpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case OpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case OpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case OpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case OpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case OpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case OpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break; + case OpExpectKHR: *hasResult = true; *hasResultType = true; break; + case OpDecorateString: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; 
*hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; 
*hasResultType = true; break; + case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case OpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case OpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case OpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case OpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLTINTEL: *hasResult = true; *hasResultType = true; break; + 
case OpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + 
case OpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case OpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case OpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case OpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; + case OpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case OpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case OpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; break; + case OpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case OpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + case OpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break; + case OpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case 
OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; + case OpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case OpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + // Overload operator| for mask bit combining inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } @@ -1209,6 +2570,8 @@ inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } +inline RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); } +inline FragmentShadingRateMask operator|(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) | unsigned(b)); } } 
// end namespace spv diff --git a/spirv_cfg.cpp b/spirv_cfg.cpp index ed31f236792..93299479815 100644 --- a/spirv_cfg.cpp +++ b/spirv_cfg.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 Arm Limited + * Copyright 2016-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cfg.hpp" #include "spirv_cross.hpp" #include @@ -61,7 +68,7 @@ void CFG::build_immediate_dominators() if (immediate_dominators[block]) { assert(immediate_dominators[edge]); - immediate_dominators[block] = find_common_dominator(block, edge); + immediate_dominators[block] = find_common_dominator(immediate_dominators[block], edge); } else immediate_dominators[block] = edge; @@ -97,8 +104,22 @@ bool CFG::post_order_visit(uint32_t block_id) // Block back-edges from recursively revisiting ourselves. visit_order[block_id].get() = 0; - // First visit our branch targets. auto &block = compiler.get(block_id); + + // If this is a loop header, add an implied branch to the merge target. + // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. + // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. + // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. + // We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG. + + // Make a point out of visiting merge target first. 
This is to make sure that post visit order outside the loop + // is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis. + // For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine, + // but for loops, only the header might end up actually branching to merge block. + if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) + add_branch(block_id, block.merge_block); + + // First visit our branch targets. switch (block.terminator) { case SPIRBlock::Direct: @@ -114,7 +135,9 @@ bool CFG::post_order_visit(uint32_t block_id) break; case SPIRBlock::MultiSelect: - for (auto &target : block.cases) + { + const auto &cases = compiler.get_case_list(block); + for (const auto &target : cases) { if (post_order_visit(target.block)) add_branch(block_id, target.block); @@ -122,19 +145,11 @@ bool CFG::post_order_visit(uint32_t block_id) if (block.default_block && post_order_visit(block.default_block)) add_branch(block_id, block.default_block); break; - + } default: break; } - // If this is a loop header, add an implied branch to the merge target. - // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. - // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. - // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. - // We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG. - if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) - add_branch(block_id, block.merge_block); - // If this is a selection merge, add an implied branch to the merge target. // This is needed to avoid cases where an inner branch dominates the outer branch. 
// This can happen if one of the branches exit early, e.g.: @@ -148,14 +163,35 @@ bool CFG::post_order_visit(uint32_t block_id) // Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement // will be hoisted out to outside the selection merge. // If size > 1, the variable will be automatically hoisted, so we should not mess with it. + // The exception here is switch blocks, where we can have multiple edges to merge block, + // all coming from same scope, so be more conservative in this case. // Adding fake branches unconditionally breaks parameter preservation analysis, // which looks at how variables are accessed through the CFG. auto pred_itr = preceding_edges.find(block.next_block); if (pred_itr != end(preceding_edges)) { auto &pred = pred_itr->second; - if (pred.size() == 1 && *pred.begin() != block_id) - add_branch(block_id, block.next_block); + auto succ_itr = succeeding_edges.find(block_id); + size_t num_succeeding_edges = 0; + if (succ_itr != end(succeeding_edges)) + num_succeeding_edges = succ_itr->second.size(); + + if (block.terminator == SPIRBlock::MultiSelect && num_succeeding_edges == 1) + { + // Multiple branches can come from the same scope due to "break;", so we need to assume that all branches + // come from same case scope in worst case, even if there are multiple preceding edges. + // If we have more than one succeeding edge from the block header, it should be impossible + // to have a dominator be inside the block. + // Only case this can go wrong is if we have 2 or more edges from block header and + // 2 or more edges to merge block, and still have dominator be inside a case label. 
+ if (!pred.empty()) + add_branch(block_id, block.next_block); + } + else + { + if (pred.size() == 1 && *pred.begin() != block_id) + add_branch(block_id, block.next_block); + } } else { @@ -210,13 +246,13 @@ uint32_t CFG::find_loop_dominator(uint32_t block_id) const for (auto &pred : itr->second) { auto &pred_block = compiler.get(pred); - if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == block_id) + if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == ID(block_id)) { pred_block_id = pred; ignore_loop_header = true; break; } - else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == block_id) + else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == ID(block_id)) { pred_block_id = pred; break; @@ -241,6 +277,82 @@ uint32_t CFG::find_loop_dominator(uint32_t block_id) const return block_id; } +bool CFG::node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const +{ + // Walk backwards, starting from "to" block. + // Only follow pred edges if they have a 1:1 relationship, or a merge relationship. + // If we cannot find a path to "from", we must assume that to is inside control flow in some way. 
+ + auto &from_block = compiler.get(from); + BlockID ignore_block_id = 0; + if (from_block.merge == SPIRBlock::MergeLoop) + ignore_block_id = from_block.merge_block; + + while (to != from) + { + auto pred_itr = preceding_edges.find(to); + if (pred_itr == end(preceding_edges)) + return false; + + DominatorBuilder builder(*this); + for (auto &edge : pred_itr->second) + builder.add_block(edge); + + uint32_t dominator = builder.get_dominator(); + if (dominator == 0) + return false; + + auto &dom = compiler.get(dominator); + + bool true_path_ignore = false; + bool false_path_ignore = false; + + bool merges_to_nothing = dom.merge == SPIRBlock::MergeNone || + (dom.merge == SPIRBlock::MergeSelection && dom.next_block && + compiler.get(dom.next_block).terminator == SPIRBlock::Unreachable) || + (dom.merge == SPIRBlock::MergeLoop && dom.merge_block && + compiler.get(dom.merge_block).terminator == SPIRBlock::Unreachable); + + if (dom.self == from || merges_to_nothing) + { + // We can only ignore inner branchy paths if there is no merge, + // i.e. no code is generated afterwards. E.g. this allows us to elide continue: + // for (;;) { if (cond) { continue; } else { break; } }. + // Codegen here in SPIR-V will be something like either no merge if one path directly breaks, or + // we merge to Unreachable. + if (ignore_block_id && dom.terminator == SPIRBlock::Select) + { + auto &true_block = compiler.get(dom.true_block); + auto &false_block = compiler.get(dom.false_block); + auto &ignore_block = compiler.get(ignore_block_id); + true_path_ignore = compiler.execution_is_branchless(true_block, ignore_block); + false_path_ignore = compiler.execution_is_branchless(false_block, ignore_block); + } + } + + // Cases where we allow traversal. This serves as a proxy for post-dominance in a loop body. + // TODO: Might want to do full post-dominance analysis, but it's a lot of churn for something like this ... + // - We're the merge block of a selection construct. Jump to header. 
+ // - We're the merge block of a loop. Jump to header. + // - Direct branch. Trivial. + // - Allow cases inside a branch if the header cannot merge execution before loop exit. + if ((dom.merge == SPIRBlock::MergeSelection && dom.next_block == to) || + (dom.merge == SPIRBlock::MergeLoop && dom.merge_block == to) || + (dom.terminator == SPIRBlock::Direct && dom.next_block == to) || + (dom.terminator == SPIRBlock::Select && dom.true_block == to && false_path_ignore) || + (dom.terminator == SPIRBlock::Select && dom.false_block == to && true_path_ignore)) + { + // Allow walking selection constructs if the other branch reaches out of a loop construct. + // It cannot be in-scope anymore. + to = dominator; + } + else + return false; + } + + return true; +} + DominatorBuilder::DominatorBuilder(const CFG &cfg_) : cfg(cfg_) { @@ -296,7 +408,9 @@ void DominatorBuilder::lift_continue_block_dominator() break; case SPIRBlock::MultiSelect: - for (auto &target : block.cases) + { + auto &cases = cfg.get_compiler().get_case_list(block); + for (auto &target : cases) { if (cfg.get_visit_order(target.block) > post_order) back_edge_dominator = true; @@ -304,6 +418,7 @@ void DominatorBuilder::lift_continue_block_dominator() if (block.default_block && cfg.get_visit_order(block.default_block) > post_order) back_edge_dominator = true; break; + } default: break; diff --git a/spirv_cfg.hpp b/spirv_cfg.hpp index 7d07d484107..1d85fe0a97b 100644 --- a/spirv_cfg.hpp +++ b/spirv_cfg.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 Arm Limited + * Copyright 2016-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #ifndef SPIRV_CROSS_CFG_HPP #define SPIRV_CROSS_CFG_HPP @@ -52,6 +59,11 @@ class CFG return 0; } + bool is_reachable(uint32_t block) const + { + return visit_order.count(block) != 0; + } + uint32_t get_visit_order(uint32_t block) const { auto itr = visit_order.find(block); @@ -88,13 +100,17 @@ class CFG return; seen_blocks.insert(block); - op(block); - for (auto b : get_succeeding_edges(block)) - walk_from(seen_blocks, b, op); + if (op(block)) + { + for (auto b : get_succeeding_edges(block)) + walk_from(seen_blocks, b, op); + } } uint32_t find_loop_dominator(uint32_t block) const; + bool node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const; + private: struct VisitOrder { diff --git a/spirv_common.hpp b/spirv_common.hpp index d7a5f32ecab..ba420e1dbe0 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,12 +15,23 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_COMMON_HPP #define SPIRV_CROSS_COMMON_HPP +#ifndef SPV_ENABLE_UTILITY_CODE +#define SPV_ENABLE_UTILITY_CODE +#endif #include "spirv.hpp" + #include "spirv_cross_containers.hpp" #include "spirv_cross_error_handling.hpp" +#include // A bit crude, but allows projects which embed SPIRV-Cross statically to // effectively hide all the symbols from other projects. @@ -203,14 +215,39 @@ inline std::string convert_to_string(const T &t) return std::to_string(t); } +static inline std::string convert_to_string(int32_t value) +{ + // INT_MIN is ... special on some backends. 
If we use a decimal literal, and negate it, we + // could accidentally promote the literal to long first, then negate. + // To workaround it, emit int(0x80000000) instead. + if (value == std::numeric_limits::min()) + return "int(0x80000000)"; + else + return std::to_string(value); +} + +static inline std::string convert_to_string(int64_t value, const std::string &int64_type, bool long_long_literal_suffix) +{ + // INT64_MIN is ... special on some backends. + // If we use a decimal literal, and negate it, we might overflow the representable numbers. + // To workaround it, emit int(0x80000000) instead. + if (value == std::numeric_limits::min()) + return join(int64_type, "(0x8000000000000000u", (long_long_literal_suffix ? "ll" : "l"), ")"); + else + return std::to_string(value) + (long_long_literal_suffix ? "ll" : "l"); +} + // Allow implementations to set a convenient standard precision #ifndef SPIRV_CROSS_FLT_FMT #define SPIRV_CROSS_FLT_FMT "%.32g" #endif -#ifdef _MSC_VER -// sprintf warning. -// We cannot rely on snprintf existing because, ..., MSVC. +// Disable sprintf and strcat warnings. +// We cannot rely on snprintf and family existing because, ..., MSVC. 
+#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#elif defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4996) #endif @@ -258,7 +295,32 @@ inline std::string convert_to_string(double t, char locale_radix_point) return buf; } -#ifdef _MSC_VER +template +struct ValueSaver +{ + explicit ValueSaver(T ¤t_) + : current(current_) + , saved(current_) + { + } + + void release() + { + current = saved; + } + + ~ValueSaver() + { + release(); + } + + T ¤t; + T saved; +}; + +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic pop +#elif defined(_MSC_VER) #pragma warning(pop) #endif @@ -266,24 +328,22 @@ struct Instruction { uint16_t op = 0; uint16_t count = 0; + // If offset is 0 (not a valid offset into the instruction stream), + // we have an instruction stream which is embedded in the object. uint32_t offset = 0; uint32_t length = 0; + + inline bool is_embedded() const + { + return offset == 0; + } }; -// Helper for Variant interface. -struct IVariant +struct EmbeddedInstruction : Instruction { - virtual ~IVariant() = default; - virtual IVariant *clone(ObjectPoolBase *pool) = 0; - uint32_t self = 0; + SmallVector ops; }; -#define SPIRV_CROSS_DECLARE_CLONE(T) \ - IVariant *clone(ObjectPoolBase *pool) override \ - { \ - return static_cast *>(pool)->allocate(*this); \ - } - enum Types { TypeNone, @@ -303,6 +363,99 @@ enum Types TypeCount }; +template +class TypedID; + +template <> +class TypedID +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + template + TypedID(const TypedID &other) + { + *this = other; + } + + template + TypedID &operator=(const TypedID &other) + { + id = uint32_t(other); + return *this; + } + + // Implicit conversion to u32 is desired here. + // As long as we block implicit conversion between TypedID and TypedID we're good. 
+ operator uint32_t() const + { + return id; + } + + template + operator TypedID() const + { + return TypedID(*this); + } + +private: + uint32_t id = 0; +}; + +template +class TypedID +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + explicit TypedID(const TypedID &other) + : id(uint32_t(other)) + { + } + + operator uint32_t() const + { + return id; + } + +private: + uint32_t id = 0; +}; + +using VariableID = TypedID; +using TypeID = TypedID; +using ConstantID = TypedID; +using FunctionID = TypedID; +using BlockID = TypedID; +using ID = TypedID; + +// Helper for Variant interface. +struct IVariant +{ + virtual ~IVariant() = default; + virtual IVariant *clone(ObjectPoolBase *pool) = 0; + ID self = 0; + +protected: + IVariant() = default; + IVariant(const IVariant&) = default; + IVariant &operator=(const IVariant&) = default; +}; + +#define SPIRV_CROSS_DECLARE_CLONE(T) \ + IVariant *clone(ObjectPoolBase *pool) override \ + { \ + return static_cast *>(pool)->allocate(*this); \ + } + struct SPIRUndef : IVariant { enum @@ -310,11 +463,11 @@ struct SPIRUndef : IVariant type = TypeUndef }; - explicit SPIRUndef(uint32_t basetype_) + explicit SPIRUndef(TypeID basetype_) : basetype(basetype_) { } - uint32_t basetype; + TypeID basetype; SPIRV_CROSS_DECLARE_CLONE(SPIRUndef) }; @@ -344,15 +497,15 @@ struct SPIRCombinedImageSampler : IVariant { type = TypeCombinedImageSampler }; - SPIRCombinedImageSampler(uint32_t type_, uint32_t image_, uint32_t sampler_) + SPIRCombinedImageSampler(TypeID type_, VariableID image_, VariableID sampler_) : combined_type(type_) , image(image_) , sampler(sampler_) { } - uint32_t combined_type; - uint32_t image; - uint32_t sampler; + TypeID combined_type; + VariableID image; + VariableID sampler; SPIRV_CROSS_DECLARE_CLONE(SPIRCombinedImageSampler) }; @@ -364,16 +517,18 @@ struct SPIRConstantOp : IVariant type = TypeConstantOp }; - SPIRConstantOp(uint32_t result_type, spv::Op op, const uint32_t *args, uint32_t length) 
+ SPIRConstantOp(TypeID result_type, spv::Op op, const uint32_t *args, uint32_t length) : opcode(op) - , arguments(args, args + length) , basetype(result_type) { + arguments.reserve(length); + for (uint32_t i = 0; i < length; i++) + arguments.push_back(args[i]); } spv::Op opcode; SmallVector arguments; - uint32_t basetype; + TypeID basetype; SPIRV_CROSS_DECLARE_CLONE(SPIRConstantOp) }; @@ -406,10 +561,12 @@ struct SPIRType : IVariant Image, SampledImage, Sampler, - AccelerationStructureNV, + AccelerationStructure, + RayQuery, // Keep internal types at the end. ControlPointArray, + Interpolant, Char }; @@ -433,14 +590,19 @@ struct SPIRType : IVariant // Keep track of how many pointer layers we have. uint32_t pointer_depth = 0; bool pointer = false; + bool forward_pointer = false; spv::StorageClass storage = spv::StorageClassGeneric; - SmallVector member_types; + SmallVector member_types; + + // If member order has been rewritten to handle certain scenarios with Offset, + // allow codegen to rewrite the index. + SmallVector member_type_index_redirection; struct ImageType { - uint32_t type; + TypeID type; spv::Dim dim; bool depth; bool arrayed; @@ -453,11 +615,11 @@ struct SPIRType : IVariant // Structs can be declared multiple times if they are used as part of interface blocks. // We want to detect this so that we only emit the struct definition once. // Since we cannot rely on OpName to be equal, we need to figure out aliases. - uint32_t type_alias = 0; + TypeID type_alias = 0; // Denotes the type which this type is based on. // Allows the backend to traverse how a complex type is built up during access chains. - uint32_t parent_type = 0; + TypeID parent_type = 0; // Used in backends to avoid emitting members with conflicting names. 
std::unordered_set member_name_cache; @@ -480,7 +642,10 @@ struct SPIRExtension : IVariant SPV_AMD_shader_ballot, SPV_AMD_shader_explicit_vertex_parameter, SPV_AMD_shader_trinary_minmax, - SPV_AMD_gcn_shader + SPV_AMD_gcn_shader, + NonSemanticDebugPrintf, + NonSemanticShaderDebugInfo, + NonSemanticGeneric }; explicit SPIRExtension(Extension ext_) @@ -496,7 +661,7 @@ struct SPIRExtension : IVariant // so in order to avoid conflicts, we can't stick them in the ids array. struct SPIREntryPoint { - SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, const std::string &entry_name) + SPIREntryPoint(FunctionID self_, spv::ExecutionModel execution_model, const std::string &entry_name) : self(self_) , name(entry_name) , orig_name(entry_name) @@ -505,20 +670,23 @@ struct SPIREntryPoint } SPIREntryPoint() = default; - uint32_t self = 0; + FunctionID self = 0; std::string name; std::string orig_name; - SmallVector interface_variables; + SmallVector interface_variables; Bitset flags; - struct + struct WorkgroupSize { uint32_t x = 0, y = 0, z = 0; + uint32_t id_x = 0, id_y = 0, id_z = 0; uint32_t constant = 0; // Workgroup size can be expressed as a constant/spec-constant instead. } workgroup_size; uint32_t invocations = 0; uint32_t output_vertices = 0; + uint32_t output_primitives = 0; spv::ExecutionModel model = spv::ExecutionModelMax; + bool geometry_passthrough = false; }; struct SPIRExpression : IVariant @@ -529,8 +697,8 @@ struct SPIRExpression : IVariant }; // Only created by the backend target to avoid creating tons of temporaries. - SPIRExpression(std::string expr, uint32_t expression_type_, bool immutable_) - : expression(move(expr)) + SPIRExpression(std::string expr, TypeID expression_type_, bool immutable_) + : expression(std::move(expr)) , expression_type(expression_type_) , immutable(immutable_) { @@ -539,14 +707,14 @@ struct SPIRExpression : IVariant // If non-zero, prepend expression with to_expression(base_expression). 
// Used in amortizing multiple calls to to_expression() // where in certain cases that would quickly force a temporary when not needed. - uint32_t base_expression = 0; + ID base_expression = 0; std::string expression; - uint32_t expression_type = 0; + TypeID expression_type = 0; // If this expression is a forwarded load, // allow us to reference the original variable. - uint32_t loaded_from = 0; + ID loaded_from = 0; // If this expression will never change, we can avoid lots of temporaries // in high level source. @@ -562,11 +730,14 @@ struct SPIRExpression : IVariant bool access_chain = false; // A list of expressions which this expression depends on. - SmallVector expression_dependencies; + SmallVector expression_dependencies; // By reading this expression, we implicitly read these expressions as well. // Used by access chain Store and Load since we read multiple expressions in this case. - SmallVector implied_read_expressions; + SmallVector implied_read_expressions; + + // The expression was emitted at a certain scope. Lets us track when an expression read means multiple reads. + uint32_t emitted_loop_level = 0; SPIRV_CROSS_DECLARE_CLONE(SPIRExpression) }; @@ -578,12 +749,12 @@ struct SPIRFunctionPrototype : IVariant type = TypeFunctionPrototype }; - explicit SPIRFunctionPrototype(uint32_t return_type_) + explicit SPIRFunctionPrototype(TypeID return_type_) : return_type(return_type_) { } - uint32_t return_type; + TypeID return_type; SmallVector parameter_types; SPIRV_CROSS_DECLARE_CLONE(SPIRFunctionPrototype) @@ -606,7 +777,10 @@ struct SPIRBlock : IVariant Return, // Block ends with return. 
Unreachable, // Noop - Kill // Discard + Kill, // Discard + IgnoreIntersection, // Ray Tracing + TerminateRay, // Ray Tracing + EmitMeshTasks // Mesh shaders }; enum Merge @@ -650,7 +824,7 @@ struct SPIRBlock : IVariant ComplexLoop }; - enum + enum : uint32_t { NoDominator = 0xffffffffu }; @@ -658,23 +832,30 @@ struct SPIRBlock : IVariant Terminator terminator = Unknown; Merge merge = MergeNone; Hints hint = HintNone; - uint32_t next_block = 0; - uint32_t merge_block = 0; - uint32_t continue_block = 0; + BlockID next_block = 0; + BlockID merge_block = 0; + BlockID continue_block = 0; - uint32_t return_value = 0; // If 0, return nothing (void). - uint32_t condition = 0; - uint32_t true_block = 0; - uint32_t false_block = 0; - uint32_t default_block = 0; + ID return_value = 0; // If 0, return nothing (void). + ID condition = 0; + BlockID true_block = 0; + BlockID false_block = 0; + BlockID default_block = 0; + + // If terminator is EmitMeshTasksEXT. + struct + { + ID groups[3]; + ID payload; + } mesh = {}; SmallVector ops; struct Phi { - uint32_t local_variable; // flush local variable ... - uint32_t parent; // If we're in from_block and want to branch into this block ... - uint32_t function_variable; // to this function-global "phi" variable first. + ID local_variable; // flush local variable ... + BlockID parent; // If we're in from_block and want to branch into this block ... + VariableID function_variable; // to this function-global "phi" variable first. }; // Before entering this block flush out local variables to magical "phi" variables. @@ -682,18 +863,19 @@ struct SPIRBlock : IVariant // Declare these temporaries before beginning the block. // Used for handling complex continue blocks which have side effects. - SmallVector> declare_temporary; + SmallVector> declare_temporary; // Declare these temporaries, but only conditionally if this block turns out to be // a complex loop header. 
- SmallVector> potential_declare_temporary; + SmallVector> potential_declare_temporary; struct Case { - uint32_t value; - uint32_t block; + uint64_t value; + BlockID block; }; - SmallVector cases; + SmallVector cases_32bit; + SmallVector cases_64bit; // If we have tried to optimize code for this block but failed, // keep track of this. @@ -707,25 +889,25 @@ struct SPIRBlock : IVariant // If marked, we have explicitly handled Phi from this block, so skip any flushes related to that on a branch. // Used to handle an edge case with switch and case-label fallthrough where fall-through writes to Phi. - uint32_t ignore_phi_from_block = 0; + BlockID ignore_phi_from_block = 0; // The dominating block which this block might be within. // Used in continue; blocks to determine if we really need to write continue. - uint32_t loop_dominator = 0; + BlockID loop_dominator = 0; // All access to these variables are dominated by this block, // so before branching anywhere we need to make sure that we declare these variables. - SmallVector dominated_variables; + SmallVector dominated_variables; // These are variables which should be declared in a for loop header, if we // fail to use a classic for-loop, // we remove these variables, and fall back to regular variables outside the loop. - SmallVector loop_variables; + SmallVector loop_variables; // Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or // sub-group-like operations. // Make sure that we only use these expressions in the original block. 
- SmallVector invalidate_expressions; + SmallVector invalidate_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRBlock) }; @@ -737,7 +919,7 @@ struct SPIRFunction : IVariant type = TypeFunction }; - SPIRFunction(uint32_t return_type_, uint32_t function_type_) + SPIRFunction(TypeID return_type_, TypeID function_type_) : return_type(return_type_) , function_type(function_type_) { @@ -745,8 +927,8 @@ struct SPIRFunction : IVariant struct Parameter { - uint32_t type; - uint32_t id; + TypeID type; + ID id; uint32_t read_count; uint32_t write_count; @@ -768,25 +950,25 @@ struct SPIRFunction : IVariant // or a global ID. struct CombinedImageSamplerParameter { - uint32_t id; - uint32_t image_id; - uint32_t sampler_id; + VariableID id; + VariableID image_id; + VariableID sampler_id; bool global_image; bool global_sampler; bool depth; }; - uint32_t return_type; - uint32_t function_type; + TypeID return_type; + TypeID function_type; SmallVector arguments; // Can be used by backends to add magic arguments. // Currently used by combined image/sampler implementation. SmallVector shadow_arguments; - SmallVector local_variables; - uint32_t entry_block = 0; - SmallVector blocks; + SmallVector local_variables; + BlockID entry_block = 0; + SmallVector blocks; SmallVector combined_parameters; struct EntryLine @@ -796,12 +978,12 @@ struct SPIRFunction : IVariant }; EntryLine entry_line; - void add_local_variable(uint32_t id) + void add_local_variable(VariableID id) { local_variables.push_back(id); } - void add_parameter(uint32_t parameter_type, uint32_t id, bool alias_global_variable = false) + void add_parameter(TypeID parameter_type, ID id, bool alias_global_variable = false) { // Arguments are read-only until proven otherwise. 
arguments.push_back({ parameter_type, id, 0u, 0u, alias_global_variable }); @@ -822,7 +1004,7 @@ struct SPIRFunction : IVariant // On function entry, make sure to copy a constant array into thread addr space to work around // the case where we are passing a constant array by value to a function on backends which do not // consider arrays value types. - SmallVector constant_arrays_needed_on_stack; + SmallVector constant_arrays_needed_on_stack; bool active = false; bool flush_undeclared = true; @@ -838,7 +1020,7 @@ struct SPIRAccessChain : IVariant type = TypeAccessChain }; - SPIRAccessChain(uint32_t basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, + SPIRAccessChain(TypeID basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, int32_t static_index_) : basetype(basetype_) , storage(storage_) @@ -853,20 +1035,21 @@ struct SPIRAccessChain : IVariant // which has no usable buffer type ala GLSL SSBOs. // StructuredBuffer is too limited, so our only option is to deal with ByteAddressBuffer which works with raw addresses. - uint32_t basetype; + TypeID basetype; spv::StorageClass storage; std::string base; std::string dynamic_index; int32_t static_index; - uint32_t loaded_from = 0; + VariableID loaded_from = 0; uint32_t matrix_stride = 0; + uint32_t array_stride = 0; bool row_major_matrix = false; bool immutable = false; // By reading this expression, we implicitly read these expressions as well. // Used by access chain Store and Load since we read multiple expressions in this case. 
- SmallVector implied_read_expressions; + SmallVector implied_read_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRAccessChain) }; @@ -879,7 +1062,7 @@ struct SPIRVariable : IVariant }; SPIRVariable() = default; - SPIRVariable(uint32_t basetype_, spv::StorageClass storage_, uint32_t initializer_ = 0, uint32_t basevariable_ = 0) + SPIRVariable(TypeID basetype_, spv::StorageClass storage_, ID initializer_ = 0, VariableID basevariable_ = 0) : basetype(basetype_) , storage(storage_) , initializer(initializer_) @@ -887,11 +1070,11 @@ struct SPIRVariable : IVariant { } - uint32_t basetype = 0; + TypeID basetype = 0; spv::StorageClass storage = spv::StorageClassGeneric; uint32_t decoration = 0; - uint32_t initializer = 0; - uint32_t basevariable = 0; + ID initializer = 0; + VariableID basevariable = 0; SmallVector dereference_chain; bool compat_builtin = false; @@ -901,11 +1084,10 @@ struct SPIRVariable : IVariant // When we read the variable as an expression, just forward // shadowed_id as the expression. bool statically_assigned = false; - uint32_t static_expression = 0; + ID static_expression = 0; // Temporaries which can remain forwarded as long as this variable is not modified. - SmallVector dependees; - bool forwardable = true; + SmallVector dependees; bool deferred_declaration = false; bool phi_variable = false; @@ -917,7 +1099,7 @@ struct SPIRVariable : IVariant uint32_t remapped_components = 0; // The block which dominates all access to this variable. - uint32_t dominator = 0; + BlockID dominator = 0; // If true, this variable is a loop variable, when accessing the variable // outside a loop, // we should statically forward it. @@ -937,7 +1119,8 @@ struct SPIRConstant : IVariant type = TypeConstant }; - union Constant { + union Constant + { uint32_t u32; int32_t i32; float f32; @@ -951,15 +1134,12 @@ struct SPIRConstant : IVariant { Constant r[4]; // If != 0, this element is a specialization constant, and we should keep track of it as such. 
- uint32_t id[4]; + ID id[4]; uint32_t vecsize = 1; - // Workaround for MSVC 2013, initializing an array breaks. ConstantVector() { memset(r, 0, sizeof(r)); - for (unsigned i = 0; i < 4; i++) - id[i] = 0; } }; @@ -967,15 +1147,8 @@ struct SPIRConstant : IVariant { ConstantVector c[4]; // If != 0, this column is a specialization constant, and we should keep track of it as such. - uint32_t id[4]; + ID id[4]; uint32_t columns = 1; - - // Workaround for MSVC 2013, initializing an array breaks. - ConstantMatrix() - { - for (unsigned i = 0; i < 4; i++) - id[i] = 0; - } }; static inline float f16_to_f32(uint16_t u16_value) @@ -985,7 +1158,8 @@ struct SPIRConstant : IVariant int e = (u16_value >> 10) & 0x1f; int m = (u16_value >> 0) & 0x3ff; - union { + union + { float f32; uint32_t u32; } u; @@ -1139,16 +1313,18 @@ struct SPIRConstant : IVariant SPIRConstant() = default; - SPIRConstant(uint32_t constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) + SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) : constant_type(constant_type_) , specialization(specialized) { - subconstants.insert(std::end(subconstants), elements, elements + num_elements); + subconstants.reserve(num_elements); + for (uint32_t i = 0; i < num_elements; i++) + subconstants.push_back(elements[i]); specialization = specialized; } // Construct scalar (32-bit). - SPIRConstant(uint32_t constant_type_, uint32_t v0, bool specialized) + SPIRConstant(TypeID constant_type_, uint32_t v0, bool specialized) : constant_type(constant_type_) , specialization(specialized) { @@ -1158,7 +1334,7 @@ struct SPIRConstant : IVariant } // Construct scalar (64-bit). 
- SPIRConstant(uint32_t constant_type_, uint64_t v0, bool specialized) + SPIRConstant(TypeID constant_type_, uint64_t v0, bool specialized) : constant_type(constant_type_) , specialization(specialized) { @@ -1168,7 +1344,7 @@ struct SPIRConstant : IVariant } // Construct vectors and matrices. - SPIRConstant(uint32_t constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, + SPIRConstant(TypeID constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, bool specialized) : constant_type(constant_type_) , specialization(specialized) @@ -1200,7 +1376,7 @@ struct SPIRConstant : IVariant } } - uint32_t constant_type = 0; + TypeID constant_type = 0; ConstantMatrix m; // If this constant is a specialization constant (i.e. created with OpSpecConstant*). @@ -1212,7 +1388,7 @@ struct SPIRConstant : IVariant bool is_used_as_lut = false; // For composites which are constant arrays, etc. - SmallVector subconstants; + SmallVector subconstants; // Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant, // and uses them to initialize the constant. This allows the user @@ -1240,7 +1416,7 @@ class Variant ~Variant() { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); } // Marking custom move constructor as noexcept is important. 
@@ -1259,7 +1435,7 @@ class Variant if (this != &other) { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); holder = other.holder; group = other.group; type = other.type; @@ -1283,7 +1459,7 @@ class Variant if (this != &other) { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); if (other.holder) holder = other.holder->clone(group->pools[other.type].get()); @@ -1299,13 +1475,13 @@ class Variant void set(IVariant *val, Types new_type) { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); holder = nullptr; if (!allow_type_rewrite && type != TypeNone && type != new_type) { if (val) - group->pools[new_type]->free_opaque(val); + group->pools[new_type]->deallocate_opaque(val); SPIRV_CROSS_THROW("Overwriting a variant with new type."); } @@ -1347,9 +1523,9 @@ class Variant return type; } - uint32_t get_id() const + ID get_id() const { - return holder ? holder->self : 0; + return holder ? holder->self : ID(0); } bool empty() const @@ -1360,7 +1536,7 @@ class Variant void reset() { if (holder) - group->pools[type]->free_opaque(holder); + group->pools[type]->deallocate_opaque(holder); holder = nullptr; type = TypeNone; } @@ -1398,10 +1574,86 @@ T &variant_set(Variant &var, P &&... args) struct AccessChainMeta { - uint32_t storage_packed_type = 0; + uint32_t storage_physical_type = 0; bool need_transpose = false; bool storage_is_packed = false; bool storage_is_invariant = false; + bool flattened_struct = false; + bool relaxed_precision = false; +}; + +enum ExtendedDecorations +{ + // Marks if a buffer block is re-packed, i.e. member declaration might be subject to PhysicalTypeID remapping and padding. + SPIRVCrossDecorationBufferBlockRepacked = 0, + + // A type in a buffer block might be declared with a different physical type than the logical type. + // If this is not set, PhysicalTypeID == the SPIR-V type as declared. 
+ SPIRVCrossDecorationPhysicalTypeID, + + // Marks if the physical type is to be declared with tight packing rules, i.e. packed_floatN on MSL and friends. + // If this is set, PhysicalTypeID might also be set. It can be set to same as logical type if all we're doing + // is converting float3 to packed_float3 for example. + // If this is marked on a struct, it means the struct itself must use only Packed types for all its members. + SPIRVCrossDecorationPhysicalTypePacked, + + // The padding in bytes before declaring this struct member. + // If used on a struct type, marks the target size of a struct. + SPIRVCrossDecorationPaddingTarget, + + SPIRVCrossDecorationInterfaceMemberIndex, + SPIRVCrossDecorationInterfaceOrigID, + SPIRVCrossDecorationResourceIndexPrimary, + // Used for decorations like resource indices for samplers when part of combined image samplers. + // A variable might need to hold two resource indices in this case. + SPIRVCrossDecorationResourceIndexSecondary, + // Used for resource indices for multiplanar images when part of combined image samplers. + SPIRVCrossDecorationResourceIndexTertiary, + SPIRVCrossDecorationResourceIndexQuaternary, + + // Marks a buffer block for using explicit offsets (GLSL/HLSL). + SPIRVCrossDecorationExplicitOffset, + + // Apply to a variable in the Input storage class; marks it as holding the base group passed to vkCmdDispatchBase(), + // or the base vertex and instance indices passed to vkCmdDrawIndexed(). + // In MSL, this is used to adjust the WorkgroupId and GlobalInvocationId variables in compute shaders, + // and to hold the BaseVertex and BaseInstance variables in vertex shaders. + SPIRVCrossDecorationBuiltInDispatchBase, + + // Apply to a variable that is a function parameter; marks it as being a "dynamic" + // combined image-sampler. In MSL, this is used when a function parameter might hold + // either a regular combined image-sampler or one that has an attached sampler + // Y'CbCr conversion. 
+ SPIRVCrossDecorationDynamicImageSampler, + + // Apply to a variable in the Input storage class; marks it as holding the size of the stage + // input grid. + // In MSL, this is used to hold the vertex and instance counts in a tessellation pipeline + // vertex shader. + SPIRVCrossDecorationBuiltInStageInputSize, + + // Apply to any access chain of a tessellation I/O variable; stores the type of the sub-object + // that was chained to, as recorded in the input variable itself. This is used in case the pointer + // is itself used as the base of an access chain, to calculate the original type of the sub-object + // chained to, in case a swizzle needs to be applied. This should not happen normally with valid + // SPIR-V, but the MSL backend can change the type of input variables, necessitating the + // addition of swizzles to keep the generated code compiling. + SPIRVCrossDecorationTessIOOriginalInputTypeID, + + // Apply to any access chain of an interface variable used with pull-model interpolation, where the variable is a + // vector but the resulting pointer is a scalar; stores the component index that is to be accessed by the chain. + // This is used when emitting calls to interpolation functions on the chain in MSL: in this case, the component + // must be applied to the result, since pull-model interpolants in MSL cannot be swizzled directly, but the + // results of interpolation can. + SPIRVCrossDecorationInterpolantComponentExpr, + + // Apply to any struct type that is used in the Workgroup storage class. + // This causes matrices in MSL prior to Metal 3.0 to be emitted using a special + // class that is convertible to the standard matrix type, to work around the + // lack of constructors in the 'threadgroup' address space. 
+ SPIRVCrossDecorationWorkgroupStruct, + + SPIRVCrossDecorationCount }; struct Meta @@ -1418,6 +1670,9 @@ struct Meta uint32_t set = 0; uint32_t binding = 0; uint32_t offset = 0; + uint32_t xfb_buffer = 0; + uint32_t xfb_stride = 0; + uint32_t stream = 0; uint32_t array_stride = 0; uint32_t matrix_stride = 0; uint32_t input_attachment = 0; @@ -1426,14 +1681,17 @@ struct Meta spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax; bool builtin = false; - struct + struct Extended { - uint32_t packed_type = 0; - bool packed = false; - uint32_t ib_member_index = ~(0u); - uint32_t ib_orig_id = 0; - uint32_t resource_index_primary = ~(0u); - uint32_t resource_index_secondary = ~(0u); + Extended() + { + // MSVC 2013 workaround to init like this. + for (auto &v : values) + v = 0; + } + + Bitset flags; + uint32_t values[SPIRVCrossDecorationCount]; } extended; }; @@ -1539,6 +1797,125 @@ static inline bool opcode_is_sign_invariant(spv::Op opcode) return false; } } + +static inline bool opcode_can_promote_integer_implicitly(spv::Op opcode) +{ + switch (opcode) + { + case spv::OpSNegate: + case spv::OpNot: + case spv::OpBitwiseAnd: + case spv::OpBitwiseOr: + case spv::OpBitwiseXor: + case spv::OpShiftLeftLogical: + case spv::OpShiftRightLogical: + case spv::OpShiftRightArithmetic: + case spv::OpIAdd: + case spv::OpISub: + case spv::OpIMul: + case spv::OpSDiv: + case spv::OpUDiv: + case spv::OpSRem: + case spv::OpUMod: + case spv::OpSMod: + return true; + + default: + return false; + } +} + +struct SetBindingPair +{ + uint32_t desc_set; + uint32_t binding; + + inline bool operator==(const SetBindingPair &other) const + { + return desc_set == other.desc_set && binding == other.binding; + } + + inline bool operator<(const SetBindingPair &other) const + { + return desc_set < other.desc_set || (desc_set == other.desc_set && binding < other.binding); + } +}; + +struct LocationComponentPair +{ + uint32_t location; + uint32_t component; + + inline bool operator==(const 
LocationComponentPair &other) const + { + return location == other.location && component == other.component; + } + + inline bool operator<(const LocationComponentPair &other) const + { + return location < other.location || (location == other.location && component < other.component); + } +}; + +struct StageSetBinding +{ + spv::ExecutionModel model; + uint32_t desc_set; + uint32_t binding; + + inline bool operator==(const StageSetBinding &other) const + { + return model == other.model && desc_set == other.desc_set && binding == other.binding; + } +}; + +struct InternalHasher +{ + inline size_t operator()(const SetBindingPair &value) const + { + // Quality of hash doesn't really matter here. + auto hash_set = std::hash()(value.desc_set); + auto hash_binding = std::hash()(value.binding); + return (hash_set * 0x10001b31) ^ hash_binding; + } + + inline size_t operator()(const LocationComponentPair &value) const + { + // Quality of hash doesn't really matter here. + auto hash_set = std::hash()(value.location); + auto hash_binding = std::hash()(value.component); + return (hash_set * 0x10001b31) ^ hash_binding; + } + + inline size_t operator()(const StageSetBinding &value) const + { + // Quality of hash doesn't really matter here. + auto hash_model = std::hash()(value.model); + auto hash_set = std::hash()(value.desc_set); + auto tmp_hash = (hash_model * 0x10001b31) ^ hash_set; + return (tmp_hash * 0x10001b31) ^ value.binding; + } +}; + +// Special constant used in a {MSL,HLSL}ResourceBinding desc_set +// element to indicate the bindings for the push constants. +static const uint32_t ResourceBindingPushConstantDescriptorSet = ~(0u); + +// Special constant used in a {MSL,HLSL}ResourceBinding binding +// element to indicate the bindings for the push constants. 
+static const uint32_t ResourceBindingPushConstantBinding = 0; } // namespace SPIRV_CROSS_NAMESPACE +namespace std +{ +template +struct hash> +{ + size_t operator()(const SPIRV_CROSS_NAMESPACE::TypedID &value) const + { + return std::hash()(value); + } +}; +} // namespace std + #endif diff --git a/spirv_cpp.cpp b/spirv_cpp.cpp index 25966b32ad7..dd0a84c8312 100644 --- a/spirv_cpp.cpp +++ b/spirv_cpp.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cpp.hpp" using namespace spv; @@ -267,8 +274,6 @@ void CompilerCPP::emit_resources() if (emitted) statement(""); - declare_undefined_values(); - statement("inline void init(spirv_cross_shader& s)"); begin_scope(); statement(resource_type, "::init(s);"); @@ -306,6 +311,8 @@ void CompilerCPP::emit_resources() string CompilerCPP::compile() { + ir.fixup_reserved_names(); + // Do not deal with ES-isms like precision, older extensions and such. options.es = false; options.version = 450; @@ -329,11 +336,8 @@ string CompilerCPP::compile() uint32_t pass_count = 0; do { - if (pass_count >= 3) - SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); - resource_registrations.clear(); - reset(); + reset(pass_count); // Move constructor for this type is broken on GCC 4.9 ... 
buffer.reset(); diff --git a/spirv_cpp.hpp b/spirv_cpp.hpp index 4c20aa37b8d..c76629cdcbe 100644 --- a/spirv_cpp.hpp +++ b/spirv_cpp.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_CPP_HPP #define SPIRV_CROSS_CPP_HPP diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 9fdfd1f00c0..edc98f81e1d 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #include "spirv_cross.hpp" #include "GLSL.std.450.h" #include "spirv_cfg.hpp" @@ -29,16 +36,16 @@ using namespace SPIRV_CROSS_NAMESPACE; Compiler::Compiler(vector ir_) { - Parser parser(move(ir_)); + Parser parser(std::move(ir_)); parser.parse(); - set_ir(move(parser.get_parsed_ir())); + set_ir(std::move(parser.get_parsed_ir())); } Compiler::Compiler(const uint32_t *ir_, size_t word_count) { Parser parser(ir_, word_count); parser.parse(); - set_ir(move(parser.get_parsed_ir())); + set_ir(std::move(parser.get_parsed_ir())); } Compiler::Compiler(const ParsedIR &ir_) @@ -48,12 +55,12 @@ Compiler::Compiler(const ParsedIR &ir_) Compiler::Compiler(ParsedIR &&ir_) { - set_ir(move(ir_)); + set_ir(std::move(ir_)); } void Compiler::set_ir(ParsedIR &&ir_) { - ir = move(ir_); + ir = std::move(ir_); parse_fixup(); } @@ -88,6 +95,13 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) bool Compiler::block_is_pure(const SPIRBlock &block) { + // This is a global side effect of the function. + if (block.terminator == SPIRBlock::Kill || + block.terminator == SPIRBlock::TerminateRay || + block.terminator == SPIRBlock::IgnoreIntersection || + block.terminator == SPIRBlock::EmitMeshTasks) + return false; + for (auto &i : block.ops) { auto ops = stream(i); @@ -141,21 +155,62 @@ bool Compiler::block_is_pure(const SPIRBlock &block) case OpEmitVertex: return false; + // Mesh shader functions modify global state. + // (EmitMeshTasks is a terminator). + case OpSetMeshOutputsEXT: + return false; + // Barriers disallow any reordering, so we should treat blocks with barrier as writing. case OpControlBarrier: case OpMemoryBarrier: return false; // Ray tracing builtins are impure. 
- case OpReportIntersectionNV: + case OpReportIntersectionKHR: case OpIgnoreIntersectionNV: case OpTerminateRayNV: case OpTraceNV: + case OpTraceRayKHR: case OpExecuteCallableNV: + case OpExecuteCallableKHR: + case OpRayQueryInitializeKHR: + case OpRayQueryTerminateKHR: + case OpRayQueryGenerateIntersectionKHR: + case OpRayQueryConfirmIntersectionKHR: + case OpRayQueryProceedKHR: + // There are various getters in ray query, but they are considered pure. return false; // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure. + case OpDemoteToHelperInvocationEXT: + // This is a global side effect of the function. + return false; + + case OpExtInst: + { + uint32_t extension_set = ops[2]; + if (get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(ops[3]); + switch (op_450) + { + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + auto &type = expression_type(ops[5]); + if (type.storage != StorageClassFunction) + return false; + break; + } + + default: + break; + } + } + break; + } + default: break; } @@ -177,7 +232,7 @@ string Compiler::to_name(uint32_t id, bool allow_alias) const { // If the alias master has been specially packed, we will have emitted a clean variant as well, // so skip the name aliasing here. - if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) + if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) return to_name(type.type_alias); } } @@ -300,6 +355,8 @@ void Compiler::register_write(uint32_t chain) var = maybe_get(access_chain->loaded_from); } + auto &chain_type = expression_type(chain); + if (var) { bool check_argument_storage_qualifier = true; @@ -342,7 +399,7 @@ void Compiler::register_write(uint32_t chain) force_recompile(); } } - else + else if (chain_type.pointer) { // If we stored through a variable pointer, then we don't know which // variable we stored to. 
So *all* expressions after this point need to @@ -351,6 +408,9 @@ void Compiler::register_write(uint32_t chain) // only certain variables, we can invalidate only those. flush_all_active_variables(); } + + // If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead. + // This can happen in copy_logical_type where we unroll complex reads and writes to temporaries. } void Compiler::flush_dependees(SPIRVariable &var) @@ -497,10 +557,16 @@ bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins return false; } - bool hidden = false; - if (check_active_interface_variables && storage_class_is_interface(var.storage)) - hidden = active_interface_variables.find(var.self) == end(active_interface_variables); - return hidden; + // In SPIR-V 1.4 and up we must also use the active variable interface to disable global variables + // which are not part of the entry point. + if (ir.get_spirv_version() >= 0x10400 && var.storage != spv::StorageClassGeneric && + var.storage != spv::StorageClassFunction && !interface_variable_exists_in_entry_point(var.self)) + { + return true; + } + + return check_active_interface_variables && storage_class_is_interface(var.storage) && + active_interface_variables.find(var.self) == end(active_interface_variables); } bool Compiler::is_builtin_type(const SPIRType &type) const @@ -569,7 +635,7 @@ ShaderResources Compiler::get_shader_resources() const return get_shader_resources(nullptr); } -ShaderResources Compiler::get_shader_resources(const unordered_set &active_variables) const +ShaderResources Compiler::get_shader_resources(const unordered_set &active_variables) const { return get_shader_resources(&active_variables); } @@ -659,10 +725,42 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t case OpExtInst: { - if (length < 5) + if (length < 3) return false; - uint32_t extension_set = args[2]; - if (compiler.get(extension_set).ext == 
SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) + auto &extension_set = compiler.get(args[2]); + switch (extension_set.ext) + { + case SPIRExtension::GLSL: + { + auto op = static_cast(args[3]); + + switch (op) + { + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + { + auto *var = compiler.maybe_get(args[4]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[4]); + break; + } + + case GLSLstd450Modf: + case GLSLstd450Fract: + { + auto *var = compiler.maybe_get(args[5]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[5]); + break; + } + + default: + break; + } + break; + } + case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter: { enum AMDShaderExplicitVertexParameter { @@ -684,6 +782,10 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t default: break; } + break; + } + default: + break; } break; } @@ -726,16 +828,24 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t return true; } -unordered_set Compiler::get_active_interface_variables() const +unordered_set Compiler::get_active_interface_variables() const { // Traverse the call graph and find all interface variables which are in use. - unordered_set variables; + unordered_set variables; InterfaceVariableAccessHandler handler(*this, variables); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); - // Make sure we preserve output variables which are only initialized, but never accessed by any code. 
ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { - if (var.storage == StorageClassOutput && var.initializer != 0) + if (var.storage != StorageClassOutput) + return; + if (!interface_variable_exists_in_entry_point(var.self)) + return; + + // An output variable which is just declared (but uninitialized) might be read by subsequent stages + // so we should force-enable these outputs, + // since compilation will fail if a subsequent stage attempts to read from the variable in question. + // Also, make sure we preserve output variables which are only initialized, but never accessed by any code. + if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment) variables.insert(var.self); }); @@ -746,13 +856,13 @@ unordered_set Compiler::get_active_interface_variables() const return variables; } -void Compiler::set_enabled_interface_variables(std::unordered_set active_variables) +void Compiler::set_enabled_interface_variables(std::unordered_set active_variables) { - active_interface_variables = move(active_variables); + active_interface_variables = std::move(active_variables); check_active_interface_variables = true; } -ShaderResources Compiler::get_shader_resources(const unordered_set *active_variables) const +ShaderResources Compiler::get_shader_resources(const unordered_set *active_variables) const { ShaderResources res; @@ -763,19 +873,79 @@ ShaderResources Compiler::get_shader_resources(const unordered_set *ac // It is possible for uniform storage classes to be passed as function parameters, so detect // that. To detect function parameters, check of StorageClass of variable is function scope. 
- if (var.storage == StorageClassFunction || !type.pointer || is_builtin_variable(var)) + if (var.storage == StorageClassFunction || !type.pointer) return; if (active_variables && active_variables->find(var.self) == end(*active_variables)) return; + // In SPIR-V 1.4 and up, every global must be present in the entry point interface list, + // not just IO variables. + bool active_in_entry_point = true; + if (ir.get_spirv_version() < 0x10400) + { + if (var.storage == StorageClassInput || var.storage == StorageClassOutput) + active_in_entry_point = interface_variable_exists_in_entry_point(var.self); + } + else + active_in_entry_point = interface_variable_exists_in_entry_point(var.self); + + if (!active_in_entry_point) + return; + + bool is_builtin = is_builtin_variable(var); + + if (is_builtin) + { + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + + auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs; + BuiltInResource resource; + + if (has_decoration(type.self, DecorationBlock)) + { + resource.resource = { var.self, var.basetype, type.self, + get_remapped_declared_block_name(var.self, false) }; + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + resource.value_type_id = type.member_types[i]; + resource.builtin = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)); + list.push_back(resource); + } + } + else + { + bool strip_array = + !has_decoration(var.self, DecorationPatch) && ( + get_execution_model() == ExecutionModelTessellationControl || + (get_execution_model() == ExecutionModelTessellationEvaluation && + var.storage == StorageClassInput)); + + resource.resource = { var.self, var.basetype, type.self, get_name(var.self) }; + + if (strip_array && !type.array.empty()) + resource.value_type_id = get_variable_data_type(var).parent_type; + else + resource.value_type_id = get_variable_data_type_id(var); + + assert(resource.value_type_id); + + resource.builtin 
= BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + list.push_back(std::move(resource)); + } + return; + } + // Input - if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self)) + if (var.storage == StorageClassInput) { if (has_decoration(type.self, DecorationBlock)) { res.stage_inputs.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); + { var.self, var.basetype, type.self, + get_remapped_declared_block_name(var.self, false) }); } else res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); @@ -786,12 +956,12 @@ ShaderResources Compiler::get_shader_resources(const unordered_set *ac res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Outputs - else if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self)) + else if (var.storage == StorageClassOutput) { if (has_decoration(type.self, DecorationBlock)) { res.stage_outputs.push_back( - { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); } else res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); @@ -821,6 +991,10 @@ ShaderResources Compiler::get_shader_resources(const unordered_set *ac // in the future. 
res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } + else if (type.storage == StorageClassShaderRecordBufferKHR) + { + res.shader_record_buffers.push_back({ var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); + } // Images else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && type.image.sampled == 2) @@ -849,7 +1023,7 @@ ShaderResources Compiler::get_shader_resources(const unordered_set *ac res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } // Acceleration structures - else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructureNV) + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructure) { res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); } @@ -886,10 +1060,12 @@ void Compiler::parse_fixup() if (id.get_type() == TypeConstant) { auto &c = id.get(); - if (ir.meta[c.self].decoration.builtin && ir.meta[c.self].decoration.builtin_type == BuiltInWorkgroupSize) + if (has_decoration(c.self, DecorationBuiltIn) && + BuiltIn(get_decoration(c.self, DecorationBuiltIn)) == BuiltInWorkgroupSize) { // In current SPIR-V, there can be just one constant like this. // All entry points will receive the constant value. + // WorkgroupSize take precedence over LocalSizeId. 
for (auto &entry : ir.entry_points) { entry.second.workgroup_size.constant = c.self; @@ -903,8 +1079,11 @@ void Compiler::parse_fixup() { auto &var = id.get(); if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup || + var.storage == StorageClassTaskPayloadWorkgroupEXT || var.storage == StorageClassOutput) + { global_variables.push_back(var.self); + } if (variable_storage_is_aliased(var)) aliased_variables.push_back(var.self); } @@ -969,17 +1148,17 @@ void Compiler::update_name_cache(unordered_set &cache, string &name) update_name_cache(cache, cache, name); } -void Compiler::set_name(uint32_t id, const std::string &name) +void Compiler::set_name(ID id, const std::string &name) { ir.set_name(id, name); } -const SPIRType &Compiler::get_type(uint32_t id) const +const SPIRType &Compiler::get_type(TypeID id) const { return get(id); } -const SPIRType &Compiler::get_type_from_variable(uint32_t id) const +const SPIRType &Compiler::get_type_from_variable(VariableID id) const { return get(get(id).basetype); } @@ -1050,23 +1229,23 @@ bool Compiler::is_sampled_image_type(const SPIRType &type) type.image.dim != DimBuffer; } -void Compiler::set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, +void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument) { ir.set_member_decoration_string(id, index, decoration, argument); } -void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) { ir.set_member_decoration(id, index, decoration, argument); } -void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string &name) +void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name) { ir.set_member_name(id, index, name); } -const std::string &Compiler::get_member_name(uint32_t 
id, uint32_t index) const +const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const { return ir.get_member_name(id, index); } @@ -1082,7 +1261,7 @@ void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const ir.meta[type_id].members[index].qualified_alias = name; } -const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t index) const +const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const { auto *m = ir.find_meta(type_id); if (m && index < m->members.size()) @@ -1091,32 +1270,32 @@ const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t ind return ir.get_empty_string(); } -uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration(id, index, decoration); } -const Bitset &Compiler::get_member_decoration_bitset(uint32_t id, uint32_t index) const +const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const { return ir.get_member_decoration_bitset(id, index); } -bool Compiler::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.has_member_decoration(id, index, decoration); } -void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) { ir.unset_member_decoration(id, index, decoration); } -void Compiler::set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument) +void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument) { ir.set_decoration_string(id, decoration, argument); } -void Compiler::set_decoration(uint32_t id, Decoration 
decoration, uint32_t argument) +void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument) { ir.set_decoration(id, decoration, argument); } @@ -1124,32 +1303,8 @@ void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argum void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value) { auto &dec = ir.meta[id].decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - dec.extended.packed = true; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = value; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = value; - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = value; - break; - - case SPIRVCrossDecorationResourceIndexPrimary: - dec.extended.resource_index_primary = value; - break; - - case SPIRVCrossDecorationResourceIndexSecondary: - dec.extended.resource_index_secondary = value; - break; - } + dec.extended.flags.set(decoration); + dec.extended.values[decoration] = value; } void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration, @@ -1157,32 +1312,23 @@ void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, Ext { ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); auto &dec = ir.meta[type].members[index]; + dec.extended.flags.set(decoration); + dec.extended.values[decoration] = value; +} +static uint32_t get_default_extended_decoration(ExtendedDecorations decoration) +{ switch (decoration) { - case SPIRVCrossDecorationPacked: - dec.extended.packed = true; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = value; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = value; - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = value; - break; - case 
SPIRVCrossDecorationResourceIndexPrimary: - dec.extended.resource_index_primary = value; - break; - case SPIRVCrossDecorationResourceIndexSecondary: - dec.extended.resource_index_secondary = value; - break; + case SPIRVCrossDecorationResourceIndexTertiary: + case SPIRVCrossDecorationResourceIndexQuaternary: + case SPIRVCrossDecorationInterfaceMemberIndex: + return ~(0u); + + default: + return 0; } } @@ -1193,28 +1339,11 @@ uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations deco return 0; auto &dec = m->decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return uint32_t(dec.extended.packed); - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index; - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id; - - case SPIRVCrossDecorationResourceIndexPrimary: - return dec.extended.resource_index_primary; - case SPIRVCrossDecorationResourceIndexSecondary: - return dec.extended.resource_index_secondary; - } + if (!dec.extended.flags.get(decoration)) + return get_default_extended_decoration(decoration); - return 0; + return dec.extended.values[decoration]; } uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const @@ -1227,28 +1356,9 @@ uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, return 0; auto &dec = m->members[index]; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return uint32_t(dec.extended.packed); - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index; - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id; - - case SPIRVCrossDecorationResourceIndexPrimary: - return dec.extended.resource_index_primary; - - case 
SPIRVCrossDecorationResourceIndexSecondary: - return dec.extended.resource_index_secondary; - } - - return 0; + if (!dec.extended.flags.get(decoration)) + return get_default_extended_decoration(decoration); + return dec.extended.values[decoration]; } bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const @@ -1258,28 +1368,7 @@ bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decorati return false; auto &dec = m->decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return dec.extended.packed; - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type != 0; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index != uint32_t(-1); - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id != 0; - - case SPIRVCrossDecorationResourceIndexPrimary: - return dec.extended.resource_index_primary != uint32_t(-1); - - case SPIRVCrossDecorationResourceIndexSecondary: - return dec.extended.resource_index_secondary != uint32_t(-1); - } - - return false; + return dec.extended.flags.get(decoration); } bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const @@ -1292,110 +1381,40 @@ bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, Ext return false; auto &dec = m->members[index]; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - return dec.extended.packed; - - case SPIRVCrossDecorationPackedType: - return dec.extended.packed_type != 0; - - case SPIRVCrossDecorationInterfaceMemberIndex: - return dec.extended.ib_member_index != uint32_t(-1); - - case SPIRVCrossDecorationInterfaceOrigID: - return dec.extended.ib_orig_id != 0; - - case SPIRVCrossDecorationResourceIndexPrimary: - return dec.extended.resource_index_primary != uint32_t(-1); - - case SPIRVCrossDecorationResourceIndexSecondary: - return dec.extended.resource_index_secondary != 
uint32_t(-1); - } - - return false; + return dec.extended.flags.get(decoration); } void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration) { auto &dec = ir.meta[id].decoration; - switch (decoration) - { - case SPIRVCrossDecorationPacked: - dec.extended.packed = false; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = 0; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = uint32_t(-1); - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = 0; - break; - - case SPIRVCrossDecorationResourceIndexPrimary: - dec.extended.resource_index_primary = uint32_t(-1); - break; - - case SPIRVCrossDecorationResourceIndexSecondary: - dec.extended.resource_index_secondary = uint32_t(-1); - break; - } + dec.extended.flags.clear(decoration); + dec.extended.values[decoration] = 0; } void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) { ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); auto &dec = ir.meta[type].members[index]; - - switch (decoration) - { - case SPIRVCrossDecorationPacked: - dec.extended.packed = false; - break; - - case SPIRVCrossDecorationPackedType: - dec.extended.packed_type = 0; - break; - - case SPIRVCrossDecorationInterfaceMemberIndex: - dec.extended.ib_member_index = uint32_t(-1); - break; - - case SPIRVCrossDecorationInterfaceOrigID: - dec.extended.ib_orig_id = 0; - break; - - case SPIRVCrossDecorationResourceIndexPrimary: - dec.extended.resource_index_primary = uint32_t(-1); - break; - - case SPIRVCrossDecorationResourceIndexSecondary: - dec.extended.resource_index_secondary = uint32_t(-1); - break; - } + dec.extended.flags.clear(decoration); + dec.extended.values[decoration] = 0; } -StorageClass Compiler::get_storage_class(uint32_t id) const +StorageClass Compiler::get_storage_class(VariableID id) const { return get(id).storage; } -const 
std::string &Compiler::get_name(uint32_t id) const +const std::string &Compiler::get_name(ID id) const { return ir.get_name(id); } -const std::string Compiler::get_fallback_name(uint32_t id) const +const std::string Compiler::get_fallback_name(ID id) const { return join("_", id); } -const std::string Compiler::get_block_fallback_name(uint32_t id) const +const std::string Compiler::get_block_fallback_name(VariableID id) const { auto &var = get(id); if (get_name(id).empty()) @@ -1404,37 +1423,37 @@ const std::string Compiler::get_block_fallback_name(uint32_t id) const return get_name(id); } -const Bitset &Compiler::get_decoration_bitset(uint32_t id) const +const Bitset &Compiler::get_decoration_bitset(ID id) const { return ir.get_decoration_bitset(id); } -bool Compiler::has_decoration(uint32_t id, Decoration decoration) const +bool Compiler::has_decoration(ID id, Decoration decoration) const { return ir.has_decoration(id, decoration); } -const string &Compiler::get_decoration_string(uint32_t id, Decoration decoration) const +const string &Compiler::get_decoration_string(ID id, Decoration decoration) const { return ir.get_decoration_string(id, decoration); } -const string &Compiler::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration_string(id, index, decoration); } -uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const +uint32_t Compiler::get_decoration(ID id, Decoration decoration) const { return ir.get_decoration(id, decoration); } -void Compiler::unset_decoration(uint32_t id, Decoration decoration) +void Compiler::unset_decoration(ID id, Decoration decoration) { ir.unset_decoration(id, decoration); } -bool Compiler::get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const +bool 
Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const { auto *m = ir.find_meta(id); if (!m) @@ -1449,6 +1468,58 @@ bool Compiler::get_binary_offset_for_decoration(uint32_t id, spv::Decoration dec return true; } +bool Compiler::block_is_noop(const SPIRBlock &block) const +{ + if (block.terminator != SPIRBlock::Direct) + return false; + + auto &child = get(block.next_block); + + // If this block participates in PHI, the block isn't really noop. + for (auto &phi : block.phi_variables) + if (phi.parent == block.self || phi.parent == child.self) + return false; + + for (auto &phi : child.phi_variables) + if (phi.parent == block.self) + return false; + + // Verify all instructions have no semantic impact. + for (auto &i : block.ops) + { + auto op = static_cast(i.op); + + switch (op) + { + // Non-Semantic instructions. + case OpLine: + case OpNoLine: + break; + + case OpExtInst: + { + auto *ops = stream(i); + auto ext = get(ops[2]).ext; + + bool ext_is_nonsemantic_only = + ext == SPIRExtension::NonSemanticShaderDebugInfo || + ext == SPIRExtension::SPV_debug_info || + ext == SPIRExtension::NonSemanticGeneric; + + if (!ext_is_nonsemantic_only) + return false; + + break; + } + + default: + return false; + } + } + + return true; +} + bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const { // Tried and failed. @@ -1506,7 +1577,7 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method { // Empty loop header that just sets up merge target // and branches to loop body. 
- bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty(); + bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block_is_noop(block); if (!ret) return false; @@ -1532,19 +1603,8 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone && (positive_candidate || negative_candidate); - // If we have OpPhi which depends on branches which came from our own block, - // we need to flush phi variables in else block instead of a trivial break, - // so we cannot assume this is a for loop candidate. if (ret) { - for (auto &phi : block.phi_variables) - if (phi.parent == block.self || phi.parent == child.self) - return false; - - for (auto &phi : child.phi_variables) - if (phi.parent == block.self) - return false; - auto *merge = maybe_get(block.merge_block); if (merge) for (auto &phi : merge->phi_variables) @@ -1558,42 +1618,10 @@ bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method return false; } -bool Compiler::block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to) +bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const { - auto *start = &from; - - if (start->self == to.self) - return true; - - // Break cycles. - if (is_continue(start->self)) - return false; - - // If our select block doesn't merge, we must break or continue in these blocks, - // so if continues occur branchless within these blocks, consider them branchless as well. - // This is typically used for loop control. 
- if (start->terminator == SPIRBlock::Select && start->merge == SPIRBlock::MergeNone && - (block_is_outside_flow_control_from_block(get(start->true_block), to) || - block_is_outside_flow_control_from_block(get(start->false_block), to))) - { - return true; - } - else if (start->merge_block && block_is_outside_flow_control_from_block(get(start->merge_block), to)) - { - return true; - } - else if (start->next_block && block_is_outside_flow_control_from_block(get(start->next_block), to)) - { - return true; - } - else - return false; -} - -bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const -{ - if (!execution_is_branchless(from, to)) - return false; + if (!execution_is_branchless(from, to)) + return false; auto *start = &from; for (;;) @@ -1601,15 +1629,10 @@ bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) con if (start->self == to.self) return true; - if (!start->ops.empty()) + if (!block_is_noop(*start)) return false; auto &next = get(start->next_block); - // Flushing phi variables does not count as noop. - for (auto &phi : next.phi_variables) - if (phi.parent == start->self) - return false; - start = &next; } } @@ -1645,7 +1668,7 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc if (block.merge == SPIRBlock::MergeLoop) return SPIRBlock::WhileLoop; - if (block.loop_dominator == SPIRBlock::NoDominator) + if (block.loop_dominator == BlockID(SPIRBlock::NoDominator)) { // Continue block is never reached from CFG. return SPIRBlock::ComplexLoop; @@ -1663,6 +1686,12 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc const auto *true_block = maybe_get(block.true_block); const auto *merge_block = maybe_get(dominator.merge_block); + // If we need to flush Phi in this block, we cannot have a DoWhile loop. 
+ bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block); + bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block); + if (flush_phi_to_false || flush_phi_to_true) + return SPIRBlock::ComplexLoop; + bool positive_do_while = block.true_block == dominator.self && (block.false_block == dominator.merge_block || (false_block && merge_block && execution_is_noop(*false_block, *merge_block))); @@ -1681,9 +1710,48 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc } } +const SmallVector &Compiler::get_case_list(const SPIRBlock &block) const +{ + uint32_t width = 0; + + // First we check if we can get the type directly from the block.condition + // since it can be a SPIRConstant or a SPIRVariable. + if (const auto *constant = maybe_get(block.condition)) + { + const auto &type = get(constant->constant_type); + width = type.width; + } + else if (const auto *var = maybe_get(block.condition)) + { + const auto &type = get(var->basetype); + width = type.width; + } + else if (const auto *undef = maybe_get(block.condition)) + { + const auto &type = get(undef->basetype); + width = type.width; + } + else + { + auto search = ir.load_type_width.find(block.condition); + if (search == ir.load_type_width.end()) + { + SPIRV_CROSS_THROW("Use of undeclared variable on a switch statement."); + } + + width = search->second; + } + + if (width > 32) + return block.cases_64bit; + + return block.cases_32bit; +} + bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const { handler.set_current_block(block); + handler.rearm_current_block(block); // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing @@ -1707,10 +1775,15 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand return false; if 
(!handler.end_function_scope(ops, i.length)) return false; + + handler.rearm_current_block(block); } } } + if (!handler.handle_terminator(block)) + return false; + return true; } @@ -1778,10 +1851,22 @@ size_t Compiler::get_declared_struct_size(const SPIRType &type) const if (type.member_types.empty()) SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); - uint32_t last = uint32_t(type.member_types.size() - 1); - size_t offset = type_struct_member_offset(type, last); - size_t size = get_declared_struct_member_size(type, last); - return offset + size; + // Offsets can be declared out of order, so we need to deduce the actual size + // based on last member instead. + uint32_t member_index = 0; + size_t highest_offset = 0; + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + size_t offset = type_struct_member_offset(type, i); + if (offset > highest_offset) + { + highest_offset = offset; + member_index = i; + } + } + + size_t size = get_declared_struct_member_size(type, member_index); + return highest_offset + size; } size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const @@ -1797,6 +1882,161 @@ size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, si return size; } +uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const +{ + auto &result_type = get(spec.basetype); + if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int && + result_type.basetype != SPIRType::Boolean) + { + SPIRV_CROSS_THROW( + "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n"); + } + + if (!is_scalar(result_type)) + SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); + + uint32_t value = 0; + + const auto eval_u32 = [&](uint32_t id) -> uint32_t { + auto &type = expression_type(id); + if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != 
SPIRType::Boolean) + { + SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating " + "specialization constants.\n"); + } + + if (!is_scalar(type)) + SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); + if (const auto *c = this->maybe_get(id)) + return c->scalar(); + else + return evaluate_spec_constant_u32(this->get(id)); + }; + +#define binary_spec_op(op, binary_op) \ + case Op##op: \ + value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \ + break +#define binary_spec_op_cast(op, binary_op, type) \ + case Op##op: \ + value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \ + break + + // Support the basic opcodes which are typically used when computing array sizes. + switch (spec.opcode) + { + binary_spec_op(IAdd, +); + binary_spec_op(ISub, -); + binary_spec_op(IMul, *); + binary_spec_op(BitwiseAnd, &); + binary_spec_op(BitwiseOr, |); + binary_spec_op(BitwiseXor, ^); + binary_spec_op(LogicalAnd, &); + binary_spec_op(LogicalOr, |); + binary_spec_op(ShiftLeftLogical, <<); + binary_spec_op(ShiftRightLogical, >>); + binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t); + binary_spec_op(LogicalEqual, ==); + binary_spec_op(LogicalNotEqual, !=); + binary_spec_op(IEqual, ==); + binary_spec_op(INotEqual, !=); + binary_spec_op(ULessThan, <); + binary_spec_op(ULessThanEqual, <=); + binary_spec_op(UGreaterThan, >); + binary_spec_op(UGreaterThanEqual, >=); + binary_spec_op_cast(SLessThan, <, int32_t); + binary_spec_op_cast(SLessThanEqual, <=, int32_t); + binary_spec_op_cast(SGreaterThan, >, int32_t); + binary_spec_op_cast(SGreaterThanEqual, >=, int32_t); +#undef binary_spec_op +#undef binary_spec_op_cast + + case OpLogicalNot: + value = uint32_t(!eval_u32(spec.arguments[0])); + break; + + case OpNot: + value = ~eval_u32(spec.arguments[0]); + break; + + case OpSNegate: + value = uint32_t(-int32_t(eval_u32(spec.arguments[0]))); + break; + + case OpSelect: + 
value = eval_u32(spec.arguments[0]) ? eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]); + break; + + case OpUMod: + { + uint32_t a = eval_u32(spec.arguments[0]); + uint32_t b = eval_u32(spec.arguments[1]); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n"); + value = a % b; + break; + } + + case OpSRem: + { + auto a = int32_t(eval_u32(spec.arguments[0])); + auto b = int32_t(eval_u32(spec.arguments[1])); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n"); + value = a % b; + break; + } + + case OpSMod: + { + auto a = int32_t(eval_u32(spec.arguments[0])); + auto b = int32_t(eval_u32(spec.arguments[1])); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n"); + auto v = a % b; + + // Makes sure we match the sign of b, not a. + if ((b < 0 && v > 0) || (b > 0 && v < 0)) + v += b; + value = v; + break; + } + + case OpUDiv: + { + uint32_t a = eval_u32(spec.arguments[0]); + uint32_t b = eval_u32(spec.arguments[1]); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n"); + value = a / b; + break; + } + + case OpSDiv: + { + auto a = int32_t(eval_u32(spec.arguments[0])); + auto b = int32_t(eval_u32(spec.arguments[1])); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n"); + value = a / b; + break; + } + + default: + SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n"); + } + + return value; +} + +uint32_t Compiler::evaluate_constant_u32(uint32_t id) const +{ + if (const auto *c = maybe_get(id)) + return c->scalar(); + else + return evaluate_spec_constant_u32(get(id)); +} + size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const { if (struct_type.member_types.empty()) @@ -1820,11 +2060,18 @@ size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, ui break; } + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + { + // Check if this is a top-level pointer 
type, and not an array of pointers. + if (type.pointer_depth > get(type.parent_type).pointer_depth) + return 8; + } + if (!type.array.empty()) { // For arrays, we can use ArrayStride to get an easy check. bool array_size_literal = type.array_size_literal.back(); - uint32_t array_size = array_size_literal ? type.array.back() : get(type.array.back()).scalar(); + uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(type.array.back()); return type_struct_member_array_stride(struct_type, index) * array_size; } else if (type.basetype == SPIRType::Struct) @@ -1903,7 +2150,7 @@ bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint return true; } -SmallVector Compiler::get_active_buffer_ranges(uint32_t id) const +SmallVector Compiler::get_active_buffer_ranges(VariableID id) const { SmallVector ranges; BufferAccessHandler handler(*this, ranges, id); @@ -1965,6 +2212,12 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar execution.workgroup_size.z = arg2; break; + case ExecutionModeLocalSizeId: + execution.workgroup_size.id_x = arg0; + execution.workgroup_size.id_y = arg1; + execution.workgroup_size.id_z = arg2; + break; + case ExecutionModeInvocations: execution.invocations = arg0; break; @@ -1973,6 +2226,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar execution.output_vertices = arg0; break; + case ExecutionModeOutputPrimitivesEXT: + execution.output_primitives = arg0; + break; + default: break; } @@ -1992,28 +2249,52 @@ uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationCo y = { 0, 0 }; z = { 0, 0 }; + // WorkgroupSize builtin takes precedence over LocalSize / LocalSizeId. 
if (execution.workgroup_size.constant != 0) { auto &c = get(execution.workgroup_size.constant); - if (c.m.c[0].id[0] != 0) + if (c.m.c[0].id[0] != ID(0)) { x.id = c.m.c[0].id[0]; x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId); } - if (c.m.c[0].id[1] != 0) + if (c.m.c[0].id[1] != ID(0)) { y.id = c.m.c[0].id[1]; y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId); } - if (c.m.c[0].id[2] != 0) + if (c.m.c[0].id[2] != ID(0)) { z.id = c.m.c[0].id[2]; z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId); } } + else if (execution.flags.get(ExecutionModeLocalSizeId)) + { + auto &cx = get(execution.workgroup_size.id_x); + if (cx.specialization) + { + x.id = execution.workgroup_size.id_x; + x.constant_id = get_decoration(execution.workgroup_size.id_x, DecorationSpecId); + } + + auto &cy = get(execution.workgroup_size.id_y); + if (cy.specialization) + { + y.id = execution.workgroup_size.id_y; + y.constant_id = get_decoration(execution.workgroup_size.id_y, DecorationSpecId); + } + + auto &cz = get(execution.workgroup_size.id_z); + if (cz.specialization) + { + z.id = execution.workgroup_size.id_z; + z.constant_id = get_decoration(execution.workgroup_size.id_z, DecorationSpecId); + } + } return execution.workgroup_size.constant; } @@ -2023,15 +2304,42 @@ uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t auto &execution = get_entry_point(); switch (mode) { + case ExecutionModeLocalSizeId: + if (execution.flags.get(ExecutionModeLocalSizeId)) + { + switch (index) + { + case 0: + return execution.workgroup_size.id_x; + case 1: + return execution.workgroup_size.id_y; + case 2: + return execution.workgroup_size.id_z; + default: + return 0; + } + } + else + return 0; + case ExecutionModeLocalSize: switch (index) { case 0: - return execution.workgroup_size.x; + if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_x != 0) + return get(execution.workgroup_size.id_x).scalar(); + else + 
return execution.workgroup_size.x; case 1: - return execution.workgroup_size.y; + if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_y != 0) + return get(execution.workgroup_size.id_y).scalar(); + else + return execution.workgroup_size.y; case 2: - return execution.workgroup_size.z; + if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_z != 0) + return get(execution.workgroup_size.id_z).scalar(); + else + return execution.workgroup_size.z; default: return 0; } @@ -2042,6 +2350,9 @@ uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t case ExecutionModeOutputVertices: return execution.output_vertices; + case ExecutionModeOutputPrimitivesEXT: + return execution.output_primitives; + default: return 0; } @@ -2058,45 +2369,70 @@ bool Compiler::is_tessellation_shader(ExecutionModel model) return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; } +bool Compiler::is_vertex_like_shader() const +{ + auto model = get_execution_model(); + return model == ExecutionModelVertex || model == ExecutionModelGeometry || + model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; +} + bool Compiler::is_tessellation_shader() const { return is_tessellation_shader(get_execution_model()); } -void Compiler::set_remapped_variable_state(uint32_t id, bool remap_enable) +bool Compiler::is_tessellating_triangles() const +{ + return get_execution_mode_bitset().get(ExecutionModeTriangles); +} + +void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable) { get(id).remapped_variable = remap_enable; } -bool Compiler::get_remapped_variable_state(uint32_t id) const +bool Compiler::get_remapped_variable_state(VariableID id) const { return get(id).remapped_variable; } -void Compiler::set_subpass_input_remapped_components(uint32_t id, uint32_t components) +void Compiler::set_subpass_input_remapped_components(VariableID id, 
uint32_t components) { get(id).remapped_components = components; } -uint32_t Compiler::get_subpass_input_remapped_components(uint32_t id) const +uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const { return get(id).remapped_components; } void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source) { - auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) e.implied_read_expressions.push_back(source); } void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source) { - auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) e.implied_read_expressions.push_back(source); } +void Compiler::add_active_interface_variable(uint32_t var_id) +{ + active_interface_variables.insert(var_id); + + // In SPIR-V 1.4 and up we must also track the interface variable in the entry point. 
+ if (ir.get_spirv_version() >= 0x10400) + { + auto &vars = get_entry_point().interface_variables; + if (find(begin(vars), end(vars), VariableID(var_id)) == end(vars)) + vars.push_back(var_id); + } +} + void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression) { // Don't inherit any expression dependencies if the expression in dst @@ -2221,19 +2557,25 @@ SPIREntryPoint &Compiler::get_entry_point() bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const { auto &var = get(id); - if (var.storage != StorageClassInput && var.storage != StorageClassOutput && - var.storage != StorageClassUniformConstant) - SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface."); - - // This is to avoid potential problems with very old glslang versions which did - // not emit input/output interfaces properly. - // We can assume they only had a single entry point, and single entry point - // shaders could easily be assumed to use every interface variable anyways. - if (ir.entry_points.size() <= 1) - return true; + + if (ir.get_spirv_version() < 0x10400) + { + if (var.storage != StorageClassInput && var.storage != StorageClassOutput && + var.storage != StorageClassUniformConstant) + SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface."); + + // This is to avoid potential problems with very old glslang versions which did + // not emit input/output interfaces properly. + // We can assume they only had a single entry point, and single entry point + // shaders could easily be assumed to use every interface variable anyways. + if (ir.entry_points.size() <= 1) + return true; + } + + // In SPIR-V 1.4 and later, all global resource variables must be present. 
auto &execution = get_entry_point(); - return find(begin(execution.interface_variables), end(execution.interface_variables), id) != + return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) != end(execution.interface_variables); } @@ -2245,7 +2587,7 @@ void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunc unordered_map remapping; for (uint32_t i = 0; i < length; i++) remapping[func.arguments[i].id] = remap_parameter(args[i]); - parameter_remapping.push(move(remapping)); + parameter_remapping.push(std::move(remapping)); } void Compiler::CombinedImageSamplerHandler::pop_remap_parameters() @@ -2313,8 +2655,8 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a { for (auto ¶m : params) { - uint32_t image_id = param.global_image ? param.image_id : args[param.image_id]; - uint32_t sampler_id = param.global_sampler ? param.sampler_id : args[param.sampler_id]; + VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]); + VariableID sampler_id = param.global_sampler ? 
param.sampler_id : VariableID(args[param.sampler_id]); auto *i = compiler.maybe_get_backing_variable(image_id); auto *s = compiler.maybe_get_backing_variable(sampler_id); @@ -2323,15 +2665,17 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a if (s) sampler_id = s->self; - register_combined_image_sampler(caller, image_id, sampler_id, param.depth); + register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth); } } return true; } -void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, uint32_t image_id, - uint32_t sampler_id, bool depth) +void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, + VariableID combined_module_id, + VariableID image_id, VariableID sampler_id, + bool depth) { // We now have a texture ID and a sampler ID which will either be found as a global // or a parameter in our own function. If both are global, they will not need a parameter, @@ -2391,12 +2735,15 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR // Build new variable. compiler.set(combined_id, ptr_type_id, StorageClassFunction, 0); - // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). - auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags; - auto &old_flags = compiler.ir.meta[sampler_id].decoration.decoration_flags; - new_flags.reset(); - if (old_flags.get(DecorationRelaxedPrecision)) - new_flags.set(DecorationRelaxedPrecision); + // Inherit RelaxedPrecision. + // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. 
+ bool relaxed_precision = + compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) || + compiler.has_decoration(image_id, DecorationRelaxedPrecision) || + (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); + + if (relaxed_precision) + compiler.set_decoration(combined_id, DecorationRelaxedPrecision); param.id = combined_id; @@ -2603,8 +2950,10 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar if (sampler) sampler_id = sampler->self; + uint32_t combined_id = args[1]; + auto &combined_type = compiler.get(args[0]); - register_combined_image_sampler(callee, image_id, sampler_id, combined_type.image.depth); + register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth); } } @@ -2612,8 +2961,8 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar // This information is statically known from the current place in the call stack. // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know // which backing variable the image/sample came from. - uint32_t image_id = remap_parameter(args[2]); - uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]); + VariableID image_id = remap_parameter(args[2]); + VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]); auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers), [image_id, sampler_id](const CombinedImageSampler &combined) { @@ -2623,6 +2972,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar if (itr == end(compiler.combined_image_samplers)) { uint32_t sampled_type; + uint32_t combined_module_id; if (is_fetch) { // Have to invent the sampled image type. 
@@ -2632,10 +2982,12 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar type.self = sampled_type; type.basetype = SPIRType::SampledImage; type.image.depth = false; + combined_module_id = 0; } else { sampled_type = args[0]; + combined_module_id = args[1]; } auto id = compiler.ir.increase_bound_by(2); @@ -2655,12 +3007,14 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar compiler.set(combined_id, type_id, StorageClassUniformConstant, 0); // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). - auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags; - // Fetch inherits precision from the image, not sampler (there is no sampler). - auto &old_flags = compiler.ir.meta[is_fetch ? image_id : sampler_id].decoration.decoration_flags; - new_flags.reset(); - if (old_flags.get(DecorationRelaxedPrecision)) - new_flags.set(DecorationRelaxedPrecision); + // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. + bool relaxed_precision = + (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) || + (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) || + (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); + + if (relaxed_precision) + compiler.set_decoration(combined_id, DecorationRelaxedPrecision); // Propagate the array type for the original image as well. 
auto *var = compiler.maybe_get_backing_variable(image_id); @@ -2677,7 +3031,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar return true; } -uint32_t Compiler::build_dummy_sampler_for_combined_images() +VariableID Compiler::build_dummy_sampler_for_combined_images() { DummySamplerForCombinedImageHandler handler(*this); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); @@ -2731,17 +3085,18 @@ SmallVector Compiler::get_specialization_constants() con return spec_consts; } -SPIRConstant &Compiler::get_constant(uint32_t id) +SPIRConstant &Compiler::get_constant(ConstantID id) { return get(id); } -const SPIRConstant &Compiler::get_constant(uint32_t id) const +const SPIRConstant &Compiler::get_constant(ConstantID id) const { return get(id); } -static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set &blocks) +static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set &blocks, + unordered_set &visit_cache) { // This block accesses the variable. if (blocks.find(block) != end(blocks)) @@ -2753,8 +3108,14 @@ static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, con // If any of our successors have a path to the end, there exists a path from block. for (auto &succ : cfg.get_succeeding_edges(block)) - if (exists_unaccessed_path_to_return(cfg, succ, blocks)) - return true; + { + if (visit_cache.count(succ) == 0) + { + if (exists_unaccessed_path_to_return(cfg, succ, blocks, visit_cache)) + return true; + visit_cache.insert(succ); + } + } return false; } @@ -2811,7 +3172,8 @@ void Compiler::analyze_parameter_preservation( // void foo(int &var) { if (cond) var = 10; } // Using read/write counts, we will think it's just an out variable, but it really needs to be inout, // because if we don't write anything whatever we put into the function must return back to the caller. 
- if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second)) + unordered_set visit_cache; + if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second, visit_cache)) arg.read_count++; } } @@ -2866,12 +3228,15 @@ void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBl break; case SPIRBlock::MultiSelect: + { notify_variable_access(block.condition, block.self); - for (auto &target : block.cases) + auto &cases = compiler.get_case_list(block); + for (auto &target : cases) test_phi(target.block); if (block.default_block) test_phi(block.default_block); break; + } default: break; @@ -2883,6 +3248,12 @@ void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_ if (id == 0) return; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. + auto itr = rvalue_forward_children.find(id); + if (itr != end(rvalue_forward_children)) + for (auto child_id : itr->second) + notify_variable_access(child_id, block); + if (id_is_phi_variable(id)) accessed_variables_to_block[id].insert(block); else if (id_is_potential_temporary(id)) @@ -2906,12 +3277,46 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression); } +bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block) +{ + switch (block.terminator) + { + case SPIRBlock::Return: + if (block.return_value) + notify_variable_access(block.return_value, block.self); + break; + + case SPIRBlock::Select: + case SPIRBlock::MultiSelect: + notify_variable_access(block.condition, block.self); + break; + + default: + break; + } + + return true; +} + bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) { // Keep track of the types of temporaries, so we can hoist them out as necessary. 
uint32_t result_type, result_id; if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) + { + // For some opcodes, we will need to override the result id. + // If we need to hoist the temporary, the temporary type is the input, not the result. + // FIXME: This will likely break with OpCopyObject + hoisting, but we'll have to + // solve it if we ever get there ... + if (op == OpConvertUToAccelerationStructureKHR) + { + auto itr = result_id_to_type.find(args[2]); + if (itr != result_id_to_type.end()) + result_type = itr->second; + } + result_id_to_type[result_id] = result_type; + } switch (op) { @@ -2920,7 +3325,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 2) return false; - uint32_t ptr = args[0]; + ID ptr = args[0]; auto *var = compiler.maybe_get_backing_variable(ptr); // If we store through an access chain, we have a partial write. @@ -2947,14 +3352,21 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 3) return false; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. uint32_t ptr = args[2]; auto *var = compiler.maybe_get(ptr); if (var) + { accessed_variables_to_block[var->self].insert(current_block->self); + rvalue_forward_children[args[1]].insert(var->self); + } // args[2] might be another access chain we have to track use of. for (uint32_t i = 2; i < length; i++) + { notify_variable_access(args[i], current_block->self); + rvalue_forward_children[args[1]].insert(args[i]); + } // Also keep track of the access chain pointer itself. // In exceptionally rare cases, we can end up with a case where @@ -2965,7 +3377,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // The result of an access chain is a fixed expression and is not really considered a temporary. 
auto &e = compiler.set(args[1], "", args[0], true); auto *backing_variable = compiler.maybe_get_backing_variable(ptr); - e.loaded_from = backing_variable ? backing_variable->self : 0; + e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0); // Other backends might use SPIRAccessChain for this later. compiler.ir.ids[args[1]].set_allow_type_rewrite(); @@ -2978,8 +3390,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 2) return false; - uint32_t lhs = args[0]; - uint32_t rhs = args[1]; + ID lhs = args[0]; + ID rhs = args[1]; auto *var = compiler.maybe_get_backing_variable(lhs); // If we store through an access chain, we have a partial write. @@ -3035,6 +3447,12 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // Might be an access chain we have to track use of. notify_variable_access(args[2], current_block->self); + + // If we're loading an opaque type we cannot lower it to a temporary, + // we must defer access of args[2] until it's used. + auto &type = compiler.get(args[0]); + if (compiler.type_is_opaque_value(type)) + rvalue_forward_children[args[1]].insert(args[2]); break; } @@ -3043,6 +3461,10 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 3) return false; + // Return value may be a temporary. + if (compiler.get_type(args[0]).basetype != SPIRType::Void) + notify_variable_access(args[1], current_block->self); + length -= 3; args += 3; @@ -3063,9 +3485,29 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // Might try to copy a Phi variable here. notify_variable_access(args[i], current_block->self); } + break; + } - // Return value may be a temporary. - notify_variable_access(args[1], current_block->self); + case OpSelect: + { + // In case of variable pointers, we might access a variable here. + // We cannot prove anything about these accesses however. 
+ for (uint32_t i = 1; i < length; i++) + { + if (i >= 3) + { + auto *var = compiler.maybe_get_backing_variable(args[i]); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + // Assume we can get partial writes to this variable. + partial_write_variables_to_block[var->self].insert(current_block->self); + } + } + + // Might try to copy a Phi variable here. + notify_variable_access(args[i], current_block->self); + } break; } @@ -3074,10 +3516,41 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 for (uint32_t i = 4; i < length; i++) notify_variable_access(args[i], current_block->self); notify_variable_access(args[1], current_block->self); + + uint32_t extension_set = args[2]; + if (compiler.get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(args[3]); + switch (op_450) + { + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + uint32_t ptr = args[5]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == ptr) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); + } + break; + } + + default: + break; + } + } break; } case OpArrayLength: + // Only result is a temporary. + notify_variable_access(args[1], current_block->self); + break; + case OpLine: case OpNoLine: // Uses literals, but cannot be a phi variable or temporary, so ignore. @@ -3344,12 +3817,14 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA for (auto &var : handler.accessed_variables_to_block) { // Only deal with variables which are considered local variables in this function. 
- if (find(begin(entry.local_variables), end(entry.local_variables), var.first) == end(entry.local_variables)) + if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) == + end(entry.local_variables)) continue; DominatorBuilder builder(cfg); auto &blocks = var.second; auto &type = expression_type(var.first); + BlockID potential_continue_block = 0; // Figure out which block is dominating all accesses of those variables. for (auto &block : blocks) @@ -3371,21 +3846,48 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { // The variable is used in multiple continue blocks, this is not a loop // candidate, signal that by setting block to -1u. - auto &potential = potential_loop_variables[var.first]; - - if (potential == 0) - potential = block; + if (potential_continue_block == 0) + potential_continue_block = block; else - potential = ~(0u); + potential_continue_block = ~(0u); } } + builder.add_block(block); } builder.lift_continue_block_dominator(); // Add it to a per-block list of variables. - uint32_t dominating_block = builder.get_dominator(); + BlockID dominating_block = builder.get_dominator(); + + if (dominating_block && potential_continue_block != 0 && potential_continue_block != ~0u) + { + auto &inner_block = get(dominating_block); + + BlockID merge_candidate = 0; + + // Analyze the dominator. If it lives in a different loop scope than the candidate continue + // block, reject the loop variable candidate. + if (inner_block.merge == SPIRBlock::MergeLoop) + merge_candidate = inner_block.merge_block; + else if (inner_block.loop_dominator != SPIRBlock::NoDominator) + merge_candidate = get(inner_block.loop_dominator).merge_block; + + if (merge_candidate != 0 && cfg.is_reachable(merge_candidate)) + { + // If the merge block has a higher post-visit order, we know that continue candidate + // cannot reach the merge block, and we have two separate scopes. 
+ if (!cfg.is_reachable(potential_continue_block) || + cfg.get_visit_order(merge_candidate) > cfg.get_visit_order(potential_continue_block)) + { + potential_continue_block = 0; + } + } + } + + if (potential_continue_block != 0 && potential_continue_block != ~0u) + potential_loop_variables[var.first] = potential_continue_block; // For variables whose dominating block is inside a loop, there is a risk that these variables // actually need to be preserved across loop iterations. We can express this by adding @@ -3403,7 +3905,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA if (preserve) { // Find the outermost loop scope. - while (block->loop_dominator != SPIRBlock::NoDominator) + while (block->loop_dominator != BlockID(SPIRBlock::NoDominator)) block = &get(block->loop_dominator); if (block->self != dominating_block) @@ -3443,6 +3945,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA DominatorBuilder builder(cfg); bool force_temporary = false; + bool used_in_header_hoisted_continue_block = false; // Figure out which block is dominating all accesses of those temporaries. auto &blocks = var.second; @@ -3450,25 +3953,27 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { builder.add_block(block); - // If a temporary is used in more than one block, we might have to lift continue block - // access up to loop header like we did for variables. if (blocks.size() != 1 && is_continue(block)) { + // The risk here is that inner loop can dominate the continue block. + // Any temporary we access in the continue block must be declared before the loop. + // This is moot for complex loops however. auto &loop_header_block = get(ir.continue_block_to_loop_header[block]); assert(loop_header_block.merge == SPIRBlock::MergeLoop); - - // Only relevant if the loop is not marked as complex. 
- if (!loop_header_block.complex_continue) - builder.add_block(loop_header_block.self); - } - else if (blocks.size() != 1 && is_single_block_loop(block)) - { - // Awkward case, because the loop header is also the continue block. - force_temporary = true; + builder.add_block(loop_header_block.self); + used_in_header_hoisted_continue_block = true; } } uint32_t dominating_block = builder.get_dominator(); + + if (blocks.size() != 1 && is_single_block_loop(dominating_block)) + { + // Awkward case, because the loop header is also the continue block, + // so hoisting to loop header does not help. + force_temporary = true; + } + if (dominating_block) { // If we touch a variable in the dominating block, this is the expected setup. @@ -3481,11 +3986,22 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { // Exceptionally rare case. // We cannot declare temporaries of access chains (except on MSL perhaps with pointers). - // Rather than do that, we force a complex loop to make sure access chains are created and consumed - // in expected order. - auto &loop_header_block = get(dominating_block); - assert(loop_header_block.merge == SPIRBlock::MergeLoop); - loop_header_block.complex_continue = true; + // Rather than do that, we force the indexing expressions to be declared in the right scope by + // tracking their usage to that end. There is no temporary to hoist. + // However, we still need to observe declaration order of the access chain. + + if (used_in_header_hoisted_continue_block) + { + // For this scenario, we used an access chain inside a continue block where we also registered an access to header block. + // This is a problem as we need to declare an access chain properly first with full definition. + // We cannot use temporaries for these expressions, + // so we must make sure the access chain is declared ahead of time. + // Force a complex for loop to deal with this. 
+ // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option. + auto &loop_header_block = get(dominating_block); + assert(loop_header_block.merge == SPIRBlock::MergeLoop); + loop_header_block.complex_continue = true; + } } else { @@ -3519,17 +4035,17 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { auto &var = get(loop_variable.first); auto dominator = var.dominator; - auto block = loop_variable.second; + BlockID block = loop_variable.second; // The variable was accessed in multiple continue blocks, ignore. - if (block == ~(0u) || block == 0) + if (block == BlockID(~(0u)) || block == BlockID(0)) continue; // Dead code. - if (dominator == 0) + if (dominator == ID(0)) continue; - uint32_t header = 0; + BlockID header = 0; // Find the loop header for this block if we are a continue block. { @@ -3588,10 +4104,11 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA // merge can occur. Walk the CFG to see if we find anything. seen_blocks.clear(); - cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) { + cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool { // We found a block which accesses the variable outside the loop. 
if (blocks.find(walk_block) != end(blocks)) static_loop_init = false; + return true; }); if (!static_loop_init) @@ -3679,7 +4196,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint return true; } -Bitset Compiler::get_buffer_block_flags(uint32_t id) const +Bitset Compiler::get_buffer_block_flags(VariableID id) const { return ir.get_buffer_block_flags(get(id)); } @@ -3739,23 +4256,55 @@ void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltI } } -bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length) +void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks) { - const auto add_if_builtin = [&](uint32_t id) { - // Only handles variables here. - // Builtins which are part of a block are handled in AccessChain. - auto *var = compiler.maybe_get(id); - auto &decorations = compiler.ir.meta[id].decoration; - if (var && decorations.builtin) + // Only handle plain variables here. + // Builtins which are part of a block are handled in AccessChain. + // If allow_blocks is used however, this is to handle initializers of blocks, + // which implies that all members are written to. + + auto *var = compiler.maybe_get(id); + auto *m = compiler.ir.find_meta(id); + if (var && m) + { + auto &type = compiler.get(var->basetype); + auto &decorations = m->decoration; + auto &flags = type.storage == StorageClassInput ? + compiler.active_input_builtins : compiler.active_output_builtins; + if (decorations.builtin) { - auto &type = compiler.get(var->basetype); - auto &flags = - type.storage == StorageClassInput ? 
compiler.active_input_builtins : compiler.active_output_builtins; flags.set(decorations.builtin_type); handle_builtin(type, decorations.builtin_type, decorations.decoration_flags); } - }; + else if (allow_blocks && compiler.has_decoration(type.self, DecorationBlock)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (compiler.has_member_decoration(type.self, i, DecorationBuiltIn)) + { + auto &member_type = compiler.get(type.member_types[i]); + BuiltIn builtin = BuiltIn(compiler.get_member_decoration(type.self, i, DecorationBuiltIn)); + flags.set(builtin); + handle_builtin(member_type, builtin, compiler.get_member_decoration_bitset(type.self, i)); + } + } + } + } +} + +void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id) +{ + add_if_builtin(id, false); +} +void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id) +{ + add_if_builtin(id, true); +} + +bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length) +{ switch (opcode) { case OpStore: @@ -3893,10 +4442,21 @@ void Compiler::update_active_builtins() clip_distance_count = 0; ActiveBuiltinHandler handler(*this); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (var.storage != StorageClassOutput) + return; + if (!interface_variable_exists_in_entry_point(var.self)) + return; + + // Also, make sure we preserve output variables which are only initialized, but never accessed by any code. 
+ if (var.initializer != ID(0)) + handler.add_if_builtin_or_block(var.self); + }); } // Returns whether this shader uses a builtin of the storage class -bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) +bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const { const Bitset *flags; switch (storage) @@ -3921,8 +4481,16 @@ void Compiler::analyze_image_and_sampler_usage() CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); - comparison_ids = move(handler.comparison_ids); + + // Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions + // down to main(). + // In the second pass, we can propagate up forced depth state coming from main() up into leaf functions. + handler.dependency_hierarchy.clear(); + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + comparison_ids = std::move(handler.comparison_ids); need_subpass_input = handler.need_subpass_input; + need_subpass_input_ms = handler.need_subpass_input_ms; // Forward information from separate images and samplers into combined image samplers. 
for (auto &combined : combined_image_samplers) @@ -3955,12 +4523,26 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin return true; } +const CFG &Compiler::get_cfg_for_current_function() const +{ + assert(current_function); + return get_cfg_for_function(current_function->self); +} + +const CFG &Compiler::get_cfg_for_function(uint32_t id) const +{ + auto cfg_itr = function_cfgs.find(id); + assert(cfg_itr != end(function_cfgs)); + assert(cfg_itr->second); + return *cfg_itr->second; +} + void Compiler::build_function_control_flow_graphs_and_analyze() { CFGBuilder handler(*this); handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this, get(ir.default_entry_point))); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); - function_cfgs = move(handler.function_cfgs); + function_cfgs = std::move(handler.function_cfgs); bool single_function = function_cfgs.size() <= 1; for (auto &f : function_cfgs) @@ -4022,6 +4604,14 @@ bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func) return false; } +void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src) +{ + dependency_hierarchy[dst].insert(src); + // Propagate up any comparison state if we're loading from one such variable. + if (comparison_ids.count(src)) + comparison_ids.insert(dst); +} + bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) @@ -4034,7 +4624,7 @@ bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint for (uint32_t i = 0; i < length; i++) { auto &argument = func.arguments[i]; - dependency_hierarchy[argument.id].insert(arg[i]); + add_dependency(argument.id, arg[i]); } return true; @@ -4044,6 +4634,7 @@ void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids { // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids. 
comparison_ids.insert(id); + for (auto &dep_id : dependency_hierarchy[id]) add_hierarchy_to_comparison_ids(dep_id); } @@ -4059,13 +4650,18 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ { if (length < 3) return false; - dependency_hierarchy[args[1]].insert(args[2]); + + add_dependency(args[1], args[2]); // Ideally defer this to OpImageRead, but then we'd need to track loaded IDs. // If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord. auto &type = compiler.get(args[0]); if (type.image.dim == DimSubpassData) + { need_subpass_input = true; + if (type.image.ms) + need_subpass_input_ms = true; + } // If we load a SampledImage and it will be used with Dref, propagate the state up. if (dref_combined_samplers.count(args[1]) != 0) @@ -4078,17 +4674,17 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ if (length < 4) return false; - uint32_t result_type = args[0]; + // If the underlying resource has been used for comparison then duplicate loads of that resource must be too. + // This image must be a depth image. uint32_t result_id = args[1]; - auto &type = compiler.get(result_type); - if (type.image.depth || dref_combined_samplers.count(result_id) != 0) + uint32_t image = args[2]; + uint32_t sampler = args[3]; + + if (dref_combined_samplers.count(result_id) != 0) { - // This image must be a depth image. - uint32_t image = args[2]; add_hierarchy_to_comparison_ids(image); // This sampler must be a SamplerComparisonState, and not a regular SamplerState. - uint32_t sampler = args[3]; add_hierarchy_to_comparison_ids(sampler); // Mark the OpSampledImage itself as being comparison state. 
@@ -4104,13 +4700,13 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ return true; } -bool Compiler::buffer_is_hlsl_counter_buffer(uint32_t id) const +bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const { auto *m = ir.find_meta(id); return m && m->hlsl_is_magic_counter_buffer; } -bool Compiler::buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const +bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const { auto *m = ir.find_meta(id); @@ -4175,7 +4771,7 @@ const SmallVector &Compiler::get_declared_extensions() const return ir.declared_extensions; } -std::string Compiler::get_remapped_declared_block_name(uint32_t id) const +std::string Compiler::get_remapped_declared_block_name(VariableID id) const { return get_remapped_declared_block_name(id, false); } @@ -4240,46 +4836,22 @@ bool Compiler::reflection_ssbo_instance_name_is_significant() const return aliased_ssbo_types; } -bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args, - uint32_t length) +bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, + const uint32_t *args, uint32_t length) { - // Most instructions follow the pattern of . - // There are some exceptions. 
- switch (op) - { - case OpStore: - case OpCopyMemory: - case OpCopyMemorySized: - case OpImageWrite: - case OpAtomicStore: - case OpAtomicFlagClear: - case OpEmitStreamVertex: - case OpEndStreamPrimitive: - case OpControlBarrier: - case OpMemoryBarrier: - case OpGroupWaitEvents: - case OpRetainEvent: - case OpReleaseEvent: - case OpSetUserEventStatus: - case OpCaptureEventProfilingInfo: - case OpCommitReadPipe: - case OpCommitWritePipe: - case OpGroupCommitReadPipe: - case OpGroupCommitWritePipe: - case OpLine: - case OpNoLine: + if (length < 2) return false; - default: - if (length > 1 && maybe_get(args[0]) != nullptr) - { - result_type = args[0]; - result_id = args[1]; - return true; - } - else - return false; + bool has_result_id = false, has_result_type = false; + HasResultAndType(op, &has_result_id, &has_result_type); + if (has_result_id && has_result_type) + { + result_type = args[0]; + result_id = args[1]; + return true; } + else + return false; } Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const @@ -4289,19 +4861,22 @@ Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t i if (type_meta) { - auto &memb = type_meta->members; - if (index >= memb.size()) + auto &members = type_meta->members; + if (index >= members.size()) return flags; - auto &dec = memb[index]; + auto &dec = members[index]; - // If our type is a struct, traverse all the members as well recursively. flags.merge_or(dec.decoration_flags); - for (uint32_t i = 0; i < type.member_types.size(); i++) + auto &member_type = get(type.member_types[index]); + + // If our member type is a struct, traverse all the child members as well recursively. 
+ auto &member_childs = member_type.member_types; + for (uint32_t i = 0; i < member_childs.size(); i++) { - auto &memb_type = get(type.member_types[i]); - if (!memb_type.pointer) - flags.merge_or(combined_decoration_for_member(memb_type, i)); + auto &child_member_type = get(member_childs[i]); + if (!child_member_type.pointer) + flags.merge_or(combined_decoration_for_member(member_type, i)); } } @@ -4341,9 +4916,11 @@ bool Compiler::is_desktop_only_format(spv::ImageFormat format) return false; } -bool Compiler::image_is_comparison(const SPIRType &type, uint32_t id) const +// An image is determined to be a depth image if it is marked as a depth image and is not also +// explicitly marked with a color format, or if there are any sample/gather compare operations on it. +bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const { - return type.image.depth || (comparison_ids.count(id) != 0); + return (type.image.depth && type.image.format == ImageFormatUnknown) || comparison_ids.count(id); } bool Compiler::type_is_opaque_value(const SPIRType &type) const @@ -4358,6 +4935,12 @@ void Compiler::force_recompile() is_force_recompile = true; } +void Compiler::force_recompile_guarantee_forward_progress() +{ + force_recompile(); + is_force_recompile_forward_progress = true; +} + bool Compiler::is_forcing_recompilation() const { return is_force_recompile; @@ -4366,6 +4949,7 @@ bool Compiler::is_forcing_recompilation() const void Compiler::clear_force_recompile() { is_force_recompile = false; + is_force_recompile_forward_progress = false; } Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_) @@ -4373,31 +4957,491 @@ Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandl { } -bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t) +Compiler::PhysicalBlockMeta *Compiler::PhysicalStorageBufferPointerHandler::find_block_meta(uint32_t id) const +{ + auto 
chain_itr = access_chain_to_physical_block.find(id); + if (chain_itr != access_chain_to_physical_block.end()) + return chain_itr->second; + else + return nullptr; +} + +void Compiler::PhysicalStorageBufferPointerHandler::mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length) { - if (op == OpConvertUToPtr || op == OpBitcast) + uint32_t mask = *args; + args++; + length--; + if (length && (mask & MemoryAccessVolatileMask) != 0) { - auto &type = compiler.get(args[0]); - if (type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && type.pointer_depth == 1) + args++; + length--; + } + + if (length && (mask & MemoryAccessAlignedMask) != 0) + { + uint32_t alignment = *args; + auto *meta = find_block_meta(id); + + // This makes the assumption that the application does not rely on insane edge cases like: + // Bind buffer with ADDR = 8, use block offset of 8 bytes, load/store with 16 byte alignment. + // If we emit the buffer with alignment = 16 here, the first element at offset = 0 should + // actually have alignment of 8 bytes, but this is too theoretical and awkward to support. + // We could potentially keep track of any offset in the access chain, but it's + // practically impossible for high level compilers to emit code like that, + // so deducing overall alignment requirement based on maximum observed Alignment value is probably fine. 
+ if (meta && alignment > meta->alignment) + meta->alignment = alignment; + } +} + +bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint32_t type_id) const +{ + auto &type = compiler.get(type_id); + return type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && + type.pointer_depth == 1 && !compiler.type_is_array_of_pointers(type); +} + +uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const +{ + if (type.storage == spv::StorageClassPhysicalStorageBufferEXT) + return 8; + else if (type.basetype == SPIRType::Struct) + { + uint32_t alignment = 0; + for (auto &member_type : type.member_types) { - // If we need to cast to a pointer type which is not a block, we might need to synthesize ourselves - // a block type which wraps this POD type. - if (type.basetype != SPIRType::Struct) - types.insert(args[0]); + uint32_t member_align = get_minimum_scalar_alignment(compiler.get(member_type)); + if (member_align > alignment) + alignment = member_align; } + return alignment; + } + else + return type.width / 8; +} + +void Compiler::PhysicalStorageBufferPointerHandler::setup_meta_chain(uint32_t type_id, uint32_t var_id) +{ + if (type_is_bda_block_entry(type_id)) + { + auto &meta = physical_block_type_meta[type_id]; + access_chain_to_physical_block[var_id] = &meta; + + auto &type = compiler.get(type_id); + if (type.basetype != SPIRType::Struct) + non_block_types.insert(type_id); + + if (meta.alignment == 0) + meta.alignment = get_minimum_scalar_alignment(compiler.get_pointee_type(type)); + } +} + +bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t length) +{ + // When a BDA pointer comes to life, we need to keep a mapping of SSA ID -> type ID for the pointer type. + // For every load and store, we'll need to be able to look up the type ID being accessed and mark any alignment + // requirements. 
+ switch (op) + { + case OpConvertUToPtr: + case OpBitcast: + case OpCompositeExtract: + // Extract can begin a new chain if we had a struct or array of pointers as input. + // We don't begin chains before we have a pure scalar pointer. + setup_meta_chain(args[0], args[1]); + break; + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + case OpCopyObject: + { + auto itr = access_chain_to_physical_block.find(args[2]); + if (itr != access_chain_to_physical_block.end()) + access_chain_to_physical_block[args[1]] = itr->second; + break; + } + + case OpLoad: + { + setup_meta_chain(args[0], args[1]); + if (length >= 4) + mark_aligned_access(args[2], args + 3, length - 3); + break; + } + + case OpStore: + { + if (length >= 3) + mark_aligned_access(args[0], args + 2, length - 2); + break; + } + + default: + break; } return true; } +uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_id(uint32_t type_id) const +{ + auto *type = &compiler.get(type_id); + while (type->pointer && + type->storage == StorageClassPhysicalStorageBufferEXT && + !type_is_bda_block_entry(type_id)) + { + type_id = type->parent_type; + type = &compiler.get(type_id); + } + + assert(type_is_bda_block_entry(type_id)); + return type_id; +} + +void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type) +{ + for (auto &member : type.member_types) + { + auto &subtype = compiler.get(member); + if (subtype.basetype != SPIRType::Struct && subtype.pointer && + subtype.storage == spv::StorageClassPhysicalStorageBufferEXT) + { + non_block_types.insert(get_base_non_block_type_id(member)); + } + else if (subtype.basetype == SPIRType::Struct && !subtype.pointer) + analyze_non_block_types_from_block(subtype); + } +} + void Compiler::analyze_non_block_pointer_types() { PhysicalStorageBufferPointerHandler handler(*this); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); - 
physical_storage_non_block_pointer_types.reserve(handler.types.size()); - for (auto type : handler.types) + + // Analyze any block declaration we have to make. It might contain + // physical pointers to POD types which we never used, and thus never added to the list. + // We'll need to add those pointer types to the set of types we declare. + ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) + handler.analyze_non_block_types_from_block(type); + }); + + physical_storage_non_block_pointer_types.reserve(handler.non_block_types.size()); + for (auto type : handler.non_block_types) physical_storage_non_block_pointer_types.push_back(type); sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types)); + physical_storage_type_to_alignment = std::move(handler.physical_block_type_meta); +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t) +{ + if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT) + { + if (interlock_function_id != 0 && interlock_function_id != call_stack.back()) + { + // Most complex case, we have no sensible way of dealing with this + // other than taking the 100% conservative approach, exit early. + split_function_case = true; + return false; + } + else + { + interlock_function_id = call_stack.back(); + // If this call is performed inside control flow we have a problem. 
+ auto &cfg = compiler.get_cfg_for_function(interlock_function_id); + + uint32_t from_block_id = compiler.get(interlock_function_id).entry_block; + bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id); + if (!outside_control_flow) + control_flow_interlock = true; + } + } + return true; +} + +void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block) +{ + current_block_id = block.self; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t) +{ + call_stack.pop_back(); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + if (args[2] == interlock_function_id) + call_stack_is_interlocked = true; + + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t) +{ + if (call_stack.back() == interlock_function_id) + call_stack_is_interlocked = false; + + call_stack.pop_back(); + return true; +} + +void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id) +{ + if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) || + split_function_case) + { + compiler.interlocked_resources.insert(id); + } +} + +bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // Only care about critical section analysis if we have simple case. 
+ if (use_critical_section) + { + if (opcode == OpBeginInvocationInterlockEXT) + { + in_crit_sec = true; + return true; + } + + if (opcode == OpEndInvocationInterlockEXT) + { + // End critical section--nothing more to do. + return false; + } + } + + // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need. + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniformConstant: + { + uint32_t result_type = args[0]; + uint32_t id = args[1]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + break; + } + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. + if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get(result_type); + if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant || + type.storage == StorageClassStorageBuffer) + { + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + compiler.ir.ids[id].set_allow_type_rewrite(); + } + break; + } + + case OpImageTexelPointer: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + uint32_t id = args[1]; + uint32_t ptr = args[2]; + auto &e = compiler.set(id, "", result_type, true); + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + e.loaded_from = var->self; + 
break; + } + + case OpStore: + case OpImageWrite: + case OpAtomicStore: + { + if (length < 1) + return false; + + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + uint32_t dst = args[0]; + uint32_t src = args[1]; + auto *dst_var = compiler.maybe_get_backing_variable(dst); + auto *src_var = compiler.maybe_get_backing_variable(src); + + if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer)) + access_potential_resource(dst_var->self); + + if (src_var) + { + if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer) + break; + + if (src_var->storage == StorageClassUniform && + !compiler.has_decoration(compiler.get(src_var->basetype).self, DecorationBufferBlock)) + { + break; + } + + access_potential_resource(src_var->self); + } + + break; + } + + case OpImageRead: + case OpAtomicLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. 
+ if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassUniformConstant: + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + default: + break; + } + + return true; +} + +void Compiler::analyze_interlocked_resource_usage() +{ + if (get_execution_model() == ExecutionModelFragment && + (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT))) + { + InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point); + traverse_all_reachable_opcodes(get(ir.default_entry_point), prepass_handler); + + InterlockedResourceAccessHandler handler(*this, ir.default_entry_point); + handler.interlock_function_id = prepass_handler.interlock_function_id; + handler.split_function_case = prepass_handler.split_function_case; + handler.control_flow_interlock = prepass_handler.control_flow_interlock; + handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock; + + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + // For GLSL. 
If we hit any of these cases, we have to fall back to conservative approach. + interlocked_is_complex = + !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point; + } } bool Compiler::type_is_array_of_pointers(const SPIRType &type) const @@ -4408,3 +5452,23 @@ bool Compiler::type_is_array_of_pointers(const SPIRType &type) const // If parent type has same pointer depth, we must have an array of pointers. return type.pointer_depth == get(type.parent_type).pointer_depth; } + +bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const +{ + return type.pointer && type.storage == StorageClassPhysicalStorageBuffer && + type.pointer_depth > get(type.parent_type).pointer_depth; +} + +bool Compiler::flush_phi_required(BlockID from, BlockID to) const +{ + auto &child = get(to); + for (auto &phi : child.phi_variables) + if (phi.parent == from) + return true; + return false; +} + +void Compiler::add_loop_level() +{ + current_loop_level++; +} diff --git a/spirv_cross.hpp b/spirv_cross.hpp index c2dc4ea61ba..ea98ee60d37 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +15,18 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_HPP #define SPIRV_CROSS_HPP +#ifndef SPV_ENABLE_UTILITY_CODE +#define SPV_ENABLE_UTILITY_CODE +#endif #include "spirv.hpp" #include "spirv_cfg.hpp" #include "spirv_cross_parsed_ir.hpp" @@ -27,18 +37,18 @@ struct Resource { // Resources are identified with their SPIR-V ID. // This is the ID of the OpVariable. 
- uint32_t id; + ID id; // The type ID of the variable which includes arrays and all type modifications. // This type ID is not suitable for parsing OpMemberDecoration of a struct and other decorations in general // since these modifications typically happen on the base_type_id. - uint32_t type_id; + TypeID type_id; // The base type of the declared resource. // This type is the base type which ignores pointers and arrays of the type_id. // This is mostly useful to parse decorations of the underlying type. // base_type_id can also be obtained with get_type(get_type(type_id).self). - uint32_t base_type_id; + TypeID base_type_id; // The declared name (OpName) of the resource. // For Buffer blocks, the name actually reflects the externally @@ -52,6 +62,27 @@ struct Resource std::string name; }; +struct BuiltInResource +{ + // This is mostly here to support reflection of builtins such as Position/PointSize/CullDistance/ClipDistance. + // This needs to be different from Resource since we can collect builtins from blocks. + // A builtin present here does not necessarily mean it's considered an active builtin, + // since variable ID "activeness" is only tracked on OpVariable level, not Block members. + // For that, update_active_builtins() -> has_active_builtin() can be used to further refine the reflection. + spv::BuiltIn builtin; + + // This is the actual value type of the builtin. + // Typically float4, float, array for the gl_PerVertex builtins. + // If the builtin is a control point, the control point array type will be stripped away here as appropriate. + TypeID value_type_id; + + // This refers to the base resource which contains the builtin. + // If resource is a Block, it can hold multiple builtins, or it might not be a block. + // For advanced reflection scenarios, all information in builtin/value_type_id can be deduced, + // it's just more convenient this way. 
+ Resource resource; +}; + struct ShaderResources { SmallVector uniform_buffers; @@ -68,26 +99,31 @@ struct ShaderResources // but keep the vector in case this restriction is lifted in the future. SmallVector push_constant_buffers; + SmallVector shader_record_buffers; + // For Vulkan GLSL and HLSL source, // these correspond to separate texture2D and samplers respectively. SmallVector separate_images; SmallVector separate_samplers; + + SmallVector builtin_inputs; + SmallVector builtin_outputs; }; struct CombinedImageSampler { // The ID of the sampler2D variable. - uint32_t combined_id; + VariableID combined_id; // The ID of the texture2D variable. - uint32_t image_id; + VariableID image_id; // The ID of the sampler variable. - uint32_t sampler_id; + VariableID sampler_id; }; struct SpecializationConstant { // The ID of the specialization constant. - uint32_t id; + ConstantID id; // The constant ID of the constant, used in Vulkan during pipeline creation. uint32_t constant_id; }; @@ -117,18 +153,6 @@ struct EntryPoint spv::ExecutionModel execution_model; }; -enum ExtendedDecorations -{ - SPIRVCrossDecorationPacked, - SPIRVCrossDecorationPackedType, - SPIRVCrossDecorationInterfaceMemberIndex, - SPIRVCrossDecorationInterfaceOrigID, - SPIRVCrossDecorationResourceIndexPrimary, - // Used for decorations like resource indices for samplers when part of combined image samplers. - // A variable might need to hold two resource indices in this case. - SPIRVCrossDecorationResourceIndexSecondary, -}; - class Compiler { public: @@ -154,81 +178,81 @@ class Compiler virtual std::string compile(); // Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned. - const std::string &get_name(uint32_t id) const; + const std::string &get_name(ID id) const; // Applies a decoration to an ID. Effectively injects OpDecorate. 
- void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); - void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); + void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument); // Overrides the identifier OpName of an ID. // Identifiers beginning with underscores or identifiers which contain double underscores // are reserved by the implementation. - void set_name(uint32_t id, const std::string &name); + void set_name(ID id, const std::string &name); // Gets a bitmask for the decorations which are applied to ID. // I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar) - const Bitset &get_decoration_bitset(uint32_t id) const; + const Bitset &get_decoration_bitset(ID id) const; // Returns whether the decoration has been applied to the ID. - bool has_decoration(uint32_t id, spv::Decoration decoration) const; + bool has_decoration(ID id, spv::Decoration decoration) const; // Gets the value for decorations which take arguments. // If the decoration is a boolean (i.e. spv::DecorationNonWritable), // 1 will be returned. // If decoration doesn't exist or decoration is not recognized, // 0 will be returned. - uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; - const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; + uint32_t get_decoration(ID id, spv::Decoration decoration) const; + const std::string &get_decoration_string(ID id, spv::Decoration decoration) const; // Removes the decoration for an ID. - void unset_decoration(uint32_t id, spv::Decoration decoration); + void unset_decoration(ID id, spv::Decoration decoration); // Gets the SPIR-V type associated with ID. // Mostly used with Resource::type_id and Resource::base_type_id to parse the underlying type of a resource. 
- const SPIRType &get_type(uint32_t id) const; + const SPIRType &get_type(TypeID id) const; // Gets the SPIR-V type of a variable. - const SPIRType &get_type_from_variable(uint32_t id) const; + const SPIRType &get_type_from_variable(VariableID id) const; // Gets the underlying storage class for an OpVariable. - spv::StorageClass get_storage_class(uint32_t id) const; + spv::StorageClass get_storage_class(VariableID id) const; // If get_name() is an empty string, get the fallback name which will be used // instead in the disassembled source. - virtual const std::string get_fallback_name(uint32_t id) const; + virtual const std::string get_fallback_name(ID id) const; // If get_name() of a Block struct is an empty string, get the fallback name. // This needs to be per-variable as multiple variables can use the same block type. - virtual const std::string get_block_fallback_name(uint32_t id) const; + virtual const std::string get_block_fallback_name(VariableID id) const; // Given an OpTypeStruct in ID, obtain the identifier for member number "index". // This may be an empty string. - const std::string &get_member_name(uint32_t id, uint32_t index) const; + const std::string &get_member_name(TypeID id, uint32_t index) const; // Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index". - uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; + uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const; // Sets the member identifier for OpTypeStruct ID, member number "index". 
- void set_member_name(uint32_t id, uint32_t index, const std::string &name); + void set_member_name(TypeID id, uint32_t index, const std::string &name); // Returns the qualified member identifier for OpTypeStruct ID, member number "index", // or an empty string if no qualified alias exists - const std::string &get_member_qualified_name(uint32_t type_id, uint32_t index) const; + const std::string &get_member_qualified_name(TypeID type_id, uint32_t index) const; // Gets the decoration mask for a member of a struct, similar to get_decoration_mask. - const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; + const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const; // Returns whether the decoration has been applied to a member of a struct. - bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; + bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; // Similar to set_decoration, but for struct members. - void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); - void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument); // Unsets a member decoration, similar to unset_decoration. - void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration); // Gets the fallback name for a member, similar to get_fallback_name. virtual const std::string get_fallback_member_name(uint32_t index) const @@ -240,7 +264,7 @@ class Compiler // SPIR-V shader. The granularity of this analysis is per-member of a struct. 
// This can be used for Buffer (UBO), BufferBlock/StorageBuffer (SSBO) and PushConstant blocks. // ID is the Resource::id obtained from get_shader_resources(). - SmallVector get_active_buffer_ranges(uint32_t id) const; + SmallVector get_active_buffer_ranges(VariableID id) const; // Returns the effective size of a buffer block. size_t get_declared_struct_size(const SPIRType &struct_type) const; @@ -268,12 +292,12 @@ class Compiler // // To use the returned set as the filter for which variables are used during compilation, // this set can be moved to set_enabled_interface_variables(). - std::unordered_set get_active_interface_variables() const; + std::unordered_set get_active_interface_variables() const; // Sets the interface variables which are used during compilation. // By default, all variables are used. // Once set, compile() will only consider the set in active_variables. - void set_enabled_interface_variables(std::unordered_set active_variables); + void set_enabled_interface_variables(std::unordered_set active_variables); // Query shader resources, use ids with reflection interface to modify or query binding points, etc. ShaderResources get_shader_resources() const; @@ -281,19 +305,19 @@ class Compiler // Query shader resources, but only return the variables which are part of active_variables. // E.g.: get_shader_resources(get_active_variables()) to only return the variables which are statically // accessed. - ShaderResources get_shader_resources(const std::unordered_set &active_variables) const; + ShaderResources get_shader_resources(const std::unordered_set &active_variables) const; // Remapped variables are considered built-in variables and a backend will // not emit a declaration for this variable. // This is mostly useful for making use of builtins which are dependent on extensions. 
- void set_remapped_variable_state(uint32_t id, bool remap_enable); - bool get_remapped_variable_state(uint32_t id) const; + void set_remapped_variable_state(VariableID id, bool remap_enable); + bool get_remapped_variable_state(VariableID id) const; // For subpassInput variables which are remapped to plain variables, // the number of components in the remapped // variable must be specified as the backing type of subpass inputs are opaque. - void set_subpass_input_remapped_components(uint32_t id, uint32_t components); - uint32_t get_subpass_input_remapped_components(uint32_t id) const; + void set_subpass_input_remapped_components(VariableID id, uint32_t components); + uint32_t get_subpass_input_remapped_components(VariableID id) const; // All operations work on the current entry point. // Entry points can be swapped out with set_entry_point(). @@ -327,6 +351,10 @@ class Compiler const std::string &get_cleansed_entry_point_name(const std::string &name, spv::ExecutionModel execution_model) const; + // Traverses all reachable opcodes and sets active_builtins to a bitmask of all builtin variables which are accessed in the shader. + void update_active_builtins(); + bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage) const; + // Query and modify OpExecutionMode. const Bitset &get_execution_mode_bitset() const; @@ -334,12 +362,16 @@ class Compiler void set_execution_mode(spv::ExecutionMode mode, uint32_t arg0 = 0, uint32_t arg1 = 0, uint32_t arg2 = 0); // Gets argument for an execution mode (LocalSize, Invocations, OutputVertices). - // For LocalSize, the index argument is used to select the dimension (X = 0, Y = 1, Z = 2). + // For LocalSize or LocalSizeId, the index argument is used to select the dimension (X = 0, Y = 1, Z = 2). // For execution modes which do not have arguments, 0 is returned. + // LocalSizeId query returns an ID. If LocalSizeId execution mode is not used, it returns 0. + // LocalSize always returns a literal. 
If execution mode is LocalSizeId, + // the literal (spec constant or not) is still returned. uint32_t get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index = 0) const; spv::ExecutionModel get_execution_model() const; bool is_tessellation_shader() const; + bool is_tessellating_triangles() const; // In SPIR-V, the compute work group size can be represented by a constant vector, in which case // the LocalSize execution mode is ignored. @@ -357,6 +389,8 @@ class Compiler // If the component is not a specialization constant, a zeroed out struct will be written. // The return value is the constant ID of the builtin WorkGroupSize, but this is not expected to be useful // for most use cases. + // If LocalSizeId is used, there is no uvec3 value representing the workgroup size, so the return value is 0, + // but x, y and z are written as normal if the components are specialization constants. uint32_t get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y, SpecializationConstant &z) const; @@ -374,7 +408,7 @@ class Compiler // If the returned ID is non-zero, it can be decorated with set/bindings as desired before calling compile(). // Calling this function also invalidates get_active_interface_variables(), so this should be called // before that function. - uint32_t build_dummy_sampler_for_combined_images(); + VariableID build_dummy_sampler_for_combined_images(); // Analyzes all separate image and samplers used from the currently selected entry point, // and re-routes them all to a combined image sampler instead. @@ -423,8 +457,8 @@ class Compiler // constant_type is the SPIRType for the specialization constant, // which can be queried to determine which fields in the unions should be poked at. 
SmallVector get_specialization_constants() const; - SPIRConstant &get_constant(uint32_t id); - const SPIRConstant &get_constant(uint32_t id) const; + SPIRConstant &get_constant(ConstantID id); + const SPIRConstant &get_constant(ConstantID id) const; uint32_t get_current_id_bound() const { @@ -447,7 +481,7 @@ class Compiler // If the decoration was declared, sets the word_offset to an offset into the provided SPIR-V binary buffer and returns true, // otherwise, returns false. // If the decoration does not have any value attached to it (e.g. DecorationRelaxedPrecision), this function will also return false. - bool get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const; + bool get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const; // HLSL counter buffer reflection interface. // Append/Consume/Increment/Decrement in HLSL is implemented as two "neighbor" buffer objects where @@ -462,7 +496,7 @@ class Compiler // only return true if OpSource was reported HLSL. // To rely on this functionality, ensure that the SPIR-V module is not stripped. - bool buffer_is_hlsl_counter_buffer(uint32_t id) const; + bool buffer_is_hlsl_counter_buffer(VariableID id) const; // Queries if a buffer object has a neighbor "counter" buffer. // If so, the ID of that counter buffer will be returned in counter_id. @@ -470,7 +504,7 @@ class Compiler // Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will // only return true if OpSource was reported HLSL. // To rely on this functionality, ensure that the SPIR-V module is not stripped. - bool buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const; + bool buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const; // Gets the list of all SPIR-V Capabilities which were declared in the SPIR-V module. 
const SmallVector &get_declared_capabilities() const; @@ -491,13 +525,19 @@ class Compiler // ID is the name of a variable as returned by Resource::id, and must be a variable with a Block-like type. // // This also applies to HLSL cbuffers. - std::string get_remapped_declared_block_name(uint32_t id) const; + std::string get_remapped_declared_block_name(VariableID id) const; // For buffer block variables, get the decorations for that variable. // Sometimes, decorations for buffer blocks are found in member decorations instead // of direct decorations on the variable itself. // The most common use here is to check if a buffer is readonly or writeonly. - Bitset get_buffer_block_flags(uint32_t id) const; + Bitset get_buffer_block_flags(VariableID id) const; + + // Returns whether the position output is invariant + bool is_position_invariant() const + { + return position_invariant; + } protected: const uint32_t *stream(const Instruction &instr) const @@ -508,9 +548,23 @@ class Compiler if (!instr.length) return nullptr; - if (instr.offset + instr.length > ir.spirv.size()) - SPIRV_CROSS_THROW("Compiler::stream() out of range."); - return &ir.spirv[instr.offset]; + if (instr.is_embedded()) + { + auto &embedded = static_cast(instr); + assert(embedded.ops.size() == instr.length); + return embedded.ops.data(); + } + else + { + if (instr.offset + instr.length > ir.spirv.size()) + SPIRV_CROSS_THROW("Compiler::stream() out of range."); + return &ir.spirv[instr.offset]; + } + } + + uint32_t *stream_mutable(const Instruction &instr) const + { + return const_cast(stream(instr)); } ParsedIR ir; @@ -521,9 +575,22 @@ class Compiler SPIRFunction *current_function = nullptr; SPIRBlock *current_block = nullptr; - std::unordered_set active_interface_variables; + uint32_t current_loop_level = 0; + std::unordered_set active_interface_variables; bool check_active_interface_variables = false; + void add_loop_level(); + + void set_initializers(SPIRExpression &e) + { + e.emitted_loop_level = 
current_loop_level; + } + + template + void set_initializers(const T &) + { + } + // If our IDs are out of range here as part of opcodes, throw instead of // undefined behavior. template @@ -532,6 +599,7 @@ class Compiler ir.add_typed_id(static_cast(T::type), id); auto &var = variant_set(ir.ids[id], std::forward

(args)...); var.self = id; + set_initializers(var); return var; } @@ -561,7 +629,9 @@ class Compiler template const T *maybe_get(uint32_t id) const { - if (ir.ids[id].get_type() == static_cast(T::type)) + if (id >= ir.ids.size()) + return nullptr; + else if (ir.ids[id].get_type() == static_cast(T::type)) return &get(id); else return nullptr; @@ -629,7 +699,7 @@ class Compiler inline bool is_single_block_loop(uint32_t next) const { auto &block = get(next); - return block.merge == SPIRBlock::MergeLoop && block.continue_block == next; + return block.merge == SPIRBlock::MergeLoop && block.continue_block == ID(next); } inline bool is_break(uint32_t next) const @@ -669,7 +739,6 @@ class Compiler bool function_is_pure(const SPIRFunction &func); bool block_is_pure(const SPIRBlock &block); - bool block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to); bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const; bool execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const; @@ -677,16 +746,20 @@ class Compiler SPIRBlock::ContinueBlockType continue_block_type(const SPIRBlock &continue_block) const; void force_recompile(); + void force_recompile_guarantee_forward_progress(); void clear_force_recompile(); bool is_forcing_recompilation() const; bool is_force_recompile = false; + bool is_force_recompile_forward_progress = false; + bool block_is_noop(const SPIRBlock &block) const; bool block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const; bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const; void inherit_expression_dependencies(uint32_t dst, uint32_t source); void add_implied_read_expression(SPIRExpression &e, uint32_t source); void add_implied_read_expression(SPIRAccessChain &e, uint32_t source); + void add_active_interface_variable(uint32_t var_id); // For proper multiple entry point support, allow querying if an Input or Output // variable is part of that 
entry points interface. @@ -712,6 +785,10 @@ class Compiler // Return true if traversal should continue. // If false, traversal will end immediately. virtual bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) = 0; + virtual bool handle_terminator(const SPIRBlock &) + { + return true; + } virtual bool follow_function_call(const SPIRFunction &) { @@ -722,6 +799,13 @@ class Compiler { } + // Called after returning from a function or when entering a block, + // can be called multiple times per block, + // while set_current_block is only called on block entry. + virtual void rearm_current_block(const SPIRBlock &) + { + } + virtual bool begin_function_scope(const uint32_t *, uint32_t) { return true; @@ -753,7 +837,7 @@ class Compiler struct InterfaceVariableAccessHandler : OpcodeHandler { - InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set &variables_) + InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set &variables_) : compiler(compiler_) , variables(variables_) { @@ -762,7 +846,7 @@ class Compiler bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; const Compiler &compiler; - std::unordered_set &variables; + std::unordered_set &variables; }; struct CombinedImageSamplerHandler : OpcodeHandler @@ -784,8 +868,8 @@ class Compiler uint32_t remap_parameter(uint32_t id); void push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length); void pop_remap_parameters(); - void register_combined_image_sampler(SPIRFunction &caller, uint32_t texture_id, uint32_t sampler_id, - bool depth); + void register_combined_image_sampler(SPIRFunction &caller, VariableID combined_id, VariableID texture_id, + VariableID sampler_id, bool depth); }; struct DummySamplerForCombinedImageHandler : OpcodeHandler @@ -811,6 +895,9 @@ class Compiler Compiler &compiler; void handle_builtin(const SPIRType &type, spv::BuiltIn builtin, const Bitset &decoration_flags); + void 
add_if_builtin(uint32_t id); + void add_if_builtin_or_block(uint32_t id); + void add_if_builtin(uint32_t id, bool allow_blocks); }; bool traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const; @@ -818,7 +905,7 @@ class Compiler // This must be an ordered data structure so we always pick the same type aliases. SmallVector global_struct_cache; - ShaderResources get_shader_resources(const std::unordered_set *active_variables) const; + ShaderResources get_shader_resources(const std::unordered_set *active_variables) const; VariableTypeRemapCallback variable_remap_callback; @@ -826,7 +913,9 @@ class Compiler std::unordered_set forced_temporaries; std::unordered_set forwarded_temporaries; + std::unordered_set suppressed_usage_tracking; std::unordered_set hoisted_temporaries; + std::unordered_set forced_invariant_temporaries; Bitset active_input_builtins; Bitset active_output_builtins; @@ -834,10 +923,6 @@ class Compiler uint32_t cull_distance_count = 0; bool position_invariant = false; - // Traverses all reachable opcodes and sets active_builtins to a bitmask of all builtin variables which are accessed in the shader. - void update_active_builtins(); - bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage); - void analyze_parameter_preservation( SPIRFunction &entry, const CFG &cfg, const std::unordered_map> &variable_to_blocks, @@ -850,6 +935,7 @@ class Compiler // Similar is implemented for images, as well as if subpass inputs are needed. std::unordered_set comparison_ids; bool need_subpass_input = false; + bool need_subpass_input_ms = false; // In certain backends, we will need to use a dummy sampler to be able to emit code. 
// GLSL does not support texelFetch on texture2D objects, but SPIR-V does, @@ -889,13 +975,18 @@ class Compiler void add_hierarchy_to_comparison_ids(uint32_t ids); bool need_subpass_input = false; + bool need_subpass_input_ms = false; + void add_dependency(uint32_t dst, uint32_t src); }; void build_function_control_flow_graphs_and_analyze(); std::unordered_map> function_cfgs; + const CFG &get_cfg_for_current_function() const; + const CFG &get_cfg_for_function(uint32_t id) const; + struct CFGBuilder : OpcodeHandler { - CFGBuilder(Compiler &compiler_); + explicit CFGBuilder(Compiler &compiler_); bool follow_function_call(const SPIRFunction &func) override; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; @@ -914,6 +1005,7 @@ class Compiler bool id_is_phi_variable(uint32_t id) const; bool id_is_potential_temporary(uint32_t id) const; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool handle_terminator(const SPIRBlock &block) override; Compiler &compiler; SPIRFunction &entry; @@ -923,6 +1015,9 @@ class Compiler std::unordered_map> complete_write_variables_to_block; std::unordered_map> partial_write_variables_to_block; std::unordered_set access_chain_expressions; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. + // This is also relevant when forwarding opaque objects since we cannot lower these to temporaries. 
+ std::unordered_map> rvalue_forward_children; const SPIRBlock *current_block = nullptr; }; @@ -938,21 +1033,93 @@ class Compiler uint32_t write_count = 0; }; + struct PhysicalBlockMeta + { + uint32_t alignment = 0; + }; + struct PhysicalStorageBufferPointerHandler : OpcodeHandler { - PhysicalStorageBufferPointerHandler(Compiler &compiler_); + explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_); bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; Compiler &compiler; - std::unordered_set types; + + std::unordered_set non_block_types; + std::unordered_map physical_block_type_meta; + std::unordered_map access_chain_to_physical_block; + + void mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length); + PhysicalBlockMeta *find_block_meta(uint32_t id) const; + bool type_is_bda_block_entry(uint32_t type_id) const; + void setup_meta_chain(uint32_t type_id, uint32_t var_id); + uint32_t get_minimum_scalar_alignment(const SPIRType &type) const; + void analyze_non_block_types_from_block(const SPIRType &type); + uint32_t get_base_non_block_type_id(uint32_t type_id) const; }; void analyze_non_block_pointer_types(); SmallVector physical_storage_non_block_pointer_types; + std::unordered_map physical_storage_type_to_alignment; void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler); void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler, bool single_function); bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var); + // Finds all resources that are written to from inside the critical section, if present. + // The critical section is delimited by OpBeginInvocationInterlockEXT and + // OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written + // while inside the critical section must be placed in a raster order group. 
+ struct InterlockedResourceAccessHandler : OpcodeHandler + { + InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + bool in_crit_sec = false; + + uint32_t interlock_function_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + bool use_critical_section = false; + bool call_stack_is_interlocked = false; + SmallVector call_stack; + + void access_potential_resource(uint32_t id); + }; + + struct InterlockedResourceAccessPrepassHandler : OpcodeHandler + { + InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + void rearm_current_block(const SPIRBlock &block) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t interlock_function_id = 0; + uint32_t current_block_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + SmallVector call_stack; + }; + + void analyze_interlocked_resource_usage(); + // The set of all resources written while inside the critical section, if present. 
+ std::unordered_set interlocked_resources; + bool interlocked_is_complex = false; + void make_constant_null(uint32_t id, uint32_t type); std::unordered_map declared_block_names; @@ -963,7 +1130,7 @@ class Compiler Bitset combined_decoration_for_member(const SPIRType &type, uint32_t index) const; static bool is_desktop_only_format(spv::ImageFormat format); - bool image_is_comparison(const SPIRType &type, uint32_t id) const; + bool is_depth_image(const SPIRType &type, uint32_t id) const; void set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value = 0); uint32_t get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const; @@ -977,12 +1144,25 @@ class Compiler void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration); bool type_is_array_of_pointers(const SPIRType &type) const; + bool type_is_top_level_physical_pointer(const SPIRType &type) const; bool type_is_block_like(const SPIRType &type) const; bool type_is_opaque_value(const SPIRType &type) const; bool reflection_ssbo_instance_name_is_significant() const; std::string get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const; + bool flush_phi_required(BlockID from, BlockID to) const; + + uint32_t evaluate_spec_constant_u32(const SPIRConstantOp &spec) const; + uint32_t evaluate_constant_u32(uint32_t id) const; + + bool is_vertex_like_shader() const; + + // Get the correct case list for the OpSwitch, since it can be either a + // 32 bit wide condition or a 64 bit, but the type is not embedded in the + // instruction itself. + const SmallVector &get_case_list(const SPIRBlock &block) const; + private: // Used only to implement the old deprecated get_entry_point() interface. 
const SPIREntryPoint &get_first_entry_point(const std::string &name) const; diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index 8048274f00b..72614d78e36 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2019 Hans-Kristian Arntzen + * Copyright 2019-2021 Hans-Kristian Arntzen + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cross_c.h" #if SPIRV_CROSS_C_API_CPP @@ -162,7 +169,7 @@ struct spvc_compiler_options_s : ScratchMemoryAllocation struct spvc_set_s : ScratchMemoryAllocation { - std::unordered_set set; + std::unordered_set set; }; // Dummy-inherit to we can keep our opaque type handle type safe in C-land as well, @@ -187,11 +194,15 @@ struct spvc_resources_s : ScratchMemoryAllocation SmallVector sampled_images; SmallVector atomic_counters; SmallVector push_constant_buffers; + SmallVector shader_record_buffers; SmallVector separate_images; SmallVector separate_samplers; SmallVector acceleration_structures; + SmallVector builtin_inputs; + SmallVector builtin_outputs; bool copy_resources(SmallVector &outputs, const SmallVector &inputs); + bool copy_resources(SmallVector &outputs, const SmallVector &inputs); bool copy_resources(const ShaderResources &resources); }; @@ -241,7 +252,7 @@ spvc_result spvc_context_parse_spirv(spvc_context context, const SpvId *spirv, s pir->context = context; Parser parser(spirv, word_count); parser.parse(); - pir->parsed = move(parser.get_parsed_ir()); + pir->parsed = std::move(parser.get_parsed_ir()); *parsed_ir = pir.get(); context->allocations.push_back(std::move(pir)); } @@ -273,7 +284,7 @@ spvc_result 
spvc_context_create_compiler(spvc_context context, spvc_backend back { case SPVC_BACKEND_NONE: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new Compiler(move(parsed_ir->parsed))); + comp->compiler.reset(new Compiler(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new Compiler(parsed_ir->parsed)); break; @@ -281,7 +292,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_GLSL case SPVC_BACKEND_GLSL: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerGLSL(move(parsed_ir->parsed))); + comp->compiler.reset(new CompilerGLSL(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerGLSL(parsed_ir->parsed)); break; @@ -290,7 +301,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_HLSL case SPVC_BACKEND_HLSL: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerHLSL(move(parsed_ir->parsed))); + comp->compiler.reset(new CompilerHLSL(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerHLSL(parsed_ir->parsed)); break; @@ -299,7 +310,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_MSL case SPVC_BACKEND_MSL: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerMSL(move(parsed_ir->parsed))); + comp->compiler.reset(new CompilerMSL(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerMSL(parsed_ir->parsed)); break; @@ -308,7 +319,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_CPP case SPVC_BACKEND_CPP: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerCPP(move(parsed_ir->parsed))); + comp->compiler.reset(new 
CompilerCPP(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerCPP(parsed_ir->parsed)); break; @@ -317,7 +328,7 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back #if SPIRV_CROSS_C_API_REFLECT case SPVC_BACKEND_JSON: if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) - comp->compiler.reset(new CompilerReflection(move(parsed_ir->parsed))); + comp->compiler.reset(new CompilerReflection(std::move(parsed_ir->parsed))); else if (mode == SPVC_CAPTURE_MODE_COPY) comp->compiler.reset(new CompilerReflection(parsed_ir->parsed)); break; @@ -420,6 +431,12 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES: options->glsl.emit_line_directives = value != 0; break; + case SPVC_COMPILER_OPTION_ENABLE_STORAGE_IMAGE_QUALIFIER_DEDUCTION: + options->glsl.enable_storage_image_qualifier_deduction = value != 0; + break; + case SPVC_COMPILER_OPTION_FORCE_ZERO_INITIALIZED_VARIABLES: + options->glsl.force_zero_initialized_variables = value != 0; + break; case SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE: options->glsl.vertex.support_nonzero_base_instance = value != 0; @@ -453,6 +470,18 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS: options->glsl.emit_uniform_buffer_as_plain_uniforms = value != 0; break; + case SPVC_COMPILER_OPTION_GLSL_FORCE_FLATTENED_IO_BLOCKS: + options->glsl.force_flattened_io_blocks = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT: + options->glsl.ovr_multiview_view_count = value; + break; + case SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS: + options->glsl.relax_nan_checks = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_ENABLE_ROW_MAJOR_LOAD_WORKAROUND: + options->glsl.enable_row_major_load_workaround = value != 0; + break; #endif #if SPIRV_CROSS_C_API_HLSL @@ 
-471,6 +500,22 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE: options->hlsl.support_nonzero_base_vertex_base_instance = value != 0; break; + + case SPVC_COMPILER_OPTION_HLSL_FORCE_STORAGE_BUFFER_AS_UAV: + options->hlsl.force_storage_buffer_as_uav = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV: + options->hlsl.nonwritable_uav_texture_as_srv = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_ENABLE_16BIT_TYPES: + options->hlsl.enable_16bit_types = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS: + options->hlsl.flatten_matrix_vertex_input_semantics = value != 0; + break; #endif #if SPIRV_CROSS_C_API_MSL @@ -545,6 +590,154 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX: options->msl.buffer_size_buffer_index = value; break; + + case SPVC_COMPILER_OPTION_MSL_MULTIVIEW: + options->msl.multiview = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX: + options->msl.view_mask_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX: + options->msl.device_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX: + options->msl.view_index_from_device_index = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE: + options->msl.dispatch_base = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX: + options->msl.dynamic_offsets_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D: + options->msl.texture_1D_as_2D = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO: + options->msl.enable_base_index_zero = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS: + 
options->msl.use_framebuffer_fetch_subpasses = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH: + options->msl.invariant_float_math = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY: + options->msl.emulate_cube_array = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING: + options->msl.enable_decoration_binding = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES: + options->msl.force_active_argument_buffer_resources = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS: + options->msl.force_native_arrays = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_OUTPUT_MASK: + options->msl.enable_frag_output_mask = value; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_DEPTH_BUILTIN: + options->msl.enable_frag_depth_builtin = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_STENCIL_REF_BUILTIN: + options->msl.enable_frag_stencil_ref_builtin = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_CLIP_DISTANCE_USER_VARYING: + options->msl.enable_clip_distance_user_varying = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_MULTI_PATCH_WORKGROUP: + options->msl.multi_patch_workgroup = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_BUFFER_INDEX: + options->msl.shader_input_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_INDEX_BUFFER_INDEX: + options->msl.shader_index_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_VERTEX_FOR_TESSELLATION: + options->msl.vertex_for_tessellation = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_VERTEX_INDEX_TYPE: + options->msl.vertex_index_type = static_cast(value); + break; + + case SPVC_COMPILER_OPTION_MSL_MULTIVIEW_LAYERED_RENDERING: + options->msl.multiview_layered_rendering = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ARRAYED_SUBPASS_INPUT: + 
options->msl.arrayed_subpass_input = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_R32UI_LINEAR_TEXTURE_ALIGNMENT: + options->msl.r32ui_linear_texture_alignment = value; + break; + + case SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID: + options->msl.r32ui_alignment_constant_id = value; + break; + + case SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS: + options->msl.ios_use_simdgroup_functions = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS: + options->msl.emulate_subgroups = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE: + options->msl.fixed_subgroup_size = value; + break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING: + options->msl.force_sample_rate_shading = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_IOS_SUPPORT_BASE_VERTEX_INSTANCE: + options->msl.ios_support_base_vertex_instance = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_RAW_BUFFER_TESE_INPUT: + options->msl.raw_buffer_tese_input = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_INPUT_BUFFER_INDEX: + options->msl.shader_patch_input_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_MANUAL_HELPER_INVOCATION_UPDATES: + options->msl.manual_helper_invocation_updates = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_CHECK_DISCARDED_FRAG_STORES: + options->msl.check_discarded_frag_stores = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS_TIER: + options->msl.argument_buffers_tier = static_cast(value); + break; #endif default: @@ -641,6 +834,61 @@ spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_vari #endif } +spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + 
return SPVC_ERROR_INVALID_ARGUMENT; + } + + return static_cast(compiler->compiler.get())->variable_is_depth_or_compare(id) ? SPVC_TRUE : SPVC_FALSE; +#else + (void)id; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_FALSE; +#endif +} + +spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler, + unsigned location, unsigned component) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast(compiler->compiler.get())->mask_stage_output_by_location(location, component); + return SPVC_SUCCESS; +#else + (void)location; + (void)component; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast(compiler->compiler.get())->mask_stage_output_by_builtin(spv::BuiltIn(builtin)); + return SPVC_SUCCESS; +#else + (void)builtin; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result spvc_compiler_hlsl_set_root_constants_layout(spvc_compiler compiler, const spvc_hlsl_root_constants *constant_info, size_t count) @@ -721,6 +969,80 @@ spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler c #endif } +spvc_result 
spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, + spvc_hlsl_binding_flags flags) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + hlsl.set_resource_binding_flags(flags); + return SPVC_SUCCESS; +#else + (void)flags; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_hlsl_add_resource_binding(spvc_compiler compiler, + const spvc_hlsl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + HLSLResourceBinding bind; + bind.binding = binding->binding; + bind.desc_set = binding->desc_set; + bind.stage = static_cast(binding->stage); + bind.cbv.register_binding = binding->cbv.register_binding; + bind.cbv.register_space = binding->cbv.register_space; + bind.uav.register_binding = binding->uav.register_binding; + bind.uav.register_space = binding->uav.register_space; + bind.srv.register_binding = binding->srv.register_binding; + bind.srv.register_space = binding->srv.register_space; + bind.sampler.register_binding = binding->sampler.register_binding; + bind.sampler.register_space = binding->sampler.register_space; + hlsl.add_hlsl_resource_binding(bind); + return SPVC_SUCCESS; +#else + (void)binding; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_bool spvc_compiler_hlsl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, + unsigned binding) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend 
!= SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_FALSE; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + return hlsl.is_hlsl_resource_binding_used(static_cast(model), set, binding) ? SPVC_TRUE : + SPVC_FALSE; +#else + (void)model; + (void)set; + (void)binding; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_FALSE; +#endif +} + spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler) { #if SPIRV_CROSS_C_API_MSL @@ -838,15 +1160,11 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const } auto &msl = *static_cast(compiler->compiler.get()); - MSLVertexAttr attr; + MSLShaderInterfaceVariable attr; attr.location = va->location; - attr.msl_buffer = va->msl_buffer; - attr.msl_offset = va->msl_offset; - attr.msl_stride = va->msl_stride; - attr.format = static_cast(va->format); + attr.format = static_cast(va->format); attr.builtin = static_cast(va->builtin); - attr.per_instance = va->per_instance; - msl.add_msl_vertex_attribute(attr); + msl.add_msl_shader_input(attr); return SPVC_SUCCESS; #else (void)va; @@ -855,6 +1173,104 @@ spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const #endif } +spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spvc_msl_shader_interface_var *si) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLShaderInterfaceVariable input; + input.location = si->location; + input.format = static_cast(si->format); + input.builtin = static_cast(si->builtin); + input.vecsize = si->vecsize; + msl.add_msl_shader_input(input); + return SPVC_SUCCESS; +#else + (void)si; + compiler->context->report_error("MSL function used on a 
non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_shader_input_2(spvc_compiler compiler, const spvc_msl_shader_interface_var_2 *si) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLShaderInterfaceVariable input; + input.location = si->location; + input.format = static_cast(si->format); + input.builtin = static_cast(si->builtin); + input.vecsize = si->vecsize; + input.rate = static_cast(si->rate); + msl.add_msl_shader_input(input); + return SPVC_SUCCESS; +#else + (void)si; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler, const spvc_msl_shader_interface_var *so) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLShaderInterfaceVariable output; + output.location = so->location; + output.format = static_cast(so->format); + output.builtin = static_cast(so->builtin); + output.vecsize = so->vecsize; + msl.add_msl_shader_output(output); + return SPVC_SUCCESS; +#else + (void)so; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_shader_output_2(spvc_compiler compiler, const spvc_msl_shader_interface_var_2 *so) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = 
*static_cast(compiler->compiler.get()); + MSLShaderInterfaceVariable output; + output.location = so->location; + output.format = static_cast(so->format); + output.builtin = static_cast(so->builtin); + output.vecsize = so->vecsize; + output.rate = static_cast(so->rate); + msl.add_msl_shader_output(output); + return SPVC_SUCCESS; +#else + (void)so; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, const spvc_msl_resource_binding *binding) { @@ -882,6 +1298,47 @@ spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, #endif } +spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.add_dynamic_buffer(desc_set, binding, index); + return SPVC_SUCCESS; +#else + (void)binding; + (void)desc_set; + (void)index; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.add_inline_uniform_block(desc_set, binding); + return SPVC_SUCCESS; +#else + (void)binding; + (void)desc_set; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result 
spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set) { #if SPIRV_CROSS_C_API_MSL @@ -901,7 +1358,27 @@ spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler #endif } -spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location) +spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.set_argument_buffer_device_address_space(desc_set, bool(device_address)); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)device_address; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location) { #if SPIRV_CROSS_C_API_MSL if (compiler->backend != SPVC_BACKEND_MSL) @@ -911,7 +1388,7 @@ spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, uns } auto &msl = *static_cast(compiler->compiler.get()); - return msl.is_msl_vertex_attribute_used(location) ? SPVC_TRUE : SPVC_FALSE; + return msl.is_msl_shader_input_used(location) ? 
SPVC_TRUE : SPVC_FALSE; #else (void)location; compiler->context->report_error("MSL function used on a non-MSL backend."); @@ -919,6 +1396,29 @@ spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, uns #endif } +spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.is_msl_shader_output_used(location) ? SPVC_TRUE : SPVC_FALSE; +#else + (void)location; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location) +{ + return spvc_compiler_msl_is_shader_input_used(compiler, location); +} + spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, unsigned binding) { @@ -941,6 +1441,42 @@ spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutio #endif } +spvc_result spvc_compiler_msl_set_combined_sampler_suffix(spvc_compiler compiler, const char *suffix) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.set_combined_sampler_suffix(suffix); + return SPVC_SUCCESS; +#else + (void)suffix; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +const char *spvc_compiler_msl_get_combined_sampler_suffix(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL 
backend."); + return ""; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.get_combined_sampler_suffix(); +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return ""; +#endif +} + #if SPIRV_CROSS_C_API_MSL static void spvc_convert_msl_sampler(MSLConstexprSampler &samp, const spvc_msl_constexpr_sampler *sampler) { @@ -949,17 +1485,33 @@ static void spvc_convert_msl_sampler(MSLConstexprSampler &samp, const spvc_msl_c samp.r_address = static_cast(sampler->r_address); samp.lod_clamp_min = sampler->lod_clamp_min; samp.lod_clamp_max = sampler->lod_clamp_max; - samp.lod_clamp_enable = sampler->lod_clamp_enable; + samp.lod_clamp_enable = sampler->lod_clamp_enable != 0; samp.min_filter = static_cast(sampler->min_filter); samp.mag_filter = static_cast(sampler->mag_filter); samp.mip_filter = static_cast(sampler->mip_filter); - samp.compare_enable = sampler->compare_enable; - samp.anisotropy_enable = sampler->anisotropy_enable; + samp.compare_enable = sampler->compare_enable != 0; + samp.anisotropy_enable = sampler->anisotropy_enable != 0; samp.max_anisotropy = sampler->max_anisotropy; samp.compare_func = static_cast(sampler->compare_func); samp.coord = static_cast(sampler->coord); samp.border_color = static_cast(sampler->border_color); } + +static void spvc_convert_msl_sampler_ycbcr_conversion(MSLConstexprSampler &samp, const spvc_msl_sampler_ycbcr_conversion *conv) +{ + samp.ycbcr_conversion_enable = conv != nullptr; + if (conv == nullptr) return; + samp.planes = conv->planes; + samp.resolution = static_cast(conv->resolution); + samp.chroma_filter = static_cast(conv->chroma_filter); + samp.x_chroma_offset = static_cast(conv->x_chroma_offset); + samp.y_chroma_offset = static_cast(conv->y_chroma_offset); + for (int i = 0; i < 4; i++) + samp.swizzle[i] = static_cast(conv->swizzle[i]); + samp.ycbcr_model = static_cast(conv->ycbcr_model); + samp.ycbcr_range = static_cast(conv->ycbcr_range); + samp.bpc = conv->bpc; +} 
#endif spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, @@ -1010,6 +1562,60 @@ spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler c #endif } +spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); + msl.remap_constexpr_sampler(id, samp); + return SPVC_SUCCESS; +#else + (void)id; + (void)sampler; + (void)conv; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, + unsigned desc_set, unsigned binding, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); + msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)binding; + (void)sampler; + (void)conv; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result 
spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components) { @@ -1108,6 +1714,30 @@ bool spvc_resources_s::copy_resources(SmallVector &outp return true; } +bool spvc_resources_s::copy_resources(SmallVector &outputs, + const SmallVector &inputs) +{ + for (auto &i : inputs) + { + spvc_reflected_builtin_resource br; + + br.value_type_id = i.value_type_id; + br.builtin = SpvBuiltIn(i.builtin); + + auto &r = br.resource; + r.base_type_id = i.resource.base_type_id; + r.type_id = i.resource.type_id; + r.id = i.resource.id; + r.name = context->allocate_name(i.resource.name); + if (!r.name) + return false; + + outputs.push_back(br); + } + + return true; +} + bool spvc_resources_s::copy_resources(const ShaderResources &resources) { if (!copy_resources(uniform_buffers, resources.uniform_buffers)) @@ -1128,12 +1758,18 @@ bool spvc_resources_s::copy_resources(const ShaderResources &resources) return false; if (!copy_resources(push_constant_buffers, resources.push_constant_buffers)) return false; + if (!copy_resources(shader_record_buffers, resources.shader_record_buffers)) + return false; if (!copy_resources(separate_images, resources.separate_images)) return false; if (!copy_resources(separate_samplers, resources.separate_samplers)) return false; if (!copy_resources(acceleration_structures, resources.acceleration_structures)) return false; + if (!copy_resources(builtin_inputs, resources.builtin_inputs)) + return false; + if (!copy_resources(builtin_outputs, resources.builtin_outputs)) + return false; return true; } @@ -1277,6 +1913,41 @@ spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, list = &resources->acceleration_structures; break; + case SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER: + list = &resources->shader_record_buffers; + break; + + default: + break; + } + + if (!list) + { + resources->context->report_error("Invalid argument."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + 
*resource_size = list->size(); + *resource_list = list->data(); + return SPVC_SUCCESS; +} + +spvc_result spvc_resources_get_builtin_resource_list_for_type( + spvc_resources resources, spvc_builtin_resource_type type, + const spvc_reflected_builtin_resource **resource_list, + size_t *resource_size) +{ + const SmallVector *list = nullptr; + switch (type) + { + case SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT: + list = &resources->builtin_inputs; + break; + + case SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT: + list = &resources->builtin_outputs; + break; + default: break; } @@ -1494,6 +2165,18 @@ SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler) return static_cast(compiler->compiler->get_execution_model()); } +void spvc_compiler_update_active_builtins(spvc_compiler compiler) +{ + compiler->compiler->update_active_builtins(); +} + +spvc_bool spvc_compiler_has_active_builtin(spvc_compiler compiler, SpvBuiltIn builtin, SpvStorageClass storage) +{ + return compiler->compiler->has_active_builtin(static_cast(builtin), static_cast(storage)) ? + SPVC_TRUE : + SPVC_FALSE; +} + spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id) { // Should only throw if an intentionally garbage ID is passed, but the IDs are not type-safe. @@ -1504,6 +2187,11 @@ spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id) SPVC_END_SAFE_SCOPE(compiler->context, nullptr) } +spvc_type_id spvc_type_get_base_type_id(spvc_type type) +{ + return type->self; +} + static spvc_basetype convert_basetype(SPIRType::BaseType type) { // For now the enums match up. @@ -1945,19 +2633,47 @@ void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr) { #if SPIRV_CROSS_C_API_MSL // Crude, but works. - MSLVertexAttr attr_default; + MSLShaderInterfaceVariable attr_default; attr->location = attr_default.location; - attr->per_instance = attr_default.per_instance ? 
SPVC_TRUE : SPVC_FALSE; attr->format = static_cast(attr_default.format); attr->builtin = static_cast(attr_default.builtin); - attr->msl_buffer = attr_default.msl_buffer; - attr->msl_offset = attr_default.msl_offset; - attr->msl_stride = attr_default.msl_stride; #else memset(attr, 0, sizeof(*attr)); #endif } +void spvc_msl_shader_interface_var_init(spvc_msl_shader_interface_var *var) +{ +#if SPIRV_CROSS_C_API_MSL + MSLShaderInterfaceVariable var_default; + var->location = var_default.location; + var->format = static_cast(var_default.format); + var->builtin = static_cast(var_default.builtin); + var->vecsize = var_default.vecsize; +#else + memset(var, 0, sizeof(*var)); +#endif +} + +void spvc_msl_shader_input_init(spvc_msl_shader_input *input) +{ + spvc_msl_shader_interface_var_init(input); +} + +void spvc_msl_shader_interface_var_init_2(spvc_msl_shader_interface_var_2 *var) +{ +#if SPIRV_CROSS_C_API_MSL + MSLShaderInterfaceVariable var_default; + var->location = var_default.location; + var->format = static_cast(var_default.format); + var->builtin = static_cast(var_default.builtin); + var->vecsize = var_default.vecsize; + var->rate = static_cast(var_default.rate); +#else + memset(var, 0, sizeof(*var)); +#endif +} + void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding) { #if SPIRV_CROSS_C_API_MSL @@ -1973,6 +2689,26 @@ void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding) #endif } +void spvc_hlsl_resource_binding_init(spvc_hlsl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_HLSL + HLSLResourceBinding binding_default; + binding->desc_set = binding_default.desc_set; + binding->binding = binding_default.binding; + binding->cbv.register_binding = binding_default.cbv.register_binding; + binding->cbv.register_space = binding_default.cbv.register_space; + binding->srv.register_binding = binding_default.srv.register_binding; + binding->srv.register_space = binding_default.srv.register_space; + binding->uav.register_binding = 
binding_default.uav.register_binding; + binding->uav.register_space = binding_default.uav.register_space; + binding->sampler.register_binding = binding_default.sampler.register_binding; + binding->sampler.register_space = binding_default.sampler.register_space; + binding->stage = static_cast(binding_default.stage); +#else + memset(binding, 0, sizeof(*binding)); +#endif +} + void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler) { #if SPIRV_CROSS_C_API_MSL @@ -1997,6 +2733,24 @@ void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler) #endif } +void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + MSLConstexprSampler defaults; + conv->planes = defaults.planes; + conv->resolution = static_cast(defaults.resolution); + conv->chroma_filter = static_cast(defaults.chroma_filter); + conv->x_chroma_offset = static_cast(defaults.x_chroma_offset); + conv->y_chroma_offset = static_cast(defaults.y_chroma_offset); + for (int i = 0; i < 4; i++) + conv->swizzle[i] = static_cast(defaults.swizzle[i]); + conv->ycbcr_model = static_cast(defaults.ycbcr_model); + conv->ycbcr_range = static_cast(defaults.ycbcr_range); +#else + memset(conv, 0, sizeof(*conv)); +#endif +} + unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler) { return compiler->compiler->get_current_id_bound(); diff --git a/spirv_cross_c.h b/spirv_cross_c.h index 6efaf8543ed..826e25a7401 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -1,5 +1,6 @@ /* - * Copyright 2019 Hans-Kristian Arntzen + * Copyright 2019-2021 Hans-Kristian Arntzen + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. 
The MIT License, found at . + */ + #ifndef SPIRV_CROSS_C_API_H #define SPIRV_CROSS_C_API_H @@ -33,7 +40,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 16 +#define SPVC_C_API_VERSION_MINOR 54 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -92,6 +99,13 @@ typedef struct spvc_reflected_resource const char *name; } spvc_reflected_resource; +typedef struct spvc_reflected_builtin_resource +{ + SpvBuiltIn builtin; + spvc_type_id value_type_id; + spvc_reflected_resource resource; +} spvc_reflected_builtin_resource; + /* See C++ API. */ typedef struct spvc_entry_point { @@ -210,9 +224,19 @@ typedef enum spvc_resource_type SPVC_RESOURCE_TYPE_SEPARATE_IMAGE = 10, SPVC_RESOURCE_TYPE_SEPARATE_SAMPLERS = 11, SPVC_RESOURCE_TYPE_ACCELERATION_STRUCTURE = 12, + SPVC_RESOURCE_TYPE_RAY_QUERY = 13, + SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER = 14, SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff } spvc_resource_type; +typedef enum spvc_builtin_resource_type +{ + SPVC_BUILTIN_RESOURCE_TYPE_UNKNOWN = 0, + SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT = 1, + SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT = 2, + SPVC_BUILTIN_RESOURCE_TYPE_INT_MAX = 0x7fffffff +} spvc_builtin_resource_type; + /* Maps to spirv_cross::SPIRType::BaseType. */ typedef enum spvc_basetype { @@ -258,21 +282,51 @@ typedef enum spvc_msl_platform } spvc_msl_platform; /* Maps to C++ API. */ -typedef enum spvc_msl_vertex_format +typedef enum spvc_msl_index_type { - SPVC_MSL_VERTEX_FORMAT_OTHER = 0, - SPVC_MSL_VERTEX_FORMAT_UINT8 = 1, - SPVC_MSL_VERTEX_FORMAT_UINT16 = 2 -} spvc_msl_vertex_format; + SPVC_MSL_INDEX_TYPE_NONE = 0, + SPVC_MSL_INDEX_TYPE_UINT16 = 1, + SPVC_MSL_INDEX_TYPE_UINT32 = 2, + SPVC_MSL_INDEX_TYPE_MAX_INT = 0x7fffffff +} spvc_msl_index_type; /* Maps to C++ API. 
*/ +typedef enum spvc_msl_shader_variable_format +{ + SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, + SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, + SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, + SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, + SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, + + /* Deprecated names. */ + SPVC_MSL_VERTEX_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, + SPVC_MSL_VERTEX_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, + SPVC_MSL_VERTEX_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, + SPVC_MSL_SHADER_INPUT_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY32 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32, + + + SPVC_MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff +} spvc_msl_shader_variable_format, spvc_msl_shader_input_format, spvc_msl_vertex_format; + +/* Maps to C++ API. Deprecated; use spvc_msl_shader_interface_var. */ typedef struct spvc_msl_vertex_attribute { unsigned location; + + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ unsigned msl_buffer; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ unsigned msl_offset; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ unsigned msl_stride; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ spvc_bool per_instance; + spvc_msl_vertex_format format; SpvBuiltIn builtin; } spvc_msl_vertex_attribute; @@ -282,6 +336,50 @@ typedef struct spvc_msl_vertex_attribute */ SPVC_PUBLIC_API void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr); +/* Maps to C++ API. Deprecated; use spvc_msl_shader_interface_var_2. 
*/ +typedef struct spvc_msl_shader_interface_var +{ + unsigned location; + spvc_msl_vertex_format format; + SpvBuiltIn builtin; + unsigned vecsize; +} spvc_msl_shader_interface_var, spvc_msl_shader_input; + +/* + * Initializes the shader input struct. + * Deprecated. Use spvc_msl_shader_interface_var_init_2(). + */ +SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init(spvc_msl_shader_interface_var *var); +/* + * Deprecated. Use spvc_msl_shader_interface_var_init_2(). + */ +SPVC_PUBLIC_API void spvc_msl_shader_input_init(spvc_msl_shader_input *input); + +/* Maps to C++ API. */ +typedef enum spvc_msl_shader_variable_rate +{ + SPVC_MSL_SHADER_VARIABLE_RATE_PER_VERTEX = 0, + SPVC_MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE = 1, + SPVC_MSL_SHADER_VARIABLE_RATE_PER_PATCH = 2, + + SPVC_MSL_SHADER_VARIABLE_RATE_INT_MAX = 0x7fffffff, +} spvc_msl_shader_variable_rate; + +/* Maps to C++ API. */ +typedef struct spvc_msl_shader_interface_var_2 +{ + unsigned location; + spvc_msl_shader_variable_format format; + SpvBuiltIn builtin; + unsigned vecsize; + spvc_msl_shader_variable_rate rate; +} spvc_msl_shader_interface_var_2; + +/* + * Initializes the shader interface variable struct. + */ +SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init_2(spvc_msl_shader_interface_var_2 *var); + /* Maps to C++ API. */ typedef struct spvc_msl_resource_binding { @@ -370,6 +468,55 @@ typedef enum spvc_msl_sampler_border_color SPVC_MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff } spvc_msl_sampler_border_color; +/* Maps to C++ API. */ +typedef enum spvc_msl_format_resolution +{ + SPVC_MSL_FORMAT_RESOLUTION_444 = 0, + SPVC_MSL_FORMAT_RESOLUTION_422, + SPVC_MSL_FORMAT_RESOLUTION_420, + SPVC_MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +} spvc_msl_format_resolution; + +/* Maps to C++ API. 
*/ +typedef enum spvc_msl_chroma_location +{ + SPVC_MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + SPVC_MSL_CHROMA_LOCATION_MIDPOINT, + SPVC_MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +} spvc_msl_chroma_location; + +/* Maps to C++ API. */ +typedef enum spvc_msl_component_swizzle +{ + SPVC_MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + SPVC_MSL_COMPONENT_SWIZZLE_ZERO, + SPVC_MSL_COMPONENT_SWIZZLE_ONE, + SPVC_MSL_COMPONENT_SWIZZLE_R, + SPVC_MSL_COMPONENT_SWIZZLE_G, + SPVC_MSL_COMPONENT_SWIZZLE_B, + SPVC_MSL_COMPONENT_SWIZZLE_A, + SPVC_MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +} spvc_msl_component_swizzle; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_ycbcr_model_conversion +{ + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_model_conversion; + +/* Maps to C+ API. */ +typedef enum spvc_msl_sampler_ycbcr_range +{ + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + SPVC_MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_range; + /* Maps to C++ API. */ typedef struct spvc_msl_constexpr_sampler { @@ -397,6 +544,64 @@ typedef struct spvc_msl_constexpr_sampler */ SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler); +/* Maps to the sampler Y'CbCr conversion-related portions of MSLConstexprSampler. See C++ API for defaults and details. 
*/ +typedef struct spvc_msl_sampler_ycbcr_conversion +{ + unsigned planes; + spvc_msl_format_resolution resolution; + spvc_msl_sampler_filter chroma_filter; + spvc_msl_chroma_location x_chroma_offset; + spvc_msl_chroma_location y_chroma_offset; + spvc_msl_component_swizzle swizzle[4]; + spvc_msl_sampler_ycbcr_model_conversion ycbcr_model; + spvc_msl_sampler_ycbcr_range ycbcr_range; + unsigned bpc; +} spvc_msl_sampler_ycbcr_conversion; + +/* + * Initializes the constexpr sampler struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv); + +/* Maps to C++ API. */ +typedef enum spvc_hlsl_binding_flag_bits +{ + SPVC_HLSL_BINDING_AUTO_NONE_BIT = 0, + SPVC_HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0, + SPVC_HLSL_BINDING_AUTO_CBV_BIT = 1 << 1, + SPVC_HLSL_BINDING_AUTO_SRV_BIT = 1 << 2, + SPVC_HLSL_BINDING_AUTO_UAV_BIT = 1 << 3, + SPVC_HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4, + SPVC_HLSL_BINDING_AUTO_ALL = 0x7fffffff +} spvc_hlsl_binding_flag_bits; +typedef unsigned spvc_hlsl_binding_flags; + +#define SPVC_HLSL_PUSH_CONSTANT_DESC_SET (~(0u)) +#define SPVC_HLSL_PUSH_CONSTANT_BINDING (0) + +/* Maps to C++ API. */ +typedef struct spvc_hlsl_resource_binding_mapping +{ + unsigned register_space; + unsigned register_binding; +} spvc_hlsl_resource_binding_mapping; + +typedef struct spvc_hlsl_resource_binding +{ + SpvExecutionModel stage; + unsigned desc_set; + unsigned binding; + + spvc_hlsl_resource_binding_mapping cbv, uav, srv, sampler; +} spvc_hlsl_resource_binding; + +/* + * Initializes the resource binding struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_hlsl_resource_binding_init(spvc_hlsl_resource_binding *binding); + /* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. 
*/ typedef enum spvc_compiler_option { @@ -452,6 +657,74 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES = 37 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_MSL_MULTIVIEW = 38 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX = 39 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX = 40 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 44 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | SPVC_COMPILER_OPTION_MSL_BIT, + + /* Obsolete. Use MSL_FRAMEBUFFER_FETCH_SUBPASS instead. */ + SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES = 50 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS = 51 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_ENABLE_STORAGE_IMAGE_QUALIFIER_DEDUCTION = 52 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_HLSL_FORCE_STORAGE_BUFFER_AS_UAV = 53 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_FORCE_ZERO_INITIALIZED_VARIABLES = 54 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV = 55 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_OUTPUT_MASK = 56 | 
SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_DEPTH_BUILTIN = 57 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_STENCIL_REF_BUILTIN = 58 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_CLIP_DISTANCE_USER_VARYING = 59 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_HLSL_ENABLE_16BIT_TYPES = 60 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_MULTI_PATCH_WORKGROUP = 61 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_BUFFER_INDEX = 62 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INDEX_BUFFER_INDEX = 63 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VERTEX_FOR_TESSELLATION = 64 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VERTEX_INDEX_TYPE = 65 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_FORCE_FLATTENED_IO_BLOCKS = 66 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_MULTIVIEW_LAYERED_RENDERING = 67 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ARRAYED_SUBPASS_INPUT = 68 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_R32UI_LINEAR_TEXTURE_ALIGNMENT = 69 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID = 70 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS = 71 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS = 72 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS = 73 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE = 74 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING = 75 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_IOS_SUPPORT_BASE_VERTEX_INSTANCE = 76 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT = 77 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS = 78 | 
SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_MSL_RAW_BUFFER_TESE_INPUT = 79 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_INPUT_BUFFER_INDEX = 80 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_MANUAL_HELPER_INVOCATION_UPDATES = 81 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_CHECK_DISCARDED_FRAG_STORES = 82 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_ENABLE_ROW_MAJOR_LOAD_WORKAROUND = 83 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS_TIER = 84 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; @@ -512,6 +785,12 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_add_header_line(spvc_compiler compiler SPVC_PUBLIC_API spvc_result spvc_compiler_require_extension(spvc_compiler compiler, const char *ext); SPVC_PUBLIC_API spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_variable_id id); +SPVC_PUBLIC_API spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id); + +SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler, + unsigned location, unsigned component); +SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin); + /* * HLSL specifics. * Maps to C++ API. 
@@ -524,6 +803,16 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_vertex_attribute_remap(spvc_c size_t remaps); SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, + spvc_hlsl_binding_flags flags); + +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_resource_binding(spvc_compiler compiler, + const spvc_hlsl_resource_binding *binding); +SPVC_PUBLIC_API spvc_bool spvc_compiler_hlsl_is_resource_used(spvc_compiler compiler, + SpvExecutionModel model, + unsigned set, + unsigned binding); + /* * MSL specifics. * Maps to C++ API. @@ -542,19 +831,44 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler const spvc_msl_vertex_attribute *attrs); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, const spvc_msl_resource_binding *binding); +/* Deprecated; use spvc_compiler_msl_add_shader_input_2(). */ +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, + const spvc_msl_shader_interface_var *input); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input_2(spvc_compiler compiler, + const spvc_msl_shader_interface_var_2 *input); +/* Deprecated; use spvc_compiler_msl_add_shader_output_2(). */ +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler, + const spvc_msl_shader_interface_var *output); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output_2(spvc_compiler compiler, + const spvc_msl_shader_interface_var_2 *output); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address); + +/* Obsolete, use is_shader_input_used. 
*/ SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, unsigned binding); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components); SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id); SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index); + +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding); + +SPVC_PUBLIC_API spvc_result 
spvc_compiler_msl_set_combined_sampler_suffix(spvc_compiler compiler, const char *suffix); +SPVC_PUBLIC_API const char *spvc_compiler_msl_get_combined_sampler_suffix(spvc_compiler compiler); + /* * Reflect resources. * Maps almost 1:1 to C++ API. @@ -569,6 +883,11 @@ SPVC_PUBLIC_API spvc_result spvc_resources_get_resource_list_for_type(spvc_resou const spvc_reflected_resource **resource_list, size_t *resource_size); +SPVC_PUBLIC_API spvc_result spvc_resources_get_builtin_resource_list_for_type( + spvc_resources resources, spvc_builtin_resource_type type, + const spvc_reflected_builtin_resource **resource_list, + size_t *resource_size); + /* * Decorations. * Maps to C++ API. @@ -625,6 +944,8 @@ SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument(spvc_compiler SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument_by_index(spvc_compiler compiler, SpvExecutionMode mode, unsigned index); SPVC_PUBLIC_API SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler); +SPVC_PUBLIC_API void spvc_compiler_update_active_builtins(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_bool spvc_compiler_has_active_builtin(spvc_compiler compiler, SpvBuiltIn builtin, SpvStorageClass storage); /* * Type query interface. @@ -632,6 +953,12 @@ SPVC_PUBLIC_API SpvExecutionModel spvc_compiler_get_execution_model(spvc_compile */ SPVC_PUBLIC_API spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id); +/* Pulls out SPIRType::self. This effectively gives the type ID without array or pointer qualifiers. + * This is necessary when reflecting decoration/name information on members of a struct, + * which are placed in the base type, not the qualified type. + * This is similar to spvc_reflected_resource::base_type_id. 
*/ +SPVC_PUBLIC_API spvc_type_id spvc_type_get_base_type_id(spvc_type type); + SPVC_PUBLIC_API spvc_basetype spvc_type_get_basetype(spvc_type type); SPVC_PUBLIC_API unsigned spvc_type_get_bit_width(spvc_type type); SPVC_PUBLIC_API unsigned spvc_type_get_vector_size(spvc_type type); diff --git a/spirv_cross_containers.hpp b/spirv_cross_containers.hpp index 31a8abbd0d3..50513f49e7b 100644 --- a/spirv_cross_containers.hpp +++ b/spirv_cross_containers.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2019 Hans-Kristian Arntzen + * Copyright 2019-2021 Hans-Kristian Arntzen + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_CONTAINERS_HPP #define SPIRV_CROSS_CONTAINERS_HPP @@ -21,8 +28,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -61,7 +70,8 @@ class AlignedBuffer private: #if defined(_MSC_VER) && _MSC_VER < 1900 // MSVC 2013 workarounds, sigh ... 
- union { + union + { char aligned_char[sizeof(T) * N]; double dummy_aligner; } u; @@ -85,72 +95,72 @@ template class VectorView { public: - T &operator[](size_t i) + T &operator[](size_t i) SPIRV_CROSS_NOEXCEPT { return ptr[i]; } - const T &operator[](size_t i) const + const T &operator[](size_t i) const SPIRV_CROSS_NOEXCEPT { return ptr[i]; } - bool empty() const + bool empty() const SPIRV_CROSS_NOEXCEPT { return buffer_size == 0; } - size_t size() const + size_t size() const SPIRV_CROSS_NOEXCEPT { return buffer_size; } - T *data() + T *data() SPIRV_CROSS_NOEXCEPT { return ptr; } - const T *data() const + const T *data() const SPIRV_CROSS_NOEXCEPT { return ptr; } - T *begin() + T *begin() SPIRV_CROSS_NOEXCEPT { return ptr; } - T *end() + T *end() SPIRV_CROSS_NOEXCEPT { return ptr + buffer_size; } - const T *begin() const + const T *begin() const SPIRV_CROSS_NOEXCEPT { return ptr; } - const T *end() const + const T *end() const SPIRV_CROSS_NOEXCEPT { return ptr + buffer_size; } - T &front() + T &front() SPIRV_CROSS_NOEXCEPT { return ptr[0]; } - const T &front() const + const T &front() const SPIRV_CROSS_NOEXCEPT { return ptr[0]; } - T &back() + T &back() SPIRV_CROSS_NOEXCEPT { return ptr[buffer_size - 1]; } - const T &back() const + const T &back() const SPIRV_CROSS_NOEXCEPT { return ptr[buffer_size - 1]; } @@ -194,14 +204,14 @@ template class SmallVector : public VectorView { public: - SmallVector() + SmallVector() SPIRV_CROSS_NOEXCEPT { this->ptr = stack_storage.data(); buffer_capacity = N; } - SmallVector(const T *arg_list_begin, const T *arg_list_end) - : SmallVector() + template + SmallVector(const U *arg_list_begin, const U *arg_list_end) SPIRV_CROSS_NOEXCEPT : SmallVector() { auto count = size_t(arg_list_end - arg_list_begin); reserve(count); @@ -210,6 +220,16 @@ class SmallVector : public VectorView this->buffer_size = count; } + template + SmallVector(std::initializer_list init) SPIRV_CROSS_NOEXCEPT : SmallVector(init.begin(), init.end()) + { + } + + 
template + explicit SmallVector(const U (&init)[M]) SPIRV_CROSS_NOEXCEPT : SmallVector(init, init + M) + { + } + SmallVector(SmallVector &&other) SPIRV_CROSS_NOEXCEPT : SmallVector() { *this = std::move(other); @@ -245,14 +265,16 @@ class SmallVector : public VectorView return *this; } - SmallVector(const SmallVector &other) - : SmallVector() + SmallVector(const SmallVector &other) SPIRV_CROSS_NOEXCEPT : SmallVector() { *this = other; } - SmallVector &operator=(const SmallVector &other) + SmallVector &operator=(const SmallVector &other) SPIRV_CROSS_NOEXCEPT { + if (this == &other) + return *this; + clear(); reserve(other.buffer_size); for (size_t i = 0; i < other.buffer_size; i++) @@ -261,8 +283,7 @@ class SmallVector : public VectorView return *this; } - explicit SmallVector(size_t count) - : SmallVector() + explicit SmallVector(size_t count) SPIRV_CROSS_NOEXCEPT : SmallVector() { resize(count); } @@ -274,28 +295,28 @@ class SmallVector : public VectorView free(this->ptr); } - void clear() + void clear() SPIRV_CROSS_NOEXCEPT { for (size_t i = 0; i < this->buffer_size; i++) this->ptr[i].~T(); this->buffer_size = 0; } - void push_back(const T &t) + void push_back(const T &t) SPIRV_CROSS_NOEXCEPT { reserve(this->buffer_size + 1); new (&this->ptr[this->buffer_size]) T(t); this->buffer_size++; } - void push_back(T &&t) + void push_back(T &&t) SPIRV_CROSS_NOEXCEPT { reserve(this->buffer_size + 1); new (&this->ptr[this->buffer_size]) T(std::move(t)); this->buffer_size++; } - void pop_back() + void pop_back() SPIRV_CROSS_NOEXCEPT { // Work around false positive warning on GCC 8.3. // Calling pop_back on empty vector is undefined. @@ -304,31 +325,42 @@ class SmallVector : public VectorView } template - void emplace_back(Ts &&... ts) + void emplace_back(Ts &&... 
ts) SPIRV_CROSS_NOEXCEPT { reserve(this->buffer_size + 1); new (&this->ptr[this->buffer_size]) T(std::forward(ts)...); this->buffer_size++; } - void reserve(size_t count) + void reserve(size_t count) SPIRV_CROSS_NOEXCEPT { + if ((count > (std::numeric_limits::max)() / sizeof(T)) || + (count > (std::numeric_limits::max)() / 2)) + { + // Only way this should ever happen is with garbage input, terminate. + std::terminate(); + } + if (count > buffer_capacity) { size_t target_capacity = buffer_capacity; if (target_capacity == 0) target_capacity = 1; - if (target_capacity < N) - target_capacity = N; + // Weird parens works around macro issues on Windows if NOMINMAX is not used. + target_capacity = (std::max)(target_capacity, N); + + // Need to ensure there is a POT value of target capacity which is larger than count, + // otherwise this will overflow. while (target_capacity < count) target_capacity <<= 1u; T *new_buffer = target_capacity > N ? static_cast(malloc(target_capacity * sizeof(T))) : stack_storage.data(); + // If we actually fail this malloc, we are hosed anyways, there is no reason to attempt recovery. if (!new_buffer) - SPIRV_CROSS_THROW("Out of memory."); + std::terminate(); // In case for some reason two allocations both come from same stack. if (new_buffer != this->ptr) @@ -348,7 +380,7 @@ class SmallVector : public VectorView } } - void insert(T *itr, const T *insert_begin, const T *insert_end) + void insert(T *itr, const T *insert_begin, const T *insert_end) SPIRV_CROSS_NOEXCEPT { auto count = size_t(insert_end - insert_begin); if (itr == this->end()) @@ -374,8 +406,10 @@ class SmallVector : public VectorView // Need to allocate new buffer. Move everything to a new buffer. T *new_buffer = target_capacity > N ? static_cast(malloc(target_capacity * sizeof(T))) : stack_storage.data(); + + // If we actually fail this malloc, we are hosed anyways, there is no reason to attempt recovery. 
if (!new_buffer) - SPIRV_CROSS_THROW("Out of memory."); + std::terminate(); // First, move elements from source buffer to new buffer. // We don't deal with types which can throw in move constructor. @@ -447,19 +481,19 @@ class SmallVector : public VectorView } } - void insert(T *itr, const T &value) + void insert(T *itr, const T &value) SPIRV_CROSS_NOEXCEPT { insert(itr, &value, &value + 1); } - T *erase(T *itr) + T *erase(T *itr) SPIRV_CROSS_NOEXCEPT { std::move(itr + 1, this->end(), itr); this->ptr[--this->buffer_size].~T(); return itr; } - void erase(T *start_erase, T *end_erase) + void erase(T *start_erase, T *end_erase) SPIRV_CROSS_NOEXCEPT { if (end_erase == this->end()) { @@ -473,7 +507,7 @@ class SmallVector : public VectorView } } - void resize(size_t new_size) + void resize(size_t new_size) SPIRV_CROSS_NOEXCEPT { if (new_size < this->buffer_size) { @@ -519,7 +553,7 @@ class ObjectPoolBase { public: virtual ~ObjectPoolBase() = default; - virtual void free_opaque(void *ptr) = 0; + virtual void deallocate_opaque(void *ptr) = 0; }; template @@ -553,15 +587,15 @@ class ObjectPool : public ObjectPoolBase return ptr; } - void free(T *ptr) + void deallocate(T *ptr) { ptr->~T(); vacants.push_back(ptr); } - void free_opaque(void *ptr) override + void deallocate_opaque(void *ptr) override { - free(static_cast(ptr)); + deallocate(static_cast(ptr)); } void clear() diff --git a/spirv_cross_error_handling.hpp b/spirv_cross_error_handling.hpp index e821c043d5d..e96ebb9a796 100644 --- a/spirv_cross_error_handling.hpp +++ b/spirv_cross_error_handling.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +15,21 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. 
The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_ERROR_HANDLING #define SPIRV_CROSS_ERROR_HANDLING -#include #include #include #include +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#include +#endif #ifdef SPIRV_CROSS_NAMESPACE_OVERRIDE #define SPIRV_CROSS_NAMESPACE SPIRV_CROSS_NAMESPACE_OVERRIDE @@ -33,6 +42,8 @@ namespace SPIRV_CROSS_NAMESPACE #ifdef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS #if !defined(_MSC_VER) || defined(__clang__) [[noreturn]] +#elif defined(_MSC_VER) +__declspec(noreturn) #endif inline void report_and_abort(const std::string &msg) diff --git a/spirv_cross_parsed_ir.cpp b/spirv_cross_parsed_ir.cpp index f0b6f7b1df8..8d1acf69f97 100644 --- a/spirv_cross_parsed_ir.cpp +++ b/spirv_cross_parsed_ir.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Arm Limited + * Copyright 2018-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cross_parsed_ir.hpp" #include #include @@ -47,32 +54,36 @@ ParsedIR::ParsedIR() // Should have been default-implemented, but need this on MSVC 2013. 
ParsedIR::ParsedIR(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT { - *this = move(other); + *this = std::move(other); } ParsedIR &ParsedIR::operator=(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT { if (this != &other) { - pool_group = move(other.pool_group); - spirv = move(other.spirv); - meta = move(other.meta); + pool_group = std::move(other.pool_group); + spirv = std::move(other.spirv); + meta = std::move(other.meta); for (int i = 0; i < TypeCount; i++) - ids_for_type[i] = move(other.ids_for_type[i]); - ids_for_constant_or_type = move(other.ids_for_constant_or_type); - ids_for_constant_or_variable = move(other.ids_for_constant_or_variable); - declared_capabilities = move(other.declared_capabilities); - declared_extensions = move(other.declared_extensions); - block_meta = move(other.block_meta); - continue_block_to_loop_header = move(other.continue_block_to_loop_header); - entry_points = move(other.entry_points); - ids = move(other.ids); + ids_for_type[i] = std::move(other.ids_for_type[i]); + ids_for_constant_undef_or_type = std::move(other.ids_for_constant_undef_or_type); + ids_for_constant_or_variable = std::move(other.ids_for_constant_or_variable); + declared_capabilities = std::move(other.declared_capabilities); + declared_extensions = std::move(other.declared_extensions); + block_meta = std::move(other.block_meta); + continue_block_to_loop_header = std::move(other.continue_block_to_loop_header); + entry_points = std::move(other.entry_points); + ids = std::move(other.ids); addressing_model = other.addressing_model; memory_model = other.memory_model; default_entry_point = other.default_entry_point; source = other.source; - loop_iteration_depth = other.loop_iteration_depth; + loop_iteration_depth_hard = other.loop_iteration_depth_hard; + loop_iteration_depth_soft = other.loop_iteration_depth_soft; + + meta_needing_name_fixup = std::move(other.meta_needing_name_fixup); + load_type_width = std::move(other.load_type_width); } return *this; } @@ -91,7 +102,7 @@ ParsedIR 
&ParsedIR::operator=(const ParsedIR &other) meta = other.meta; for (int i = 0; i < TypeCount; i++) ids_for_type[i] = other.ids_for_type[i]; - ids_for_constant_or_type = other.ids_for_constant_or_type; + ids_for_constant_undef_or_type = other.ids_for_constant_undef_or_type; ids_for_constant_or_variable = other.ids_for_constant_or_variable; declared_capabilities = other.declared_capabilities; declared_extensions = other.declared_extensions; @@ -100,10 +111,15 @@ ParsedIR &ParsedIR::operator=(const ParsedIR &other) entry_points = other.entry_points; default_entry_point = other.default_entry_point; source = other.source; - loop_iteration_depth = other.loop_iteration_depth; + loop_iteration_depth_hard = other.loop_iteration_depth_hard; + loop_iteration_depth_soft = other.loop_iteration_depth_soft; addressing_model = other.addressing_model; memory_model = other.memory_model; + + meta_needing_name_fixup = other.meta_needing_name_fixup; + load_type_width = other.load_type_width; + // Very deliberate copying of IDs. There is no default copy constructor, nor a simple default constructor. // Construct object first so we have the correct allocator set-up, then we can copy object into our new pool group. ids.clear(); @@ -126,41 +142,161 @@ void ParsedIR::set_id_bounds(uint32_t bounds) block_meta.resize(bounds); } -static string ensure_valid_identifier(const string &name, bool member) +// Roll our own versions of these functions to avoid potential locale shenanigans. +static bool is_alpha(char c) { - // Functions in glslangValidator are mangled with name( stuff. - // Normally, we would never see '(' in any legal identifiers, so just strip them out. 
- auto str = name.substr(0, name.find('(')); + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static bool is_numeric(char c) +{ + return c >= '0' && c <= '9'; +} + +static bool is_alphanumeric(char c) +{ + return is_alpha(c) || is_numeric(c); +} + +static bool is_valid_identifier(const string &name) +{ + if (name.empty()) + return true; + + if (is_numeric(name[0])) + return false; + + for (auto c : name) + if (!is_alphanumeric(c) && c != '_') + return false; - for (uint32_t i = 0; i < str.size(); i++) + bool saw_underscore = false; + // Two underscores in a row is not a valid identifier either. + // Technically reserved, but it's easier to treat it as invalid. + for (auto c : name) { - auto &c = str[i]; + bool is_underscore = c == '_'; + if (is_underscore && saw_underscore) + return false; + saw_underscore = is_underscore; + } + + return true; +} + +static bool is_reserved_prefix(const string &name) +{ + // Generic reserved identifiers used by the implementation. + return name.compare(0, 3, "gl_", 3) == 0 || + // Ignore this case for now, might rewrite internal code to always use spv prefix. + //name.compare(0, 11, "SPIRV_Cross", 11) == 0 || + name.compare(0, 3, "spv", 3) == 0; +} + +static bool is_reserved_identifier(const string &name, bool member, bool allow_reserved_prefixes) +{ + if (!allow_reserved_prefixes && is_reserved_prefix(name)) + return true; + + if (member) + { + // Reserved member identifiers come in one form: + // _m[0-9]+$. + if (name.size() < 3) + return false; + + if (name.compare(0, 2, "_m", 2) != 0) + return false; + + size_t index = 2; + while (index < name.size() && is_numeric(name[index])) + index++; + + return index == name.size(); + } + else + { + // Reserved non-member identifiers come in two forms: + // _[0-9]+$, used for temporaries which map directly to a SPIR-V ID. + // _[0-9]+_, used for auxillary temporaries which derived from a SPIR-V ID. 
+ if (name.size() < 2) + return false; + + if (name[0] != '_' || !is_numeric(name[1])) + return false; + + size_t index = 2; + while (index < name.size() && is_numeric(name[index])) + index++; + + return index == name.size() || (index < name.size() && name[index] == '_'); + } +} - if (member) +bool ParsedIR::is_globally_reserved_identifier(std::string &str, bool allow_reserved_prefixes) +{ + return is_reserved_identifier(str, false, allow_reserved_prefixes); +} + +uint32_t ParsedIR::get_spirv_version() const +{ + return spirv[1]; +} + +static string make_unreserved_identifier(const string &name) +{ + if (is_reserved_prefix(name)) + return "_RESERVED_IDENTIFIER_FIXUP_" + name; + else + return "_RESERVED_IDENTIFIER_FIXUP" + name; +} + +void ParsedIR::sanitize_underscores(std::string &str) +{ + // Compact adjacent underscores to make it valid. + auto dst = str.begin(); + auto src = dst; + bool saw_underscore = false; + while (src != str.end()) + { + bool is_underscore = *src == '_'; + if (saw_underscore && is_underscore) { - // _m variables are reserved by the internal implementation, - // otherwise, make sure the name is a valid identifier. - if (i == 0) - c = isalpha(c) ? c : '_'; - else if (i == 2 && str[0] == '_' && str[1] == 'm') - c = isalpha(c) ? c : '_'; - else - c = isalnum(c) ? c : '_'; + src++; } else { - // _ variables are reserved by the internal implementation, - // otherwise, make sure the name is a valid identifier. - if (i == 0 || (str[0] == '_' && i == 1)) - c = isalpha(c) ? c : '_'; - else - c = isalnum(c) ? c : '_'; + if (dst != src) + *dst = *src; + dst++; + src++; + saw_underscore = is_underscore; } } + str.erase(dst, str.end()); +} + +static string ensure_valid_identifier(const string &name) +{ + // Functions in glslangValidator are mangled with name( stuff. + // Normally, we would never see '(' in any legal identifiers, so just strip them out. 
+ auto str = name.substr(0, name.find('(')); + + if (str.empty()) + return str; + + if (is_numeric(str[0])) + str[0] = '_'; + + for (auto &c : str) + if (!is_alphanumeric(c) && c != '_') + c = '_'; + + ParsedIR::sanitize_underscores(str); return str; } -const string &ParsedIR::get_name(uint32_t id) const +const string &ParsedIR::get_name(ID id) const { auto *m = find_meta(id); if (m) @@ -169,7 +305,7 @@ const string &ParsedIR::get_name(uint32_t id) const return empty_string; } -const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const +const string &ParsedIR::get_member_name(TypeID id, uint32_t index) const { auto *m = find_meta(id); if (m) @@ -182,38 +318,48 @@ const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const return empty_string; } -void ParsedIR::set_name(uint32_t id, const string &name) +void ParsedIR::sanitize_identifier(std::string &name, bool member, bool allow_reserved_prefixes) { - auto &str = meta[id].decoration.alias; - str.clear(); - - if (name.empty()) - return; - - // Reserved for temporaries. - if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) - return; - - str = ensure_valid_identifier(name, false); + if (!is_valid_identifier(name)) + name = ensure_valid_identifier(name); + if (is_reserved_identifier(name, member, allow_reserved_prefixes)) + name = make_unreserved_identifier(name); } -void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name) +void ParsedIR::fixup_reserved_names() { - meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); - - auto &str = meta[id].members[index].alias; - str.clear(); - if (name.empty()) - return; + for (uint32_t id : meta_needing_name_fixup) + { + // Don't rename remapped variables like 'gl_LastFragDepthARM'. 
+ if (ids[id].get_type() == TypeVariable && get(id).remapped_variable) + continue; + + auto &m = meta[id]; + sanitize_identifier(m.decoration.alias, false, false); + for (auto &memb : m.members) + sanitize_identifier(memb.alias, true, false); + } + meta_needing_name_fixup.clear(); +} - // Reserved for unnamed members. - if (name[0] == '_' && name.size() >= 3 && name[1] == 'm' && isdigit(name[2])) - return; +void ParsedIR::set_name(ID id, const string &name) +{ + auto &m = meta[id]; + m.decoration.alias = name; + if (!is_valid_identifier(name) || is_reserved_identifier(name, false, false)) + meta_needing_name_fixup.insert(id); +} - str = ensure_valid_identifier(name, true); +void ParsedIR::set_member_name(TypeID id, uint32_t index, const string &name) +{ + auto &m = meta[id]; + m.members.resize(max(m.members.size(), size_t(index) + 1)); + m.members[index].alias = name; + if (!is_valid_identifier(name) || is_reserved_identifier(name, true, false)) + meta_needing_name_fixup.insert(id); } -void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const string &argument) +void ParsedIR::set_decoration_string(ID id, Decoration decoration, const string &argument) { auto &dec = meta[id].decoration; dec.decoration_flags.set(decoration); @@ -229,7 +375,7 @@ void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const s } } -void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argument) +void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument) { auto &dec = meta[id].decoration; dec.decoration_flags.set(decoration); @@ -253,6 +399,18 @@ void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argum dec.offset = argument; break; + case DecorationXfbBuffer: + dec.xfb_buffer = argument; + break; + + case DecorationXfbStride: + dec.xfb_stride = argument; + break; + + case DecorationStream: + dec.stream = argument; + break; + case DecorationArrayStride: dec.array_stride = argument; break; 
@@ -295,10 +453,11 @@ void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argum } } -void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +void ParsedIR::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) { - meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); - auto &dec = meta[id].members[index]; + auto &m = meta[id]; + m.members.resize(max(m.members.size(), size_t(index) + 1)); + auto &dec = m.members[index]; dec.decoration_flags.set(decoration); switch (decoration) @@ -324,6 +483,18 @@ void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration dec dec.offset = argument; break; + case DecorationXfbBuffer: + dec.xfb_buffer = argument; + break; + + case DecorationXfbStride: + dec.xfb_stride = argument; + break; + + case DecorationStream: + dec.stream = argument; + break; + case DecorationSpecId: dec.spec_id = argument; break; @@ -343,7 +514,7 @@ void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration dec // Recursively marks any constants referenced by the specified constant instruction as being used // as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp). 
-void ParsedIR::mark_used_as_array_length(uint32_t id) +void ParsedIR::mark_used_as_array_length(ID id) { switch (ids[id].get_type()) { @@ -354,8 +525,16 @@ void ParsedIR::mark_used_as_array_length(uint32_t id) case TypeConstantOp: { auto &cop = get(id); - for (uint32_t arg_id : cop.arguments) - mark_used_as_array_length(arg_id); + if (cop.opcode == OpCompositeExtract) + mark_used_as_array_length(cop.arguments[0]); + else if (cop.opcode == OpCompositeInsert) + { + mark_used_as_array_length(cop.arguments[0]); + mark_used_as_array_length(cop.arguments[1]); + } + else + for (uint32_t arg_id : cop.arguments) + mark_used_as_array_length(arg_id); break; } @@ -367,6 +546,17 @@ void ParsedIR::mark_used_as_array_length(uint32_t id) } } +Bitset ParsedIR::get_buffer_block_type_flags(const SPIRType &type) const +{ + if (type.member_types.empty()) + return {}; + + Bitset all_members_flags = get_member_decoration_bitset(type.self, 0); + for (uint32_t i = 1; i < uint32_t(type.member_types.size()); i++) + all_members_flags.merge_and(get_member_decoration_bitset(type.self, i)); + return all_members_flags; +} + Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const { auto &type = get(var.basetype); @@ -383,15 +573,12 @@ Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const if (type.member_types.empty()) return base_flags; - Bitset all_members_flags = get_member_decoration_bitset(type.self, 0); - for (uint32_t i = 1; i < uint32_t(type.member_types.size()); i++) - all_members_flags.merge_and(get_member_decoration_bitset(type.self, i)); - + auto all_members_flags = get_buffer_block_type_flags(type); base_flags.merge_or(all_members_flags); return base_flags; } -const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index) const +const Bitset &ParsedIR::get_member_decoration_bitset(TypeID id, uint32_t index) const { auto *m = find_meta(id); if (m) @@ -404,12 +591,12 @@ const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, 
uint32_t index return cleared_bitset; } -bool ParsedIR::has_decoration(uint32_t id, Decoration decoration) const +bool ParsedIR::has_decoration(ID id, Decoration decoration) const { return get_decoration_bitset(id).get(decoration); } -uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const +uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -429,6 +616,12 @@ uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const return dec.component; case DecorationOffset: return dec.offset; + case DecorationXfbBuffer: + return dec.xfb_buffer; + case DecorationXfbStride: + return dec.xfb_stride; + case DecorationStream: + return dec.stream; case DecorationBinding: return dec.binding; case DecorationDescriptorSet: @@ -450,7 +643,7 @@ uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const } } -const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration) const +const string &ParsedIR::get_decoration_string(ID id, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -471,7 +664,7 @@ const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration } } -void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) +void ParsedIR::unset_decoration(ID id, Decoration decoration) { auto &dec = meta[id].decoration; dec.decoration_flags.clear(decoration); @@ -493,6 +686,18 @@ void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) dec.offset = 0; break; + case DecorationXfbBuffer: + dec.xfb_buffer = 0; + break; + + case DecorationXfbStride: + dec.xfb_stride = 0; + break; + + case DecorationStream: + dec.stream = 0; + break; + case DecorationBinding: dec.binding = 0; break; @@ -533,12 +738,12 @@ void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) } } -bool ParsedIR::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +bool ParsedIR::has_member_decoration(TypeID id, 
uint32_t index, Decoration decoration) const { return get_member_decoration_bitset(id, index).get(decoration); } -uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +uint32_t ParsedIR::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -563,6 +768,12 @@ uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration return dec.binding; case DecorationOffset: return dec.offset; + case DecorationXfbBuffer: + return dec.xfb_buffer; + case DecorationXfbStride: + return dec.xfb_stride; + case DecorationStream: + return dec.stream; case DecorationSpecId: return dec.spec_id; case DecorationIndex: @@ -572,7 +783,7 @@ uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration } } -const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const +const Bitset &ParsedIR::get_decoration_bitset(ID id) const { auto *m = find_meta(id); if (m) @@ -584,9 +795,10 @@ const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const return cleared_bitset; } -void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration, const string &argument) +void ParsedIR::set_member_decoration_string(TypeID id, uint32_t index, Decoration decoration, const string &argument) { - meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); + auto &m = meta[id]; + m.members.resize(max(m.members.size(), size_t(index) + 1)); auto &dec = meta[id].members[index]; dec.decoration_flags.set(decoration); @@ -601,7 +813,7 @@ void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decorat } } -const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +const string &ParsedIR::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const { auto *m = find_meta(id); if (m) @@ -624,7 +836,7 @@ const string 
&ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index return empty_string; } -void ParsedIR::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +void ParsedIR::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) { auto &m = meta[id]; if (index >= m.members.size()) @@ -651,6 +863,18 @@ void ParsedIR::unset_member_decoration(uint32_t id, uint32_t index, Decoration d dec.offset = 0; break; + case DecorationXfbBuffer: + dec.xfb_buffer = 0; + break; + + case DecorationXfbStride: + dec.xfb_stride = 0; + break; + + case DecorationStream: + dec.stream = 0; + break; + case DecorationSpecId: dec.spec_id = 0; break; @@ -677,7 +901,7 @@ uint32_t ParsedIR::increase_bound_by(uint32_t incr_amount) return uint32_t(curr_bound); } -void ParsedIR::remove_typed_id(Types type, uint32_t id) +void ParsedIR::remove_typed_id(Types type, ID id) { auto &type_ids = ids_for_type[type]; type_ids.erase(remove(begin(type_ids), end(type_ids), id), end(type_ids)); @@ -692,18 +916,25 @@ void ParsedIR::reset_all_of_type(Types type) ids_for_type[type].clear(); } -void ParsedIR::add_typed_id(Types type, uint32_t id) +void ParsedIR::add_typed_id(Types type, ID id) { - if (loop_iteration_depth) + if (loop_iteration_depth_hard != 0) SPIRV_CROSS_THROW("Cannot add typed ID while looping over it."); + if (loop_iteration_depth_soft != 0) + { + if (!ids[id].empty()) + SPIRV_CROSS_THROW("Cannot override IDs when loop is soft locked."); + return; + } + if (ids[id].empty() || ids[id].get_type() != type) { switch (type) { case TypeConstant: ids_for_constant_or_variable.push_back(id); - ids_for_constant_or_type.push_back(id); + ids_for_constant_undef_or_type.push_back(id); break; case TypeVariable: @@ -712,7 +943,8 @@ void ParsedIR::add_typed_id(Types type, uint32_t id) case TypeType: case TypeConstantOp: - ids_for_constant_or_type.push_back(id); + case TypeUndef: + ids_for_constant_undef_or_type.push_back(id); break; default: @@ -731,7 +963,7 @@ void 
ParsedIR::add_typed_id(Types type, uint32_t id) } } -const Meta *ParsedIR::find_meta(uint32_t id) const +const Meta *ParsedIR::find_meta(ID id) const { auto itr = meta.find(id); if (itr != end(meta)) @@ -740,7 +972,7 @@ const Meta *ParsedIR::find_meta(uint32_t id) const return nullptr; } -Meta *ParsedIR::find_meta(uint32_t id) +Meta *ParsedIR::find_meta(ID id) { auto itr = meta.find(id); if (itr != end(meta)) @@ -749,4 +981,94 @@ Meta *ParsedIR::find_meta(uint32_t id) return nullptr; } +ParsedIR::LoopLock ParsedIR::create_loop_hard_lock() const +{ + return ParsedIR::LoopLock(&loop_iteration_depth_hard); +} + +ParsedIR::LoopLock ParsedIR::create_loop_soft_lock() const +{ + return ParsedIR::LoopLock(&loop_iteration_depth_soft); +} + +ParsedIR::LoopLock::~LoopLock() +{ + if (lock) + (*lock)--; +} + +ParsedIR::LoopLock::LoopLock(uint32_t *lock_) + : lock(lock_) +{ + if (lock) + (*lock)++; +} + +ParsedIR::LoopLock::LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT +{ + *this = std::move(other); +} + +ParsedIR::LoopLock &ParsedIR::LoopLock::operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT +{ + if (lock) + (*lock)--; + lock = other.lock; + other.lock = nullptr; + return *this; +} + +void ParsedIR::make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_id_set) +{ + auto &constant_type = get(type); + + if (constant_type.pointer) + { + if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + auto &constant = variant_set(ids[id], type); + constant.self = id; + constant.make_null(constant_type); + } + else if (!constant_type.array.empty()) + { + assert(constant_type.parent_type); + uint32_t parent_id = increase_bound_by(1); + make_constant_null(parent_id, constant_type.parent_type, add_to_typed_id_set); + + if (!constant_type.array_size_literal.back()) + SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal."); + + SmallVector elements(constant_type.array.back()); + for (uint32_t i = 0; i < constant_type.array.back(); i++) + elements[i] = parent_id; + + 
if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + variant_set(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id; + } + else if (!constant_type.member_types.empty()) + { + uint32_t member_ids = increase_bound_by(uint32_t(constant_type.member_types.size())); + SmallVector elements(constant_type.member_types.size()); + for (uint32_t i = 0; i < constant_type.member_types.size(); i++) + { + make_constant_null(member_ids + i, constant_type.member_types[i], add_to_typed_id_set); + elements[i] = member_ids + i; + } + + if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + variant_set(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id; + } + else + { + if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + auto &constant = variant_set(ids[id], type); + constant.self = id; + constant.make_null(constant_type); + } +} + } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_cross_parsed_ir.hpp b/spirv_cross_parsed_ir.hpp index 79e9e15bb05..7f35c3815cd 100644 --- a/spirv_cross_parsed_ir.hpp +++ b/spirv_cross_parsed_ir.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Arm Limited + * Copyright 2018-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_PARSED_IR_HPP #define SPIRV_CROSS_PARSED_IR_HPP @@ -57,19 +64,26 @@ class ParsedIR SmallVector ids; // Various meta data for IDs, decorations, names, etc. - std::unordered_map meta; + std::unordered_map meta; // Holds all IDs which have a certain type. // This is needed so we can iterate through a specific kind of resource quickly, // and in-order of module declaration. 
- SmallVector ids_for_type[TypeCount]; + SmallVector ids_for_type[TypeCount]; // Special purpose lists which contain a union of types. // This is needed so we can declare specialization constants and structs in an interleaved fashion, // among other things. - // Constants can be of struct type, and struct array sizes can use specialization constants. - SmallVector ids_for_constant_or_type; - SmallVector ids_for_constant_or_variable; + // Constants can be undef or of struct type, and struct array sizes can use specialization constants. + SmallVector ids_for_constant_undef_or_type; + SmallVector ids_for_constant_or_variable; + + // We need to keep track of the width the Ops that contains a type for the + // OpSwitch instruction, since this one doesn't contains the type in the + // instruction itself. And in some case we need to cast the condition to + // wider types. We only need the width to do the branch fixup since the + // type check itself can be done at runtime + std::unordered_map load_type_width; // Declared capabilities and extensions in the SPIR-V module. // Not really used except for reflection at the moment. @@ -88,12 +102,12 @@ class ParsedIR }; using BlockMetaFlags = uint8_t; SmallVector block_meta; - std::unordered_map continue_block_to_loop_header; + std::unordered_map continue_block_to_loop_header; // Normally, we'd stick SPIREntryPoint in ids array, but it conflicts with SPIRFunction. // Entry points can therefore be seen as some sort of meta structure. - std::unordered_map entry_points; - uint32_t default_entry_point = 0; + std::unordered_map entry_points; + FunctionID default_entry_point = 0; struct Source { @@ -114,50 +128,76 @@ class ParsedIR // Can be useful for simple "raw" reflection. // However, most members are here because the Parser needs most of these, // and might as well just have the whole suite of decoration/name handling in one place. 
- void set_name(uint32_t id, const std::string &name); - const std::string &get_name(uint32_t id) const; - void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); - void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); - bool has_decoration(uint32_t id, spv::Decoration decoration) const; - uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; - const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; - const Bitset &get_decoration_bitset(uint32_t id) const; - void unset_decoration(uint32_t id, spv::Decoration decoration); + void set_name(ID id, const std::string &name); + const std::string &get_name(ID id) const; + void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument); + bool has_decoration(ID id, spv::Decoration decoration) const; + uint32_t get_decoration(ID id, spv::Decoration decoration) const; + const std::string &get_decoration_string(ID id, spv::Decoration decoration) const; + const Bitset &get_decoration_bitset(ID id) const; + void unset_decoration(ID id, spv::Decoration decoration); // Decoration handling methods (for members of a struct). 
- void set_member_name(uint32_t id, uint32_t index, const std::string &name); - const std::string &get_member_name(uint32_t id, uint32_t index) const; - void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); - void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + void set_member_name(TypeID id, uint32_t index, const std::string &name); + const std::string &get_member_name(TypeID id, uint32_t index) const; + void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument); - uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; - bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; - void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const; + bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const; + void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration); - void mark_used_as_array_length(uint32_t id); + void mark_used_as_array_length(ID id); uint32_t increase_bound_by(uint32_t count); Bitset get_buffer_block_flags(const SPIRVariable &var) const; + Bitset get_buffer_block_type_flags(const SPIRType &type) const; + + void add_typed_id(Types type, ID id); + void remove_typed_id(Types 
type, ID id); + + class LoopLock + { + public: + explicit LoopLock(uint32_t *counter); + LoopLock(const LoopLock &) = delete; + void operator=(const LoopLock &) = delete; + LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT; + LoopLock &operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT; + ~LoopLock(); + + private: + uint32_t *lock; + }; + + // This must be held while iterating over a type ID array. + // It is undefined if someone calls set<>() while we're iterating over a data structure, so we must + // make sure that this case is avoided. - void add_typed_id(Types type, uint32_t id); - void remove_typed_id(Types type, uint32_t id); + // If we have a hard lock, it is an error to call set<>(), and an exception is thrown. + // If we have a soft lock, we silently ignore any additions to the typed arrays. + // This should only be used for physical ID remapping where we need to create an ID, but we will never + // care about iterating over them. + LoopLock create_loop_hard_lock() const; + LoopLock create_loop_soft_lock() const; template void for_each_typed_id(const Op &op) { - loop_iteration_depth++; + auto loop_lock = create_loop_hard_lock(); for (auto &id : ids_for_type[T::type]) { if (ids[id].get_type() == static_cast(T::type)) op(id, get(id)); } - loop_iteration_depth--; } template void for_each_typed_id(const Op &op) const { + auto loop_lock = create_loop_hard_lock(); for (auto &id : ids_for_type[T::type]) { if (ids[id].get_type() == static_cast(T::type)) @@ -173,14 +213,24 @@ class ParsedIR void reset_all_of_type(Types type); - Meta *find_meta(uint32_t id); - const Meta *find_meta(uint32_t id) const; + Meta *find_meta(ID id); + const Meta *find_meta(ID id) const; const std::string &get_empty_string() const { return empty_string; } + void make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_id_set); + + void fixup_reserved_names(); + + static void sanitize_underscores(std::string &str); + static void sanitize_identifier(std::string &str, bool member, bool 
allow_reserved_prefixes); + static bool is_globally_reserved_identifier(std::string &str, bool allow_reserved_prefixes); + + uint32_t get_spirv_version() const; + private: template T &get(uint32_t id) @@ -194,9 +244,12 @@ class ParsedIR return variant_get(ids[id]); } - uint32_t loop_iteration_depth = 0; + mutable uint32_t loop_iteration_depth_hard = 0; + mutable uint32_t loop_iteration_depth_soft = 0; std::string empty_string; Bitset cleared_bitset; + + std::unordered_set meta_needing_name_fixup; }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_cross_util.cpp b/spirv_cross_util.cpp index 6ab5d264568..7cff010d1c1 100644 --- a/spirv_cross_util.cpp +++ b/spirv_cross_util.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_cross_util.hpp" #include "spirv_common.hpp" diff --git a/spirv_cross_util.hpp b/spirv_cross_util.hpp index 7c4030b0b29..e6e3fcdb634 100644 --- a/spirv_cross_util.hpp +++ b/spirv_cross_util.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #ifndef SPIRV_CROSS_UTIL_HPP #define SPIRV_CROSS_UTIL_HPP diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 837135cb87e..4b22d47eaeb 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_glsl.hpp" #include "GLSL.std.450.h" #include "spirv_common.hpp" @@ -33,6 +40,13 @@ using namespace spv; using namespace SPIRV_CROSS_NAMESPACE; using namespace std; +enum ExtraSubExpressionType +{ + // Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map. + EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000, + EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000 +}; + static bool is_unsigned_opcode(Op op) { // Don't have to be exhaustive, only relevant for legacy target checking ... @@ -145,32 +159,6 @@ static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard } } -// Sanitizes underscores for GLSL where multiple underscores in a row are not allowed. 
-string CompilerGLSL::sanitize_underscores(const string &str) -{ - string res; - res.reserve(str.size()); - - bool last_underscore = false; - for (auto c : str) - { - if (c == '_') - { - if (last_underscore) - continue; - - res += c; - last_underscore = true; - } - else - { - res += c; - last_underscore = false; - } - } - return res; -} - void CompilerGLSL::init() { if (ir.source.known) @@ -286,7 +274,7 @@ static uint32_t pls_format_to_components(PlsFormat format) } } -static const char *vector_swizzle(int vecsize, int index) +const char *CompilerGLSL::vector_swizzle(int vecsize, int index) { static const char *const swizzle[4][4] = { { ".x", ".y", ".z", ".w" }, @@ -308,8 +296,19 @@ static const char *vector_swizzle(int vecsize, int index) return swizzle[vecsize - 1][index]; } -void CompilerGLSL::reset() +void CompilerGLSL::reset(uint32_t iteration_count) { + // Sanity check the iteration count to be robust against a certain class of bugs where + // we keep forcing recompilations without making clear forward progress. + // In buggy situations we will loop forever, or loop for an unbounded number of iterations. + // Certain types of recompilations are considered to make forward progress, + // but in almost all situations, we'll never see more than 3 iterations. + // It is highly context-sensitive when we need to force recompilation, + // and it is not practical with the current architecture + // to resolve everything up front. + if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress) + SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!"); + // We do some speculative optimizations which should pretty much always work out, // but just in case the SPIR-V is rather weird, recompile until it's happy. // This typically only means one extra pass. @@ -317,11 +316,18 @@ void CompilerGLSL::reset() // Clear invalid expression tracking. 
invalid_expressions.clear(); + composite_insert_overwritten.clear(); current_function = nullptr; // Clear temporary usage tracking. expression_usage_counts.clear(); forwarded_temporaries.clear(); + suppressed_usage_tracking.clear(); + + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + flushed_phi_variables.clear(); + + current_emitting_switch_stack.clear(); reset_name_caches(); @@ -337,6 +343,7 @@ void CompilerGLSL::reset() statement_count = 0; indent = 0; + current_loop_level = 0; } void CompilerGLSL::remap_pls_variables() @@ -366,6 +373,28 @@ void CompilerGLSL::remap_pls_variables() } } +void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent) +{ + subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location }); + inout_color_attachments.push_back({ color_location, coherent }); +} + +bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const +{ + return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), + [&](const std::pair &elem) { + return elem.first == location; + }) != end(inout_color_attachments); +} + +bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const +{ + return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), + [&](const std::pair &elem) { + return elem.first == location && !elem.second; + }) != end(inout_color_attachments); +} + void CompilerGLSL::find_static_extensions() { ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { @@ -378,10 +407,9 @@ void CompilerGLSL::find_static_extensions() } else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) { - if (options.es) - SPIRV_CROSS_THROW("64-bit integers not supported in ES profile."); - if (!options.es) - require_extension_internal("GL_ARB_gpu_shader_int64"); + if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback 
requires 310. + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); + require_extension_internal("GL_ARB_gpu_shader_int64"); } else if (type.basetype == SPIRType::Half) { @@ -435,15 +463,47 @@ void CompilerGLSL::find_static_extensions() require_extension_internal("GL_ARB_tessellation_shader"); break; - case ExecutionModelRayGenerationNV: - case ExecutionModelIntersectionNV: - case ExecutionModelAnyHitNV: - case ExecutionModelClosestHitNV: - case ExecutionModelMissNV: - case ExecutionModelCallableNV: + case ExecutionModelRayGenerationKHR: + case ExecutionModelIntersectionKHR: + case ExecutionModelAnyHitKHR: + case ExecutionModelClosestHitKHR: + case ExecutionModelMissKHR: + case ExecutionModelCallableKHR: + // NV enums are aliases. if (options.es || options.version < 460) SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above."); - require_extension_internal("GL_NV_ray_tracing"); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics."); + + // Need to figure out if we should target KHR or NV extension based on capabilities. + for (auto &cap : ir.declared_capabilities) + { + if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR || + cap == CapabilityRayTraversalPrimitiveCullingKHR) + { + ray_tracing_is_khr = true; + break; + } + } + + if (ray_tracing_is_khr) + { + // In KHR ray tracing we pass payloads by pointer instead of location, + // so make sure we assign locations properly. 
+ ray_tracing_khr_fixup_locations(); + require_extension_internal("GL_EXT_ray_tracing"); + } + else + require_extension_internal("GL_NV_ray_tracing"); + break; + + case ExecutionModelMeshEXT: + case ExecutionModelTaskEXT: + if (options.es || options.version < 450) + SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above."); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics."); + require_extension_internal("GL_EXT_mesh_shader"); break; default: @@ -451,7 +511,35 @@ void CompilerGLSL::find_static_extensions() } if (!pls_inputs.empty() || !pls_outputs.empty()) + { + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders."); require_extension_internal("GL_EXT_shader_pixel_local_storage"); + } + + if (!inout_color_attachments.empty()) + { + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders."); + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL."); + + bool has_coherent = false; + bool has_incoherent = false; + + for (auto &att : inout_color_attachments) + { + if (att.second) + has_coherent = true; + else + has_incoherent = true; + } + + if (has_coherent) + require_extension_internal("GL_EXT_shader_framebuffer_fetch"); + if (has_incoherent) + require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent"); + } if (options.separate_shader_objects && !options.es && options.version < 410) require_extension_internal("GL_ARB_separate_shader_objects"); @@ -471,38 +559,121 @@ void CompilerGLSL::find_static_extensions() SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported."); } - // Check for nonuniform qualifier. + // Check for nonuniform qualifier and passthrough. // Instead of looping over all decorations to find this, just look at capabilities. 
for (auto &cap : ir.declared_capabilities) { - bool nonuniform_indexing = false; switch (cap) { case CapabilityShaderNonUniformEXT: + if (!options.vulkan_semantics) + require_extension_internal("GL_NV_gpu_shader5"); + else + require_extension_internal("GL_EXT_nonuniform_qualifier"); + break; case CapabilityRuntimeDescriptorArrayEXT: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL."); require_extension_internal("GL_EXT_nonuniform_qualifier"); - nonuniform_indexing = true; + break; + + case CapabilityGeometryShaderPassthroughNV: + if (execution.model == ExecutionModelGeometry) + { + require_extension_internal("GL_NV_geometry_shader_passthrough"); + execution.geometry_passthrough = true; + } + break; + + case CapabilityVariablePointers: + case CapabilityVariablePointersStorageBuffer: + SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL."); + + case CapabilityMultiView: + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_multiview"); + else + { + require_extension_internal("GL_OVR_multiview2"); + if (options.ovr_multiview_view_count == 0) + SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2."); + if (get_execution_model() != ExecutionModelVertex) + SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); + } + break; + + case CapabilityRayQueryKHR: + if (options.es || options.version < 460 || !options.vulkan_semantics) + SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460."); + require_extension_internal("GL_EXT_ray_query"); + ray_tracing_is_khr = true; + break; + + case CapabilityRayTraversalPrimitiveCullingKHR: + if (options.es || options.version < 460 || !options.vulkan_semantics) + SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460."); + require_extension_internal("GL_EXT_ray_flags_primitive_culling"); + ray_tracing_is_khr = true; break; default: break; } + } - if (nonuniform_indexing) - break; + if 
(options.ovr_multiview_view_count) + { + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics."); + if (get_execution_model() != ExecutionModelVertex) + SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); + require_extension_internal("GL_OVR_multiview2"); } + + // KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR. + for (auto &ext : ir.declared_extensions) + if (ext == "SPV_NV_fragment_shader_barycentric") + barycentric_is_nv = true; +} + +void CompilerGLSL::ray_tracing_khr_fixup_locations() +{ + uint32_t location = 0; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + // Incoming payload storage can also be used for tracing. + if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR && + var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR) + return; + if (is_hidden_variable(var)) + return; + set_decoration(var.self, DecorationLocation, location++); + }); } string CompilerGLSL::compile() { - if (options.vulkan_semantics) - backend.allow_precision_qualifiers = true; + ir.fixup_reserved_names(); + + if (!options.vulkan_semantics) + { + // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers + backend.nonuniform_qualifier = ""; + backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround; + } + backend.allow_precision_qualifiers = options.vulkan_semantics || options.es; backend.force_gl_in_out_block = true; backend.supports_extensions = true; + backend.use_array_constructor = true; + backend.workgroup_size_is_hidden = true; + backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics; + backend.support_precise_qualifier = + (!options.es && options.version >= 400) || (options.es && options.version >= 320); + + if (is_legacy_es()) + 
backend.support_case_fallthrough = false; // Scan the SPIR-V to find trivial uses of extensions. + fixup_anonymous_struct_names(); fixup_type_alias(); reorder_type_alias(); build_function_control_flow_graphs_and_analyze(); @@ -510,6 +681,9 @@ string CompilerGLSL::compile() fixup_image_load_store_access(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); + if (!inout_color_attachments.empty()) + emit_inout_fragment_outputs_copy_to_subpass_inputs(); // Shaders might cast unrelated data to pointers of non-block types. // Find all such instances and make sure we can cast the pointers to a synthesized block type. @@ -519,21 +693,32 @@ string CompilerGLSL::compile() uint32_t pass_count = 0; do { - if (pass_count >= 3) - SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); - - reset(); + reset(pass_count); buffer.reset(); emit_header(); emit_resources(); + emit_extension_workarounds(get_execution_model()); emit_function(get(ir.default_entry_point), Bitset()); pass_count++; } while (is_forcing_recompilation()); + // Implement the interlocked wrapper function at the end. + // The body was implemented in lieu of main(). + if (interlocked_is_complex) + { + statement("void main()"); + begin_scope(); + statement("// Interlocks were used in a way not compatible with GLSL, this is very slow."); + statement("SPIRV_Cross_beginInvocationInterlock();"); + statement("spvMainInterlockedBody();"); + statement("SPIRV_Cross_endInvocationInterlock();"); + end_scope(); + } + // Entry point in GLSL is always main(). 
get_entry_point().name = "main"; @@ -549,6 +734,8 @@ void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const Sp const SpecializationConstant &wg_y, const SpecializationConstant &wg_z) { auto &execution = get_entry_point(); + bool builtin_workgroup = execution.workgroup_size.constant != 0; + bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId); if (wg_x.id) { @@ -557,6 +744,8 @@ void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const Sp else arguments.push_back(join("local_size_x = ", get(wg_x.id).specialization_constant_macro_name)); } + else if (use_local_size_id && execution.workgroup_size.id_x) + arguments.push_back(join("local_size_x = ", get(execution.workgroup_size.id_x).scalar())); else arguments.push_back(join("local_size_x = ", execution.workgroup_size.x)); @@ -567,6 +756,8 @@ void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const Sp else arguments.push_back(join("local_size_y = ", get(wg_y.id).specialization_constant_macro_name)); } + else if (use_local_size_id && execution.workgroup_size.id_y) + arguments.push_back(join("local_size_y = ", get(execution.workgroup_size.id_y).scalar())); else arguments.push_back(join("local_size_y = ", execution.workgroup_size.y)); @@ -577,10 +768,27 @@ void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const Sp else arguments.push_back(join("local_size_z = ", get(wg_z.id).specialization_constant_macro_name)); } + else if (use_local_size_id && execution.workgroup_size.id_z) + arguments.push_back(join("local_size_z = ", get(execution.workgroup_size.id_z).scalar())); else arguments.push_back(join("local_size_z = ", execution.workgroup_size.z)); } +void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature) +{ + if (options.vulkan_semantics) + { + auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature); + 
require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension)); + } + else + { + if (!shader_subgroup_supporter.is_feature_requested(feature)) + force_recompile(); + shader_subgroup_supporter.request_feature(feature); + } +} + void CompilerGLSL::emit_header() { auto &execution = get_entry_point(); @@ -600,9 +808,48 @@ void CompilerGLSL::emit_header() require_extension_internal("GL_ARB_shader_image_load_store"); } + // Needed for: layout(post_depth_coverage) in; + if (execution.flags.get(ExecutionModePostDepthCoverage)) + require_extension_internal("GL_ARB_post_depth_coverage"); + + // Needed for: layout({pixel,sample}_interlock_[un]ordered) in; + bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) || + execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT); + + if (interlock_used) + { + if (options.es) + { + if (options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock."); + require_extension_internal("GL_NV_fragment_shader_interlock"); + } + else + { + if (options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + require_extension_internal("GL_ARB_fragment_shader_interlock"); + } + } + for (auto &ext : forced_extensions) { - if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") + if (ext == "GL_ARB_gpu_shader_int64") + { + statement("#if defined(GL_ARB_gpu_shader_int64)"); + statement("#extension GL_ARB_gpu_shader_int64 : require"); + if (!options.vulkan_semantics || options.es) + { + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + } + statement("#else"); + statement("#error No extension available for 64-bit integers."); + statement("#endif"); + } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") { // Special case, 
this extension has a potential fallback to another vendor extension in normal GLSL. // GL_AMD_gpu_shader_half_float is a superset, so try that first. @@ -622,23 +869,135 @@ void CompilerGLSL::emit_header() statement("#error No extension available for FP16."); statement("#endif"); } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8") + { + if (options.vulkan_semantics) + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); + else + { + statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + statement("#else"); + statement("#error No extension available for Int8."); + statement("#endif"); + } + } else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16") { if (options.vulkan_semantics) statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); else { - statement("#if defined(GL_AMD_gpu_shader_int16)"); + statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); + statement("#elif defined(GL_AMD_gpu_shader_int16)"); statement("#extension GL_AMD_gpu_shader_int16 : require"); + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); statement("#else"); statement("#error No extension available for Int16."); statement("#endif"); } } + else if (ext == "GL_ARB_post_depth_coverage") + { + if (options.es) + statement("#extension GL_EXT_post_depth_coverage : require"); + else + { + statement("#if defined(GL_ARB_post_depth_coverge)"); + statement("#extension GL_ARB_post_depth_coverage : require"); + statement("#else"); + statement("#extension GL_EXT_post_depth_coverage : require"); + statement("#endif"); + } + } + else if (!options.vulkan_semantics && ext == 
"GL_ARB_shader_draw_parameters") + { + // Soft-enable this extension on plain GLSL. + statement("#ifdef ", ext); + statement("#extension ", ext, " : enable"); + statement("#endif"); + } + else if (ext == "GL_EXT_control_flow_attributes") + { + // These are just hints so we can conditionally enable and fallback in the shader. + statement("#if defined(GL_EXT_control_flow_attributes)"); + statement("#extension GL_EXT_control_flow_attributes : require"); + statement("#define SPIRV_CROSS_FLATTEN [[flatten]]"); + statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]"); + statement("#define SPIRV_CROSS_UNROLL [[unroll]]"); + statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]"); + statement("#else"); + statement("#define SPIRV_CROSS_FLATTEN"); + statement("#define SPIRV_CROSS_BRANCH"); + statement("#define SPIRV_CROSS_UNROLL"); + statement("#define SPIRV_CROSS_LOOP"); + statement("#endif"); + } + else if (ext == "GL_NV_fragment_shader_interlock") + { + statement("#extension GL_NV_fragment_shader_interlock : require"); + statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()"); + statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()"); + } + else if (ext == "GL_ARB_fragment_shader_interlock") + { + statement("#ifdef GL_ARB_fragment_shader_interlock"); + statement("#extension GL_ARB_fragment_shader_interlock : enable"); + statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()"); + statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()"); + statement("#elif defined(GL_INTEL_fragment_shader_ordering)"); + statement("#extension GL_INTEL_fragment_shader_ordering : enable"); + statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()"); + statement("#define SPIRV_Cross_endInvocationInterlock()"); + statement("#endif"); + } else statement("#extension ", ext, " : require"); } + if (!options.vulkan_semantics) + { + using Supp 
= ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); + + for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++) + { + auto feature = static_cast(feature_index); + if (!shader_subgroup_supporter.is_feature_requested(feature)) + continue; + + auto exts = Supp::get_candidates_for_feature(feature, result); + if (exts.empty()) + continue; + + statement(""); + + for (auto &ext : exts) + { + const char *name = Supp::get_extension_name(ext); + const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext); + auto extra_names = Supp::get_extra_required_extension_names(ext); + statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")", + (*extra_predicate != '\0' ? " && " : ""), extra_predicate); + for (const auto &e : extra_names) + statement("#extension ", e, " : enable"); + statement("#extension ", name, " : require"); + } + + if (!Supp::can_feature_be_implemented_without_extensions(feature)) + { + statement("#else"); + statement("#error No extensions available to emulate requested subgroup feature."); + } + + statement("#endif"); + } + } + for (auto &header : header_lines) statement(header); @@ -647,8 +1006,11 @@ void CompilerGLSL::emit_header() switch (execution.model) { + case ExecutionModelVertex: + if (options.ovr_multiview_view_count) + inputs.push_back(join("num_views = ", options.ovr_multiview_view_count)); + break; case ExecutionModelGeometry: - outputs.push_back(join("max_vertices = ", execution.output_vertices)); if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1) inputs.push_back(join("invocations = ", execution.invocations)); if (execution.flags.get(ExecutionModeInputPoints)) @@ -661,12 +1023,18 @@ void CompilerGLSL::emit_header() inputs.push_back("triangles"); if (execution.flags.get(ExecutionModeInputTrianglesAdjacency)) inputs.push_back("triangles_adjacency"); - if (execution.flags.get(ExecutionModeOutputTriangleStrip)) - 
outputs.push_back("triangle_strip"); - if (execution.flags.get(ExecutionModeOutputPoints)) - outputs.push_back("points"); - if (execution.flags.get(ExecutionModeOutputLineStrip)) - outputs.push_back("line_strip"); + + if (!execution.geometry_passthrough) + { + // For passthrough, these are implies and cannot be declared in shader. + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + if (execution.flags.get(ExecutionModeOutputTriangleStrip)) + outputs.push_back("triangle_strip"); + if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + if (execution.flags.get(ExecutionModeOutputLineStrip)) + outputs.push_back("line_strip"); + } break; case ExecutionModelTessellationControl: @@ -701,15 +1069,18 @@ void CompilerGLSL::emit_header() break; case ExecutionModelGLCompute: + case ExecutionModelTaskEXT: + case ExecutionModelMeshEXT: { - if (execution.workgroup_size.constant != 0) + if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)) { SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro // declarations before we can emit the work group size. 
- if (options.vulkan_semantics || ((wg_x.id == 0) && (wg_y.id == 0) && (wg_z.id == 0))) + if (options.vulkan_semantics || + ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0)))) build_workgroup_size(inputs, wg_x, wg_y, wg_z); } else @@ -718,6 +1089,18 @@ void CompilerGLSL::emit_header() inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); } + + if (execution.model == ExecutionModelMeshEXT) + { + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + outputs.push_back(join("max_primitives = ", execution.output_primitives)); + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + outputs.push_back("triangles"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + outputs.push_back("lines"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + } break; } @@ -763,6 +1146,27 @@ void CompilerGLSL::emit_header() if (execution.flags.get(ExecutionModeEarlyFragmentTests)) inputs.push_back("early_fragment_tests"); + if (execution.flags.get(ExecutionModePostDepthCoverage)) + inputs.push_back("post_depth_coverage"); + + if (interlock_used) + statement("#if defined(GL_ARB_fragment_shader_interlock)"); + + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT)) + statement("layout(pixel_interlock_ordered) in;"); + else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT)) + statement("layout(pixel_interlock_unordered) in;"); + else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT)) + statement("layout(sample_interlock_ordered) in;"); + else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + statement("layout(sample_interlock_unordered) in;"); + + if (interlock_used) + { + statement("#elif !defined(GL_INTEL_fragment_shader_ordering)"); + statement("#error Fragment Shader Interlock/Ordering extension missing!"); + statement("#endif"); + 
} if (!options.es && execution.flags.get(ExecutionModeDepthGreater)) statement("layout(depth_greater) out float gl_FragDepth;"); @@ -775,6 +1179,10 @@ void CompilerGLSL::emit_header() break; } + for (auto &cap : ir.declared_capabilities) + if (cap == CapabilityRayTraversalPrimitiveCullingKHR) + statement("layout(primitive_culling);"); + if (!inputs.empty()) statement("layout(", merge(inputs), ") in;"); if (!outputs.empty()) @@ -795,7 +1203,8 @@ void CompilerGLSL::emit_struct(SPIRType &type) // Type-punning with these types is legal, which complicates things // when we are storing struct and array types in an SSBO for example. // If the type master is packed however, we can no longer assume that the struct declaration will be redundant. - if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) return; add_resource_name(type.self); @@ -823,6 +1232,9 @@ void CompilerGLSL::emit_struct(SPIRType &type) emitted = true; } + if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget)) + emit_struct_padding_target(type); + end_scope_decl(); if (emitted) @@ -846,8 +1258,33 @@ string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) res += "sample "; if (flags.get(DecorationInvariant)) res += "invariant "; + if (flags.get(DecorationPerPrimitiveEXT)) + res += "perprimitiveEXT "; + if (flags.get(DecorationExplicitInterpAMD)) + { + require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); res += "__explicitInterpAMD "; + } + + if (flags.get(DecorationPerVertexKHR)) + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + res += 
"pervertexNV "; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + res += "pervertexEXT "; + } + } return res; } @@ -857,8 +1294,7 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) if (is_legacy()) return ""; - bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || - ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); if (!is_block) return ""; @@ -869,6 +1305,9 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) SmallVector attr; + if (has_member_decoration(type.self, index, DecorationPassthroughNV)) + attr.push_back("passthrough"); + // We can only apply layouts on members in block interfaces. // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly. // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct @@ -911,8 +1350,11 @@ string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers. // This is only done selectively in GLSL as needed. 
- if (has_extended_decoration(type.self, SPIRVCrossDecorationPacked) && dec.decoration_flags.get(DecorationOffset)) + if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) && + dec.decoration_flags.get(DecorationOffset)) attr.push_back(join("offset = ", dec.offset)); + else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset)) + attr.push_back(join("xfb_offset = ", dec.offset)); if (attr.empty()) return ""; @@ -1160,24 +1602,22 @@ uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const B auto &tmp = get(parent); uint32_t size = type_to_packed_size(tmp, flags, packing); - if (tmp.array.empty()) - { - uint32_t alignment = type_to_packed_alignment(type, flags, packing); - return (size + alignment - 1) & ~(alignment - 1); - } - else - { - // For multidimensional arrays, array stride always matches size of subtype. - // The alignment cannot change because multidimensional arrays are basically N * M array elements. - return size; - } + uint32_t alignment = type_to_packed_alignment(type, flags, packing); + return (size + alignment - 1) & ~(alignment - 1); } uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing) { if (!type.array.empty()) { - return to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing); + uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing); + + // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size, + // so that it is possible to pack other vectors into the last element. 
+ if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct) + packed_size -= (4 - type.vecsize) * (type.width / 8); + + return packed_size; } // If using PhysicalStorageBufferEXT storage class, this is a pointer, @@ -1250,6 +1690,11 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &f else size = type.vecsize * type.columns * base_alignment; } + + // For matrices in HLSL, the last element has a size which depends on its vector size, + // so that it is possible to pack other vectors into the last element. + if (packing_is_hlsl(packing) && type.columns > 1) + size -= (4 - type.vecsize) * (type.width / 8); } } @@ -1257,7 +1702,8 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &f } bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, - uint32_t start_offset, uint32_t end_offset) + uint32_t *failed_validation_index, uint32_t start_offset, + uint32_t end_offset) { // This is very tricky and error prone, but try to be exhaustive and correct here. // SPIR-V doesn't directly say if we're using std430 or std140. @@ -1301,7 +1747,7 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty(); uint32_t packed_size = 0; - if (!member_can_be_unsized) + if (!member_can_be_unsized || packing_is_hlsl(packing)) packed_size = type_to_packed_size(memb_type, member_flags, packing); // We only need to care about this if we have non-array types which can straddle the vec4 boundary. 
@@ -1314,13 +1760,14 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin packed_alignment = max(packed_alignment, 16u); } - uint32_t alignment = max(packed_alignment, pad_alignment); - offset = (offset + alignment - 1) & ~(alignment - 1); - + uint32_t actual_offset = type_struct_member_offset(type, i); // Field is not in the specified range anymore and we can ignore any further fields. - if (offset >= end_offset) + if (actual_offset >= end_offset) break; + uint32_t alignment = max(packed_alignment, pad_alignment); + offset = (offset + alignment - 1) & ~(alignment - 1); + // The next member following a struct member is aligned to the base alignment of the struct that came before. // GL 4.5 spec, 7.6.2.2. if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer) @@ -1329,21 +1776,35 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin pad_alignment = 1; // Only care about packing if we are in the given range - if (offset >= start_offset) + if (actual_offset >= start_offset) { // We only care about offsets in std140, std430, etc ... // For EnhancedLayout variants, we have the flexibility to choose our own offsets. if (!packing_has_flexible_offset(packing)) { - uint32_t actual_offset = type_struct_member_offset(type, i); if (actual_offset != offset) // This cannot be the packing we're looking for. + { + if (failed_validation_index) + *failed_validation_index = i; return false; + } + } + else if ((actual_offset & (alignment - 1)) != 0) + { + // We still need to verify that alignment rules are observed, even if we have explicit offset. + if (failed_validation_index) + *failed_validation_index = i; + return false; } // Verify array stride rules. 
if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) != type_struct_member_array_stride(type, i)) + { + if (failed_validation_index) + *failed_validation_index = i; return false; + } // Verify that sub-structs also follow packing rules. // We cannot use enhanced layouts on substructs, so they better be up to spec. @@ -1352,12 +1813,14 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin if (!memb_type.pointer && !memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing)) { + if (failed_validation_index) + *failed_validation_index = i; return false; } } // Bump size. - offset += packed_size; + offset = actual_offset + packed_size; } return true; @@ -1408,17 +1871,22 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) if (is_legacy()) return ""; + if (subpass_input_is_framebuffer_fetch(var.self)) + return ""; + SmallVector attr; - auto &dec = ir.meta[var.self].decoration; auto &type = get(var.basetype); - auto &flags = dec.decoration_flags; - auto typeflags = ir.meta[type.self].decoration.decoration_flags; + auto &flags = get_decoration_bitset(var.self); + auto &typeflags = get_decoration_bitset(type.self); + + if (flags.get(DecorationPassthroughNV)) + attr.push_back("passthrough"); if (options.vulkan_semantics && var.storage == StorageClassPushConstant) attr.push_back("push_constant"); - else if (var.storage == StorageClassShaderRecordBufferNV) - attr.push_back("shaderRecordNV"); + else if (var.storage == StorageClassShaderRecordBufferKHR) + attr.push_back(ray_tracing_is_khr ? 
"shaderRecordEXT" : "shaderRecordNV"); if (flags.get(DecorationRowMajor)) attr.push_back("row_major"); @@ -1428,7 +1896,7 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) if (options.vulkan_semantics) { if (flags.get(DecorationInputAttachmentIndex)) - attr.push_back(join("input_attachment_index = ", dec.input_attachment)); + attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex))); } bool is_block = has_decoration(type.self, DecorationBlock); @@ -1441,37 +1909,153 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) // If our members have location decorations, we don't need to // emit location decorations at the top as well (looks weird). if (!combined_decoration.get(DecorationLocation)) - attr.push_back(join("location = ", dec.location)); + attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation))); } - // Can only declare Component if we can declare location. - if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block)) + if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput && + location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation))) { - if (!options.es) - { - if (options.version < 440 && options.version >= 140) - require_extension_internal("GL_ARB_enhanced_layouts"); - else if (options.version < 140) - SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); - attr.push_back(join("component = ", dec.component)); - } - else - SPIRV_CROSS_THROW("Component decoration is not supported in ES targets."); + attr.push_back("noncoherent"); + } + + // Transform feedback + bool uses_enhanced_layouts = false; + if (is_block && var.storage == StorageClassOutput) + { + // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself, + // since all members must match the same xfb_buffer. 
The only thing we will declare for members of the block + // is the xfb_offset. + uint32_t member_count = uint32_t(type.member_types.size()); + bool have_xfb_buffer_stride = false; + bool have_any_xfb_offset = false; + bool have_geom_stream = false; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; + + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride)) + { + have_xfb_buffer_stride = true; + xfb_buffer = get_decoration(var.self, DecorationXfbBuffer); + xfb_stride = get_decoration(var.self, DecorationXfbStride); + } + + if (flags.get(DecorationStream)) + { + have_geom_stream = true; + geom_stream = get_decoration(var.self, DecorationStream); + } + + // Verify that none of the members violate our assumption. + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationStream)) + { + uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream); + if (have_geom_stream && member_geom_stream != geom_stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = member_geom_stream; + } + + // Only members with an Offset decoration participate in XFB. 
+ if (!has_member_decoration(type.self, i, DecorationOffset)) + continue; + have_any_xfb_offset = true; + + if (has_member_decoration(type.self, i, DecorationXfbBuffer)) + { + uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer); + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + } + + if (has_member_decoration(type.self, i, DecorationXfbStride)) + { + uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbStride mismatch."); + have_xfb_buffer_stride = true; + xfb_stride = stride; + } + } + + if (have_xfb_buffer_stride && have_any_xfb_offset) + { + attr.push_back(join("xfb_buffer = ", xfb_buffer)); + attr.push_back(join("xfb_stride = ", xfb_stride)); + uses_enhanced_layouts = true; + } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } + } + else if (var.storage == StorageClassOutput) + { + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset)) + { + // XFB for standalone variables, we can emit all decorations. 
+ attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer))); + attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride))); + attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset))); + uses_enhanced_layouts = true; + } + + if (flags.get(DecorationStream)) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } + } + + // Can only declare Component if we can declare location. + if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block)) + { + uses_enhanced_layouts = true; + attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent))); + } + + if (uses_enhanced_layouts) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40."); + if (!options.es && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + } + else if (options.es) + SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL."); } if (flags.get(DecorationIndex)) - attr.push_back(join("index = ", dec.index)); + attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex))); // Do not emit set = decoration in regular GLSL output, but // we need to preserve it in Vulkan GLSL mode. 
- if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferNV) + if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR) { if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics) - attr.push_back(join("set = ", dec.set)); + attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet))); } bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; - bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || + bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR || (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer; bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock); @@ -1493,14 +2077,14 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) if (!can_use_buffer_blocks && var.storage == StorageClassUniform) can_use_binding = false; - if (var.storage == StorageClassShaderRecordBufferNV) + if (var.storage == StorageClassShaderRecordBufferKHR) can_use_binding = false; if (can_use_binding && flags.get(DecorationBinding)) - attr.push_back(join("binding = ", dec.binding)); + attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding))); - if (flags.get(DecorationOffset)) - attr.push_back(join("offset = ", dec.offset)); + if (var.storage != StorageClassOutput && flags.get(DecorationOffset)) + attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset))); // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. // If SPIR-V does not comply with either layout, we cannot really work around it. 
@@ -1551,7 +2135,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std430"; } else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) @@ -1565,12 +2149,12 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std140"; } else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout)) { - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); require_extension_internal("GL_EXT_scalar_block_layout"); return "scalar"; } @@ -1585,7 +2169,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool suppo buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { // UBOs can support std430 with GL_EXT_scalar_block_layout. - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); require_extension_internal("GL_EXT_scalar_block_layout"); return "std430"; } @@ -1618,9 +2202,8 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) // OpenGL has no concept of push constant blocks, implement it as a uniform struct. 
auto &type = get(var.basetype); - auto &flags = ir.meta[var.self].decoration.decoration_flags; - flags.clear(DecorationBinding); - flags.clear(DecorationDescriptorSet); + unset_decoration(var.self, DecorationBinding); + unset_decoration(var.self, DecorationDescriptorSet); #if 0 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) @@ -1630,14 +2213,13 @@ void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. - auto &block_flags = ir.meta[type.self].decoration.decoration_flags; - bool block_flag = block_flags.get(DecorationBlock); - block_flags.clear(DecorationBlock); + bool block_flag = has_decoration(type.self, DecorationBlock); + unset_decoration(type.self, DecorationBlock); emit_struct(type); if (block_flag) - block_flags.set(DecorationBlock); + set_decoration(type.self, DecorationBlock); emit_uniform(var); statement(""); @@ -1677,8 +2259,9 @@ void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var) statement(""); } -void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration) +void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration) { + auto &type = get(type_id); string buffer_name; if (forward_declaration) @@ -1710,6 +2293,9 @@ void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_decl block_names.insert(buffer_name); block_ssbo_names.insert(buffer_name); + + // Ensure we emit the correct name when emitting non-forward pointer type. 
+ ir.meta[type.self].decoration.alias = buffer_name; } else if (type.basetype != SPIRType::Struct) buffer_name = type_to_glsl(type); @@ -1718,8 +2304,34 @@ void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_decl if (!forward_declaration) { + auto itr = physical_storage_type_to_alignment.find(type_id); + uint32_t alignment = 0; + if (itr != physical_storage_type_to_alignment.end()) + alignment = itr->second.alignment; + if (type.basetype == SPIRType::Struct) - statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true), ") buffer ", buffer_name); + { + SmallVector attributes; + attributes.push_back("buffer_reference"); + if (alignment) + attributes.push_back(join("buffer_reference_align = ", alignment)); + attributes.push_back(buffer_to_packing_standard(type, true)); + + auto flags = ir.get_buffer_block_type_flags(type); + string decorations; + if (flags.get(DecorationRestrict)) + decorations += " restrict"; + if (flags.get(DecorationCoherent)) + decorations += " coherent"; + if (flags.get(DecorationNonReadable)) + decorations += " writeonly"; + if (flags.get(DecorationNonWritable)) + decorations += " readonly"; + + statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name); + } + else if (alignment) + statement("layout(buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name); else statement("layout(buffer_reference) buffer ", buffer_name); @@ -1757,7 +2369,7 @@ void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) auto &type = get(var.basetype); Bitset flags = ir.get_buffer_block_flags(var); - bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || + bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); bool is_restrict = ssbo && flags.get(DecorationRestrict); bool is_writeonly = 
ssbo && flags.get(DecorationNonReadable); @@ -1848,12 +2460,24 @@ const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) { auto &execution = get_entry_point(); + if (subpass_input_is_framebuffer_fetch(var.self)) + return ""; + if (var.storage == StorageClassInput || var.storage == StorageClassOutput) { if (is_legacy() && execution.model == ExecutionModelVertex) return var.storage == StorageClassInput ? "attribute " : "varying "; else if (is_legacy() && execution.model == ExecutionModelFragment) return "varying "; // Fragment outputs are renamed so they never hit this case. + else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) + { + uint32_t loc = get_decoration(var.self, DecorationLocation); + bool is_inout = location_is_framebuffer_fetch(loc); + if (is_inout) + return "inout "; + else + return "out "; + } else return var.storage == StorageClassInput ? "in " : "out "; } @@ -1862,74 +2486,143 @@ const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) { return "uniform "; } - else if (var.storage == StorageClassRayPayloadNV) + else if (var.storage == StorageClassRayPayloadKHR) { - return "rayPayloadNV "; + return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV "; } - else if (var.storage == StorageClassIncomingRayPayloadNV) + else if (var.storage == StorageClassIncomingRayPayloadKHR) { - return "rayPayloadInNV "; + return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV "; } - else if (var.storage == StorageClassHitAttributeNV) + else if (var.storage == StorageClassHitAttributeKHR) { - return "hitAttributeNV "; + return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV "; } - else if (var.storage == StorageClassCallableDataNV) + else if (var.storage == StorageClassCallableDataKHR) { - return "callableDataNV "; + return ray_tracing_is_khr ? 
"callableDataEXT " : "callableDataNV "; } - else if (var.storage == StorageClassIncomingCallableDataNV) + else if (var.storage == StorageClassIncomingCallableDataKHR) { - return "callableDataInNV "; + return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV "; } return ""; } +void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices) +{ + uint32_t member_type_id = type.self; + const SPIRType *member_type = &type; + const SPIRType *parent_type = nullptr; + auto flattened_name = basename; + for (auto &index : indices) + { + flattened_name += "_"; + flattened_name += to_member_name(*member_type, index); + parent_type = member_type; + member_type_id = member_type->member_types[index]; + member_type = &get(member_type_id); + } + + assert(member_type->basetype != SPIRType::Struct); + + // We're overriding struct member names, so ensure we do so on the primary type. + if (parent_type->type_alias) + parent_type = &get(parent_type->type_alias); + + // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, + // which is not allowed. + ParsedIR::sanitize_underscores(flattened_name); + + uint32_t last_index = indices.back(); + + // Pass in the varying qualifier here so it will appear in the correct declaration order. + // Replace member name while emitting it so it encodes both struct name and member name. + auto backup_name = get_member_name(parent_type->self, last_index); + auto member_name = to_member_name(*parent_type, last_index); + set_member_name(parent_type->self, last_index, flattened_name); + emit_struct_member(*parent_type, member_type_id, last_index, qual); + // Restore member name. 
+ set_member_name(parent_type->self, last_index, member_name); +} + +void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices) +{ + auto sub_indices = indices; + sub_indices.push_back(0); + + const SPIRType *member_type = &type; + for (auto &index : indices) + member_type = &get(member_type->member_types[index]); + + assert(member_type->basetype == SPIRType::Struct); + + if (!member_type->array.empty()) + SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks."); + + for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) + { + sub_indices.back() = i; + if (get(member_type->member_types[i]).basetype == SPIRType::Struct) + emit_flattened_io_block_struct(basename, type, qual, sub_indices); + else + emit_flattened_io_block_member(basename, type, qual, sub_indices); + } +} + void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) { - auto &type = get(var.basetype); - if (!type.array.empty()) + auto &var_type = get(var.basetype); + if (!var_type.array.empty()) SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings."); + // Emit flattened types based on the type alias. Normally, we are never supposed to emit + // struct declarations for aliased types. + auto &type = var_type.type_alias ? get(var_type.type_alias) : var_type; + auto old_flags = ir.meta[type.self].decoration.decoration_flags; // Emit the members as if they are part of a block to get all qualifiers. 
ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock); type.member_name_cache.clear(); + SmallVector member_indices; + member_indices.push_back(0); + auto basename = to_name(var.self); + uint32_t i = 0; for (auto &member : type.member_types) { add_member_name(type, i); auto &membertype = get(member); + member_indices.back() = i; if (membertype.basetype == SPIRType::Struct) - SPIRV_CROSS_THROW("Cannot flatten struct inside structs in I/O variables."); - - // Pass in the varying qualifier here so it will appear in the correct declaration order. - // Replace member name while emitting it so it encodes both struct name and member name. - // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, - // which is not allowed. - auto backup_name = get_member_name(type.self, i); - auto member_name = to_member_name(type, i); - set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name))); - emit_struct_member(type, member, i, qual); - // Restore member name. - set_member_name(type.self, i, member_name); + emit_flattened_io_block_struct(basename, type, qual, member_indices); + else + emit_flattened_io_block_member(basename, type, qual, member_indices); i++; } ir.meta[type.self].decoration.decoration_flags = old_flags; - // Treat this variable as flattened from now on. - flattened_structs.insert(var.self); + // Treat this variable as fully flattened from now on. + flattened_structs[var.self] = true; } void CompilerGLSL::emit_interface_block(const SPIRVariable &var) { auto &type = get(var.basetype); + if (var.storage == StorageClassInput && type.basetype == SPIRType::Double && + !options.es && options.version < 410) + { + require_extension_internal("GL_ARB_vertex_attrib_64bit"); + } + // Either make it plain in/out or in/out blocks depending on what shader is doing ... 
bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); const char *qual = to_storage_qualifiers_glsl(var); @@ -1939,7 +2632,8 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) // ESSL earlier than 310 and GLSL earlier than 150 did not support // I/O variables which are struct types. // To support this, flatten the struct into separate varyings instead. - if ((options.es && options.version < 310) || (!options.es && options.version < 150)) + if (options.force_flattened_io_blocks || (options.es && options.version < 310) || + (!options.es && options.version < 150)) { // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320. // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150). @@ -1954,6 +2648,9 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) require_extension_internal("GL_EXT_shader_io_blocks"); } + // Workaround to make sure we can emit "patch in/out" correctly. + fixup_io_block_patch_primitive_qualifiers(var); + // Block names should never alias. auto block_name = to_name(type.self, false); @@ -1975,7 +2672,15 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) // Instance names cannot alias block names. resource_names.insert(block_name); - statement(layout_for_variable(var), qual, block_name); + const char *block_qualifier; + if (has_decoration(var.self, DecorationPatch)) + block_qualifier = "patch "; + else if (has_decoration(var.self, DecorationPerPrimitiveEXT)) + block_qualifier = "perprimitiveEXT "; + else + block_qualifier = ""; + + statement(layout_for_variable(var), block_qualifier, qual, block_name); begin_scope(); type.member_name_cache.clear(); @@ -1999,22 +2704,38 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) // I/O variables which are struct types. // To support this, flatten the struct into separate varyings instead. 
if (type.basetype == SPIRType::Struct && - ((options.es && options.version < 310) || (!options.es && options.version < 150))) + (options.force_flattened_io_blocks || (options.es && options.version < 310) || + (!options.es && options.version < 150))) { emit_flattened_io_block(var, qual); } else { add_resource_name(var.self); + + // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. + // Opt for unsized as it's the more "correct" variant to use. + bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() && + !has_decoration(var.self, DecorationPatch) && + (get_entry_point().model == ExecutionModelTessellationControl || + get_entry_point().model == ExecutionModelTessellationEvaluation); + + uint32_t old_array_size = 0; + bool old_array_size_literal = true; + + if (control_point_input_array) + { + swap(type.array.back(), old_array_size); + swap(type.array_size_literal.back(), old_array_size_literal); + } + statement(layout_for_variable(var), to_qualifiers_glsl(var.self), variable_decl(type, to_name(var.self), var.self), ";"); - // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). 
- if (var.storage == StorageClassOutput && var.initializer) + if (control_point_input_array) { - auto &entry_func = this->get(ir.default_entry_point); - entry_func.fixup_hooks_in.push_back( - [&]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); }); + swap(type.array.back(), old_array_size); + swap(type.array_size_literal.back(), old_array_size_literal); } } } @@ -2023,7 +2744,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) void CompilerGLSL::emit_uniform(const SPIRVariable &var) { auto &type = get(var.basetype); - if (type.basetype == SPIRType::Image && type.image.sampled == 2) + if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData) { if (!options.es && options.version < 420) require_extension_internal("GL_ARB_shader_image_load_store"); @@ -2043,17 +2764,37 @@ string CompilerGLSL::constant_value_macro_name(uint32_t id) void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant) { auto &type = get(constant.basetype); + add_resource_name(constant.self); auto name = to_name(constant.self); statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";"); } +int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const +{ + auto &entry_point = get_entry_point(); + int index = -1; + + // Need to redirect specialization constants which are used as WorkGroupSize to the builtin, + // since the spec constant declarations are never explicitly declared. 
+ if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId)) + { + if (c.self == entry_point.workgroup_size.id_x) + index = 0; + else if (c.self == entry_point.workgroup_size.id_y) + index = 1; + else if (c.self == entry_point.workgroup_size.id_z) + index = 2; + } + + return index; +} + void CompilerGLSL::emit_constant(const SPIRConstant &constant) { auto &type = get(constant.constant_type); - auto name = to_name(constant.self); SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); // This specialization constant is implicitly declared by emitting layout() in; if (constant.self == workgroup_size_id) @@ -2062,7 +2803,8 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant) // These specialization constants are implicitly declared by emitting layout() in; // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration // later can use macro overrides for work group size. - bool is_workgroup_size_constant = constant.self == wg_x.id || constant.self == wg_y.id || constant.self == wg_z.id; + bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id || + ConstantID(constant.self) == wg_z.id; if (options.vulkan_semantics && is_workgroup_size_constant) { @@ -2076,6 +2818,9 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant) return; } + add_resource_name(constant.self); + auto name = to_name(constant.self); + // Only scalars have constant IDs. 
if (has_decoration(constant.self, DecorationSpecId)) { @@ -2106,6 +2851,46 @@ void CompilerGLSL::emit_entry_point_declarations() { } +void CompilerGLSL::replace_illegal_names(const unordered_set &keywords) +{ + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (is_hidden_variable(var)) + return; + + auto *meta = ir.find_meta(var.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + }); + + ir.for_each_typed_id([&](uint32_t, const SPIRFunction &func) { + auto *meta = ir.find_meta(func.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + }); + + ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { + auto *meta = ir.find_meta(type.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + + for (auto &memb : meta->members) + if (keywords.find(memb.alias) != end(keywords)) + memb.alias = join("_", memb.alias); + }); +} + void CompilerGLSL::replace_illegal_names() { // clang-format off @@ -2160,14 +2945,7 @@ void CompilerGLSL::replace_illegal_names() }; // clang-format on - ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { - if (!is_hidden_variable(var)) - { - auto &m = ir.meta[var.self].decoration; - if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords)) - m.alias = join("_", m.alias); - } - }); + replace_illegal_names(keywords); } void CompilerGLSL::replace_fragment_output(SPIRVariable &var) @@ -2274,19 +3052,124 @@ void CompilerGLSL::emit_pls() void CompilerGLSL::fixup_image_load_store_access() { + if (!options.enable_storage_image_qualifier_deduction) + return; + ir.for_each_typed_id([&](uint32_t var, const SPIRVariable &) { auto &vartype = expression_type(var); - if (vartype.basetype == SPIRType::Image) + if (vartype.basetype == SPIRType::Image 
&& vartype.image.sampled == 2) { - // Older glslangValidator does not emit required qualifiers here. + // Very old glslangValidator and HLSL compilers do not emit required qualifiers here. // Solve this by making the image access as restricted as possible and loosen up if we need to. // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. - auto &flags = ir.meta[var].decoration.decoration_flags; - if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable)) + if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable)) + { + set_decoration(var, DecorationNonWritable); + set_decoration(var, DecorationNonReadable); + } + } + }); +} + +static bool is_block_builtin(BuiltIn builtin) +{ + return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance; +} + +bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) +{ + // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block. 
+ + if (storage != StorageClassOutput) + return false; + bool should_force = false; + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (should_force) + return; + + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == storage && block && is_builtin_variable(var)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationBuiltIn) && + is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) && + has_member_decoration(type.self, i, DecorationOffset)) + { + should_force = true; + } + } + } + else if (var.storage == storage && !block && is_builtin_variable(var)) + { + if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) && + has_decoration(var.self, DecorationOffset)) + { + should_force = true; + } + } + }); + + // If we're declaring clip/cull planes with control points we need to force block declaration. + if ((get_execution_model() == ExecutionModelTessellationControl || + get_execution_model() == ExecutionModelMeshEXT) && + (clip_distance_count || cull_distance_count)) + { + should_force = true; + } + + return should_force; +} + +void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model) +{ + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && + is_builtin_variable(var)) + { + if (model != ExecutionModelMeshEXT) + { + // Make sure the array has a supported name in the code. 
+ if (var.storage == StorageClassOutput) + set_name(var.self, "gl_out"); + else if (var.storage == StorageClassInput) + set_name(var.self, "gl_in"); + } + else + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + { + set_name(var.self, "gl_MeshPrimitivesEXT"); + set_name(type.self, "gl_MeshPerPrimitiveEXT"); + } + else + { + set_name(var.self, "gl_MeshVerticesEXT"); + set_name(type.self, "gl_MeshPerVertexEXT"); + } + } + } + + if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block) + { + auto *m = ir.find_meta(var.self); + if (m && m->decoration.builtin) { - flags.set(DecorationNonWritable); - flags.set(DecorationNonReadable); + auto builtin_type = m->decoration.builtin_type; + if (builtin_type == BuiltInPrimitivePointIndicesEXT) + set_name(var.self, "gl_PrimitivePointIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveLineIndicesEXT) + set_name(var.self, "gl_PrimitiveLineIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT) + set_name(var.self, "gl_PrimitiveTriangleIndicesEXT"); } } }); @@ -2298,13 +3181,23 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo Bitset global_builtins; const SPIRVariable *block_var = nullptr; bool emitted_block = false; - bool builtin_array = false; // Need to use declared size in the type. // These variables might have been declared, but not statically used, so we haven't deduced their size yet. 
uint32_t cull_distance_size = 0; uint32_t clip_distance_size = 0; + bool have_xfb_buffer_stride = false; + bool have_geom_stream = false; + bool have_any_xfb_offset = false; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; + std::unordered_map builtin_xfb_offsets; + + const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool { + return builtin == BuiltInPosition || builtin == BuiltInPointSize || + builtin == BuiltInClipDistance || builtin == BuiltInCullDistance; + }; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); bool block = has_decoration(type.self, DecorationBlock); @@ -2315,28 +3208,91 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo uint32_t index = 0; for (auto &m : ir.meta[type.self].members) { - if (m.builtin) + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) - cull_distance_size = this->get(type.member_types[index]).array.front(); + cull_distance_size = to_array_size_literal(this->get(type.member_types[index])); else if (m.builtin_type == BuiltInClipDistance) - clip_distance_size = this->get(type.member_types[index]).array.front(); + clip_distance_size = to_array_size_literal(this->get(type.member_types[index])); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = m.stream; + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } } index++; } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) && + has_decoration(var.self, DecorationXfbStride)) + { + uint32_t buffer_index = get_decoration(var.self, 
DecorationXfbBuffer); + uint32_t stride = get_decoration(var.self, DecorationXfbStride); + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } } else if (var.storage == storage && !block && is_builtin_variable(var)) { // While we're at it, collect all declared global builtins (HLSL mostly ...). auto &m = ir.meta[var.self].decoration; - if (m.builtin) + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { global_builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) - cull_distance_size = type.array.front(); + cull_distance_size = to_array_size_literal(type); else if (m.builtin_type == BuiltInClipDistance) - clip_distance_size = type.array.front(); + clip_distance_size = to_array_size_literal(type); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) && + m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + uint32_t buffer_index = m.xfb_buffer; + uint32_t stride = m.xfb_stride; + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + + if 
(is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } } } @@ -2348,7 +3304,6 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo emitted_builtins = builtins; emitted_block = true; - builtin_array = !type.array.empty(); block_var = &var; }); @@ -2365,54 +3320,114 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo return; if (storage == StorageClassOutput) - statement("out gl_PerVertex"); + { + SmallVector attr; + if (have_xfb_buffer_stride && have_any_xfb_offset) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); + if (!options.es && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + } + else if (options.es) + SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer."); + attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride)); + } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", geom_stream)); + } + + if (model == ExecutionModelMeshEXT) + statement("out gl_MeshPerVertexEXT"); + else if (!attr.empty()) + statement("layout(", merge(attr), ") out gl_PerVertex"); + else + statement("out gl_PerVertex"); + } else - statement("in gl_PerVertex"); + { + 
// If we have passthrough, there is no way PerVertex cannot be passthrough. + if (get_entry_point().geometry_passthrough) + statement("layout(passthrough) in gl_PerVertex"); + else + statement("in gl_PerVertex"); + } begin_scope(); if (emitted_builtins.get(BuiltInPosition)) - statement("vec4 gl_Position;"); + { + auto itr = builtin_xfb_offsets.find(BuiltInPosition); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;"); + else + statement("vec4 gl_Position;"); + } + if (emitted_builtins.get(BuiltInPointSize)) - statement("float gl_PointSize;"); + { + auto itr = builtin_xfb_offsets.find(BuiltInPointSize); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;"); + else + statement("float gl_PointSize;"); + } + if (emitted_builtins.get(BuiltInClipDistance)) - statement("float gl_ClipDistance[", clip_distance_size, "];"); + { + auto itr = builtin_xfb_offsets.find(BuiltInClipDistance); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];"); + else + statement("float gl_ClipDistance[", clip_distance_size, "];"); + } + if (emitted_builtins.get(BuiltInCullDistance)) - statement("float gl_CullDistance[", cull_distance_size, "];"); + { + auto itr = builtin_xfb_offsets.find(BuiltInCullDistance); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];"); + else + statement("float gl_CullDistance[", cull_distance_size, "];"); + } + + bool builtin_array = model == ExecutionModelTessellationControl || + (model == ExecutionModelMeshEXT && storage == StorageClassOutput) || + (model == ExecutionModelGeometry && storage == StorageClassInput) || + (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput); - bool tessellation = model == ExecutionModelTessellationEvaluation || model == 
ExecutionModelTessellationControl; if (builtin_array) { - // Make sure the array has a supported name in the code. - if (storage == StorageClassOutput) - set_name(block_var->self, "gl_out"); - else if (storage == StorageClassInput) - set_name(block_var->self, "gl_in"); + const char *instance_name; + if (model == ExecutionModelMeshEXT) + instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized. + else + instance_name = storage == StorageClassInput ? "gl_in" : "gl_out"; if (model == ExecutionModelTessellationControl && storage == StorageClassOutput) - end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]")); + end_scope_decl(join(instance_name, "[", get_entry_point().output_vertices, "]")); else - end_scope_decl(join(to_name(block_var->self), tessellation ? "[gl_MaxPatchVertices]" : "[]")); + end_scope_decl(join(instance_name, "[]")); } else end_scope_decl(); statement(""); } -void CompilerGLSL::declare_undefined_values() +bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const { - bool emitted = false; - ir.for_each_typed_id([&](uint32_t, const SPIRUndef &undef) { - statement(variable_decl(this->get(undef.basetype), to_name(undef.self), undef.self), ";"); - emitted = true; - }); - - if (emitted) - statement(""); -} - -bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const -{ - bool statically_assigned = var.statically_assigned && var.static_expression != 0 && var.remapped_variable; + bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable; if (statically_assigned) { @@ -2439,6 +3454,19 @@ void CompilerGLSL::emit_resources() if (!pls_inputs.empty() || !pls_outputs.empty()) emit_pls(); + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); + break; + + default: + 
break; + } + // Emit custom gl_PerVertex for SSO compatibility. if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment) { @@ -2452,6 +3480,7 @@ void CompilerGLSL::emit_resources() break; case ExecutionModelVertex: + case ExecutionModelMeshEXT: emit_declared_builtin_block(StorageClassOutput, execution.model); break; @@ -2459,6 +3488,16 @@ void CompilerGLSL::emit_resources() break; } } + else if (should_force_emit_builtin_block(StorageClassOutput)) + { + emit_declared_builtin_block(StorageClassOutput, execution.model); + } + else if (execution.geometry_passthrough) + { + // Need to declare gl_in with Passthrough. + // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass. + emit_declared_builtin_block(StorageClassInput, execution.model); + } else { // Need to redeclare clip/cull distance with explicit size to use them. @@ -2484,44 +3523,76 @@ void CompilerGLSL::emit_resources() // emit specialization constants as actual floats, // spec op expressions will redirect to the constant name. 
// - for (auto &id_ : ir.ids_for_constant_or_type) { - auto &id = ir.ids[id_]; - - if (id.get_type() == TypeConstant) + auto loop_lock = ir.create_loop_hard_lock(); + for (auto &id_ : ir.ids_for_constant_undef_or_type) { - auto &c = id.get(); + auto &id = ir.ids[id_]; - bool needs_declaration = c.specialization || c.is_used_as_lut; - - if (needs_declaration) + if (id.get_type() == TypeConstant) { - if (!options.vulkan_semantics && c.specialization) + auto &c = id.get(); + + bool needs_declaration = c.specialization || c.is_used_as_lut; + + if (needs_declaration) { - c.specialization_constant_macro_name = - constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + if (!options.vulkan_semantics && c.specialization) + { + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + } + emit_constant(c); + emitted = true; } - emit_constant(c); + } + else if (id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get()); emitted = true; } - } - else if (id.get_type() == TypeConstantOp) - { - emit_specialization_constant_op(id.get()); - emitted = true; - } - else if (id.get_type() == TypeType) - { - auto &type = id.get(); - if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && - (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && - !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + else if (id.get_type() == TypeType) { - if (emitted) - statement(""); - emitted = false; + auto *type = &id.get(); - emit_struct(type); + bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer && + (!has_decoration(type->self, DecorationBlock) && + !has_decoration(type->self, DecorationBufferBlock)); + + // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs. 
+ if (type->basetype == SPIRType::Struct && type->pointer && + has_decoration(type->self, DecorationBlock) && + (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR || + type->storage == StorageClassHitAttributeKHR)) + { + type = &get(type->parent_type); + is_natural_struct = true; + } + + if (is_natural_struct) + { + if (emitted) + statement(""); + emitted = false; + + emit_struct(*type); + } + } + else if (id.get_type() == TypeUndef) + { + auto &undef = id.get(); + auto &type = this->get(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; + + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); + + // FIXME: If used in a constant, we must declare it as one. + statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); + emitted = true; } } } @@ -2533,12 +3604,12 @@ void CompilerGLSL::emit_resources() // If the work group size depends on a specialization constant, we need to declare the layout() block // after constants (and their macros) have been declared. 
if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics && - execution.workgroup_size.constant != 0) + (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))) { SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); - if ((wg_x.id != 0) || (wg_y.id != 0) || (wg_z.id != 0)) + if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0))) { SmallVector inputs; build_workgroup_size(inputs, wg_x, wg_y, wg_z); @@ -2553,28 +3624,28 @@ void CompilerGLSL::emit_resources() { for (auto type : physical_storage_non_block_pointer_types) { - emit_buffer_reference_block(get(type), false); + emit_buffer_reference_block(type, false); } // Output buffer reference blocks. // Do this in two stages, one with forward declaration, // and one without. Buffer reference blocks can reference themselves // to support things like linked lists. - ir.for_each_typed_id([&](uint32_t, SPIRType &type) { - bool has_block_flags = has_decoration(type.self, DecorationBlock); - if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && + ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (type.basetype == SPIRType::Struct && type.pointer && + type.pointer_depth == 1 && !type_is_array_of_pointers(type) && type.storage == StorageClassPhysicalStorageBufferEXT) { - emit_buffer_reference_block(type, true); + emit_buffer_reference_block(self, true); } }); - ir.for_each_typed_id([&](uint32_t, SPIRType &type) { - bool has_block_flags = has_decoration(type.self, DecorationBlock); - if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && + ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (type.basetype == SPIRType::Struct && + type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && type.storage == StorageClassPhysicalStorageBufferEXT) { - 
emit_buffer_reference_block(type, false); + emit_buffer_reference_block(self, false); } }); } @@ -2584,7 +3655,7 @@ void CompilerGLSL::emit_resources() auto &type = this->get(var.basetype); bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform || - type.storage == StorageClassShaderRecordBufferNV; + type.storage == StorageClassShaderRecordBufferKHR; bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); @@ -2624,9 +3695,9 @@ void CompilerGLSL::emit_resources() if (var.storage != StorageClassFunction && type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || - type.storage == StorageClassRayPayloadNV || type.storage == StorageClassIncomingRayPayloadNV || - type.storage == StorageClassCallableDataNV || type.storage == StorageClassIncomingCallableDataNV || - type.storage == StorageClassHitAttributeNV) && + type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR || + type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR || + type.storage == StorageClassHitAttributeKHR) && !is_hidden_variable(var)) { emit_uniform(var); @@ -2638,26 +3709,71 @@ void CompilerGLSL::emit_resources() statement(""); emitted = false; + bool emitted_base_instance = false; + // Output in/out interfaces. ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); + bool is_hidden = is_hidden_variable(var); + + // Unused output I/O variables might still be required to implement framebuffer fetch. 
+ if (var.storage == StorageClassOutput && !is_legacy() && + location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0) + { + is_hidden = false; + } + if (var.storage != StorageClassFunction && type.pointer && (var.storage == StorageClassInput || var.storage == StorageClassOutput) && - interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var)) + interface_variable_exists_in_entry_point(var.self) && !is_hidden) { + if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput && + type.array.size() == 1) + { + SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader."); + } emit_interface_block(var); emitted = true; } else if (is_builtin_variable(var)) { + auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); // For gl_InstanceIndex emulation on GLES, the API user needs to // supply this uniform. - if (options.vertex.support_nonzero_base_instance && - ir.meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics) + + // The draw parameter extension is soft-enabled on GL with some fallbacks. + if (!options.vulkan_semantics) { - statement("uniform int SPIRV_Cross_BaseInstance;"); - emitted = true; + if (!emitted_base_instance && + ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) || + (builtin == BuiltInBaseInstance))) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. 
+ statement("uniform int SPIRV_Cross_BaseInstance;"); + statement("#endif"); + emitted = true; + emitted_base_instance = true; + } + else if (builtin == BuiltInBaseVertex) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. + statement("uniform int SPIRV_Cross_BaseVertex;"); + statement("#endif"); + } + else if (builtin == BuiltInDrawIndex) + { + statement("#ifndef GL_ARB_shader_draw_parameters"); + // Cannot really be worked around. + statement("#error GL_ARB_shader_draw_parameters is not supported."); + statement("#endif"); + } } } }); @@ -2666,9698 +3782,13776 @@ void CompilerGLSL::emit_resources() for (auto global : global_variables) { auto &var = get(global); + if (is_hidden_variable(var, true)) + continue; + if (var.storage != StorageClassOutput) { if (!variable_is_lut(var)) { add_resource_name(var.self); - statement(variable_decl(var), ";"); + + string initializer; + if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && + !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); + } + + statement(variable_decl(var), initializer, ";"); emitted = true; } } + else if (var.initializer && maybe_get(var.initializer) != nullptr) + { + emit_output_variable_initializer(var); + } } if (emitted) statement(""); - - declare_undefined_values(); -} - -// Returns a string representation of the ID, usable as a function arg. -// Default is to simply return the expression representation fo the arg ID. -// Subclasses may override to modify the return value. -string CompilerGLSL::to_func_call_arg(uint32_t id) -{ - // Make sure that we use the name of the original variable, and not the parameter alias. 
- uint32_t name_id = id; - auto *var = maybe_get(id); - if (var && var->basevariable) - name_id = var->basevariable; - return to_expression(name_id); -} - -void CompilerGLSL::handle_invalid_expression(uint32_t id) -{ - // We tried to read an invalidated expression. - // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated. - forced_temporaries.insert(id); - force_recompile(); -} - -// Converts the format of the current expression from packed to unpacked, -// by wrapping the expression in a constructor of the appropriate type. -// GLSL does not support packed formats, so simply return the expression. -// Subclasses that do will override -string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t) -{ - return expr_str; } -// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. -void CompilerGLSL::strip_enclosed_expression(string &expr) +void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) { - if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') - return; + // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). + auto &entry_func = this->get(ir.default_entry_point); + auto &type = get(var.basetype); + bool is_patch = has_decoration(var.self, DecorationPatch); + bool is_block = has_decoration(type.self, DecorationBlock); + bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch; - // Have to make sure that our first and last parens actually enclose everything inside it. - uint32_t paren_count = 0; - for (auto &c : expr) + if (is_block) { - if (c == '(') - paren_count++; - else if (c == ')') - { - paren_count--; - - // If we hit 0 and this is not the final char, our first and final parens actually don't - // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). 
- if (paren_count == 0 && &c != &expr.back()) - return; - } - } - expr.erase(expr.size() - 1, 1); - expr.erase(begin(expr)); -} + uint32_t member_count = uint32_t(type.member_types.size()); + bool type_is_array = type.array.size() == 1; + uint32_t array_size = 1; + if (type_is_array) + array_size = to_array_size_literal(type); + uint32_t iteration_count = is_control_point ? 1 : array_size; -string CompilerGLSL::enclose_expression(const string &expr) -{ - bool need_parens = false; + // If the initializer is a block, we must initialize each block member one at a time. + for (uint32_t i = 0; i < member_count; i++) + { + // These outputs might not have been properly declared, so don't initialize them in that case. + if (has_member_decoration(type.self, i, DecorationBuiltIn)) + { + if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance && + !cull_distance_count) + continue; - // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back - // unary expressions. - if (!expr.empty()) - { - auto c = expr.front(); - if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') - need_parens = true; - } + if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance && + !clip_distance_count) + continue; + } - if (!need_parens) - { - uint32_t paren_count = 0; - for (auto c : expr) - { - if (c == '(' || c == '[') - paren_count++; - else if (c == ')' || c == ']') + // We need to build a per-member array first, essentially transposing from AoS to SoA. + // This code path hits when we have an array of blocks. 
+ string lut_name; + if (type_is_array) { - assert(paren_count); - paren_count--; + lut_name = join("_", var.self, "_", i, "_init"); + uint32_t member_type_id = get(var.basetype).member_types[i]; + auto &member_type = get(member_type_id); + auto array_type = member_type; + array_type.parent_type = member_type_id; + array_type.array.push_back(array_size); + array_type.array_size_literal.push_back(true); + + SmallVector exprs; + exprs.reserve(array_size); + auto &c = get(var.initializer); + for (uint32_t j = 0; j < array_size; j++) + exprs.push_back(to_expression(get(c.subconstants[j]).subconstants[i])); + statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ", + type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");"); } - else if (c == ' ' && paren_count == 0) + + for (uint32_t j = 0; j < iteration_count; j++) { - need_parens = true; - break; + entry_func.fixup_hooks_in.push_back([=, &var]() { + AccessChainMeta meta; + auto &c = this->get(var.initializer); + + uint32_t invocation_id = 0; + uint32_t member_index_id = 0; + if (is_control_point) + { + uint32_t ids = ir.increase_bound_by(3); + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set(ids, uint_type); + set(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true); + set(ids + 2, ids, i, false); + invocation_id = ids + 1; + member_index_id = ids + 2; + } + + if (is_patch) + { + statement("if (gl_InvocationID == 0)"); + begin_scope(); + } + + if (type_is_array && !is_control_point) + { + uint32_t indices[2] = { j, i }; + auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + statement(chain, " = ", lut_name, "[", j, "];"); + } + else if (is_control_point) + { + uint32_t indices[2] = { invocation_id, member_index_id }; + auto chain = access_chain_internal(var.self, indices, 2, 0, &meta); + statement(chain, " = ", lut_name, "[", 
builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];"); + } + else + { + auto chain = + access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + statement(chain, " = ", to_expression(c.subconstants[i]), ";"); + } + + if (is_patch) + end_scope(); + }); } } - assert(paren_count == 0); } - - // If this expression contains any spaces which are not enclosed by parentheses, - // we need to enclose it so we can treat the whole string as an expression. - // This happens when two expressions have been part of a binary op earlier. - if (need_parens) - return join('(', expr, ')'); - else - return expr; -} - -string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr) -{ - // If this expression starts with an address-of operator ('&'), then - // just return the part after the operator. - // TODO: Strip parens if unnecessary? - if (expr.front() == '&') - return expr.substr(1); - else if (backend.native_pointers) - return join('*', expr); - else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct && - expr_type.pointer_depth == 1) + else if (is_control_point) { - return join(enclose_expression(expr), ".value"); + auto lut_name = join("_", var.self, "_init"); + statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type), + " = ", to_expression(var.initializer), ";"); + entry_func.fixup_hooks_in.push_back([&, lut_name]() { + statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];"); + }); } - else - return expr; -} - -string CompilerGLSL::address_of_expression(const std::string &expr) -{ - if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') + else if (has_decoration(var.self, DecorationBuiltIn) && + BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask) { - // If we have an expression which looks like (*foo), taking the address of it is the same as stripping - 
// the first two and last characters. We might have to enclose the expression. - // This doesn't work for cases like (*foo + 10), - // but this is an r-value expression which we cannot take the address of anyways. - return enclose_expression(expr.substr(2, expr.size() - 3)); + // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_< + entry_func.fixup_hooks_in.push_back([&] { + auto &c = this->get(var.initializer); + uint32_t num_constants = uint32_t(c.subconstants.size()); + for (uint32_t i = 0; i < num_constants; i++) + { + // Don't use to_expression on constant since it might be uint, just fish out the raw int. + statement(to_expression(var.self), "[", i, "] = ", + convert_to_string(this->get(c.subconstants[i]).scalar_i32()), ";"); + } + }); } - else if (expr.front() == '*') + else { - // If this expression starts with a dereference operator ('*'), then - // just return the part after the operator. - return expr.substr(1); + auto lut_name = join("_", var.self, "_init"); + statement("const ", type_to_glsl(type), " ", lut_name, + type_to_array_glsl(type), " = ", to_expression(var.initializer), ";"); + entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() { + if (is_patch) + { + statement("if (gl_InvocationID == 0)"); + begin_scope(); + } + statement(to_expression(var.self), " = ", lut_name, ";"); + if (is_patch) + end_scope(); + }); } - else - return join('&', enclose_expression(expr)); -} - -// Just like to_expression except that we enclose the expression inside parentheses if needed. -string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) -{ - return enclose_expression(to_expression(id, register_expression_read)); } -string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) +void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) { - // If we need to transpose, it will also take care of unpacking rules. 
- auto *e = maybe_get(id); - bool need_transpose = e && e->need_transpose; - if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked)) - return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), - get_extended_decoration(id, SPIRVCrossDecorationPackedType)); - else - return to_expression(id, register_expression_read); -} + static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", + "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" }; -string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) -{ - // If we need to transpose, it will also take care of unpacking rules. - auto *e = maybe_get(id); - bool need_transpose = e && e->need_transpose; - if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked)) - return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), - get_extended_decoration(id, SPIRVCrossDecorationPackedType)); - else - return to_enclosed_expression(id, register_expression_read); -} + if (!options.vulkan_semantics) + { + using Supp = ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); -string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) -{ - auto &type = expression_type(id); - if (type.pointer && should_dereference(id)) - return dereference_expression(type, to_enclosed_expression(id, register_expression_read)); - else - return to_expression(id, register_expression_read); -} + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result); -string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) -{ - auto &type = expression_type(id); - if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) - return 
address_of_expression(to_enclosed_expression(id, register_expression_read)); - else - return to_unpacked_expression(id, register_expression_read); -} + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); -string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) -{ - auto &type = expression_type(id); - if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) - return address_of_expression(to_enclosed_expression(id, register_expression_read)); - else - return to_enclosed_unpacked_expression(id, register_expression_read); -} + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } -string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) -{ - auto expr = to_enclosed_expression(id); - if (has_extended_decoration(id, SPIRVCrossDecorationPacked)) - return join(expr, "[", index, "]"); - else - return join(expr, ".", 
index_to_swizzle(index)); -} + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result); -string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type) -{ - uint32_t size = to_array_size_literal(type); - auto &parent = get(type.parent_type); - string expr = "{ "; + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); - for (uint32_t i = 0; i < size; i++) - { - auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); - if (parent.array.empty()) - expr += subexpr; - else - expr += to_rerolled_array_expression(subexpr, parent); + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupSize gl_WarpSizeNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupSize gl_SubGroupSizeARB"); + break; + case Supp::AMD_gcn_shader: + statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } - if (i + 1 < size) - expr += ", "; - } + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result); - expr += " }"; - return expr; -} + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); -string CompilerGLSL::to_composite_constructor_expression(uint32_t id) -{ - auto &type = expression_type(id); - if (!backend.array_is_value_type && !type.array.empty()) - { - // For this case, we need to "re-roll" an array initializer from a temporary. - // We cannot simply pass the array directly, since it decays to a pointer and it cannot - // participate in a struct initializer. E.g. 
- // float arr[2] = { 1.0, 2.0 }; - // Foo foo = { arr }; must be transformed to - // Foo foo = { { arr[0], arr[1] } }; - // The array sizes cannot be deduced from specialization constants since we cannot use any loops. + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } - // We're only triggering one read of the array expression, but this is fine since arrays have to be declared - // as temporaries anyways. - return to_rerolled_array_expression(to_enclosed_expression(id), type); - } - else - return to_expression(id); -} + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result); -string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) -{ - auto itr = invalid_expressions.find(id); - if (itr != end(invalid_expressions)) - handle_invalid_expression(id); + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); - if (ir.ids[id].get_type() == TypeExpression) - { - // We might have a more complex chain of dependencies. - // A possible scenario is that we - // - // %1 = OpLoad - // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. - // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. - // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. - // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. 
- // - // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, - // and see that we should not forward reads of the original variable. - auto &expr = get(id); - for (uint32_t dep : expr.expression_dependencies) - if (invalid_expressions.find(dep) != end(invalid_expressions)) - handle_invalid_expression(dep); - } + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupID gl_WarpIDNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } - if (register_expression_read) - track_expression_read(id); + if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups)) + { + auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result); - switch (ir.ids[id].get_type()) - { - case TypeExpression: - { - auto &e = get(id); - if (e.base_expression) - return to_enclosed_expression(e.base_expression) + e.expression; - else if (e.need_transpose) + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_NumSubgroups gl_WarpsPerSMNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First)) { - bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPacked); - return convert_row_major_matrix(e.expression, get(e.expression_type), is_packed); + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_shuffle: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }"); + } + break; + case Supp::ARB_shader_ballot: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return readFirstInvocationARB(value); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return readInvocationARB(value, id); }"); + } + break; + default: + break; + } + } + statement("#endif"); + statement(""); } - else + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB)) { - if (is_forcing_recompilation()) + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result); + + for (auto &e : exts) { - // During first compilation phase, certain expression patterns can trigger exponential growth of memory. - // Avoid this by returning dummy expressions during this phase. - // Do not use empty expressions here, because those are sentinels for other cases. - return "_"; + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }"); + statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }"); + break; + default: + break; + } } - else - return e.expression; + statement("#else"); + statement("uint subgroupBallotFindLSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findLSB(value.x);"); + statement("return uint(firstLive != -1 ? 
firstLive : (findLSB(value.y) + 32));"); + end_scope(); + statement("uint subgroupBallotFindMSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findMSB(value.y);"); + statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));"); + end_scope(); + statement("#endif"); + statement(""); } - } - case TypeConstant: - { - auto &c = get(id); - auto &type = get(c.constant_type); + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result); - // WorkGroupSize may be a constant. - auto &dec = ir.meta[c.self].decoration; - if (dec.builtin) - return builtin_to_glsl(dec.builtin_type, StorageClassGeneric); - else if (c.specialization) - return to_name(id); - else if (c.is_used_as_lut) - return to_name(id); - else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) - return to_name(id); - else if (!type.array.empty() && !backend.can_declare_arrays_inline) - return to_name(id); - else - return constant_expression(c); - } + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); - case TypeConstantOp: - return to_name(id); + switch (e) + { + case Supp::NV_gpu_shader_5: + statement("bool subgroupAll(bool value) { return allThreadsNV(value); }"); + statement("bool subgroupAny(bool value) { return anyThreadNV(value); }"); + statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }"); + break; + case Supp::ARB_shader_group_vote: + statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }"); + statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }"); + statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }"); + break; + case Supp::AMD_gcn_shader: + statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }"); + statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }"); + statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || " + "b == ballotAMD(true); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } - case TypeVariable: - { - auto &var = get(id); - // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, - // the variable has not been declared yet. 
- if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) - return to_expression(var.static_expression); - else if (var.deferred_declaration) + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT)) { - var.deferred_declaration = false; - return variable_decl(var); + statement("#ifndef GL_KHR_shader_subgroup_vote"); + statement( + "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return " + "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }"); + for (const char *t : workaround_types) + statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")"); + statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND"); + statement("#endif"); + statement(""); } - else if (flattened_structs.count(id)) + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot)) { - return load_flattened_struct(var); + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }"); + break; + case Supp::ARB_shader_ballot: + statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); } - else + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect)) { - auto &dec = ir.meta[var.self].decoration; - if (dec.builtin) - return builtin_to_glsl(dec.builtin_type, var.storage); - else - return to_name(id); + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("bool subgroupElect()"); + begin_scope(); + statement("uvec4 activeMask = subgroupBallot(true);"); + statement("uint firstLive = subgroupBallotFindLSB(activeMask);"); + statement("return gl_SubgroupInvocationID == firstLive;"); + end_scope(); + statement("#endif"); + statement(""); } - } - case TypeCombinedImageSampler: - // This type should never be taken the expression of directly. - // The intention is that texture sampling functions will extract the image and samplers - // separately and take their expressions as needed. - // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler - // expression ala sampler2D(texture, sampler). - SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); - - case TypeAccessChain: - // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. 
- SPIRV_CROSS_THROW("Access chains have no default expression representation."); - - default: - return to_name(id); - } -} - -string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) -{ - auto &type = get(cop.basetype); - bool binary = false; - bool unary = false; - string op; - - if (is_legacy() && is_unsigned_opcode(cop.opcode)) - SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); - - // TODO: Find a clean way to reuse emit_instruction. - switch (cop.opcode) - { - case OpSConvert: - case OpUConvert: - case OpFConvert: - op = type_to_glsl_constructor(type); - break; + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier)) + { + // Extensions we're using in place of GL_KHR_shader_subgroup_basic state + // that subgroup execute in lockstep so this barrier is implicit. + // However the GL 4.6 spec also states that `barrier` implies a shared memory barrier, + // and a specific test of optimizing scans by leveraging lock-step invocation execution, + // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`. 
+ // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19 + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupBarrier() { memoryBarrierShared(); }"); + statement("#endif"); + statement(""); + } -#define GLSL_BOP(opname, x) \ - case Op##opname: \ - binary = true; \ - op = x; \ - break + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier)) + { + if (model == spv::ExecutionModelGLCompute) + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }"); + statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }"); + statement("#endif"); + } + else + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { memoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }"); + statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }"); + statement("#endif"); + } + statement(""); + } -#define GLSL_UOP(opname, x) \ - case Op##opname: \ - unary = true; \ - op = x; \ - break + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupInverseBallot(uvec4 value)"); + begin_scope(); + statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));"); + end_scope(); - GLSL_UOP(SNegate, "-"); - GLSL_UOP(Not, "~"); - GLSL_BOP(IAdd, "+"); - GLSL_BOP(ISub, "-"); - GLSL_BOP(IMul, "*"); - GLSL_BOP(SDiv, "/"); - GLSL_BOP(UDiv, "/"); - GLSL_BOP(UMod, "%"); - GLSL_BOP(SMod, "%"); - GLSL_BOP(ShiftRightLogical, ">>"); - GLSL_BOP(ShiftRightArithmetic, ">>"); - 
GLSL_BOP(ShiftLeftLogical, "<<"); - GLSL_BOP(BitwiseOr, "|"); - GLSL_BOP(BitwiseXor, "^"); - GLSL_BOP(BitwiseAnd, "&"); - GLSL_BOP(LogicalOr, "||"); - GLSL_BOP(LogicalAnd, "&&"); - GLSL_UOP(LogicalNot, "!"); - GLSL_BOP(LogicalEqual, "=="); - GLSL_BOP(LogicalNotEqual, "!="); - GLSL_BOP(IEqual, "=="); - GLSL_BOP(INotEqual, "!="); - GLSL_BOP(ULessThan, "<"); - GLSL_BOP(SLessThan, "<"); - GLSL_BOP(ULessThanEqual, "<="); - GLSL_BOP(SLessThanEqual, "<="); - GLSL_BOP(UGreaterThan, ">"); - GLSL_BOP(SGreaterThan, ">"); - GLSL_BOP(UGreaterThanEqual, ">="); - GLSL_BOP(SGreaterThanEqual, ">="); + statement("uint subgroupBallotInclusiveBitCount(uvec4 value)"); + begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); - case OpSelect: - { - if (cop.arguments.size() < 3) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + statement("uint subgroupBallotExclusiveBitCount(uvec4 value)"); + begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); + } - // This one is pretty annoying. It's triggered from - // uint(bool), int(bool) from spec constants. - // In order to preserve its compile-time constness in Vulkan GLSL, - // we need to reduce the OpSelect expression back to this simplified model. - // If we cannot, fail. 
- if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount)) { - // Implement as a simple cast down below. + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("uint subgroupBallotBitCount(uvec4 value)"); + begin_scope(); + statement("ivec2 c = bitCount(value.xy);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); } - else + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract)) { - // Implement a ternary and pray the compiler understands it :) - return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]); + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupBallotBitExtract(uvec4 value, uint index)"); + begin_scope(); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("uint shifted = value.x >> index;"); + statement_no_indent("#else"); + statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);"); + statement_no_indent("#endif"); + statement("return (shifted & 1u) != 0u;"); + end_scope(); + statement("#endif"); + statement(""); } - break; } - case OpVectorShuffle: + if (!workaround_ubo_load_overload_types.empty()) { - string expr = type_to_glsl_constructor(type); - expr += "("; - - uint32_t left_components = expression_type(cop.arguments[0]).vecsize; - string left_arg = to_enclosed_expression(cop.arguments[0]); - string right_arg = to_enclosed_expression(cop.arguments[1]); - - for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) + for (auto &type_id : workaround_ubo_load_overload_types) { - uint32_t index = cop.arguments[i]; - if (index >= left_components) - expr += right_arg + "." 
+ "xyzw"[index - left_components]; - else - expr += left_arg + "." + "xyzw"[index]; + auto &type = get(type_id); - if (i + 1 < uint32_t(cop.arguments.size())) - expr += ", "; + if (options.es && is_matrix(type)) + { + // Need both variants. + // GLSL cannot overload on precision, so need to dispatch appropriately. + statement("highp ", type_to_glsl(type), " spvWorkaroundRowMajor(highp ", type_to_glsl(type), " wrap) { return wrap; }"); + statement("mediump ", type_to_glsl(type), " spvWorkaroundRowMajorMP(mediump ", type_to_glsl(type), " wrap) { return wrap; }"); + } + else + { + statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type), " wrap) { return wrap; }"); + } } - - expr += ")"; - return expr; + statement(""); } - case OpCompositeExtract: + if (requires_transpose_2x2) { - auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); - return expr; + statement("mat2 spvTranspose(mat2 m)"); + begin_scope(); + statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);"); + end_scope(); + statement(""); } - case OpCompositeInsert: - SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported."); + if (requires_transpose_3x3) + { + statement("mat3 spvTranspose(mat3 m)"); + begin_scope(); + statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);"); + end_scope(); + statement(""); + } - default: - // Some opcodes are unimplemented here, these are currently not possible to test from glslang. 
- SPIRV_CROSS_THROW("Unimplemented spec constant op."); + if (requires_transpose_4x4) + { + statement("mat4 spvTranspose(mat4 m)"); + begin_scope(); + statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], " + "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);"); + end_scope(); + statement(""); } +} - uint32_t bit_width = 0; - if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert) - bit_width = expression_type(cop.arguments[0]).width; +// Returns a string representation of the ID, usable as a function arg. +// Default is to simply return the expression representation of the arg ID. +// Subclasses may override to modify the return value. +string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id) +{ + // Make sure that we use the name of the original variable, and not the parameter alias. + uint32_t name_id = id; + auto *var = maybe_get(id); + if (var && var->basevariable) + name_id = var->basevariable; + return to_expression(name_id); +} - SPIRType::BaseType input_type; - bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); +void CompilerGLSL::force_temporary_and_recompile(uint32_t id) +{ + auto res = forced_temporaries.insert(id); - switch (cop.opcode) - { - case OpIEqual: - case OpINotEqual: - input_type = to_signed_basetype(bit_width); - break; + // Forcing new temporaries guarantees forward progress. + if (res.second) + force_recompile_guarantee_forward_progress(); + else + force_recompile(); +} - case OpSLessThan: - case OpSLessThanEqual: - case OpSGreaterThan: - case OpSGreaterThanEqual: - case OpSMod: - case OpSDiv: - case OpShiftRightArithmetic: - case OpSConvert: - case OpSNegate: - input_type = to_signed_basetype(bit_width); - break; +uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision) +{ + // Constants do not have innate precision. 
+ auto handle_type = ir.ids[id].get_type(); + if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) + return id; - case OpULessThan: - case OpULessThanEqual: - case OpUGreaterThan: - case OpUGreaterThanEqual: - case OpUMod: - case OpUDiv: - case OpShiftRightLogical: - case OpUConvert: - input_type = to_unsigned_basetype(bit_width); - break; + // Ignore anything that isn't 32-bit values. + auto &type = get(type_id); + if (type.pointer) + return id; + if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int) + return id; - default: - input_type = type.basetype; - break; + if (precision == Options::DontCare) + { + // If precision is consumed as don't care (operations only consisting of constants), + // we need to bind the expression to a temporary, + // otherwise we have no way of controlling the precision later. + auto itr = forced_temporaries.insert(id); + if (itr.second) + force_recompile_guarantee_forward_progress(); + return id; } -#undef GLSL_BOP -#undef GLSL_UOP - if (binary) - { - if (cop.arguments.size() < 2) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + auto current_precision = has_decoration(id, DecorationRelaxedPrecision) ? 
Options::Mediump : Options::Highp; + if (current_precision == precision) + return id; - string cast_op0; - string cast_op1; - auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], - cop.arguments[1], skip_cast_if_equal_type); + auto itr = temporary_to_mirror_precision_alias.find(id); + if (itr == temporary_to_mirror_precision_alias.end()) + { + uint32_t alias_id = ir.increase_bound_by(1); + auto &m = ir.meta[alias_id]; + if (auto *input_m = ir.find_meta(id)) + m = *input_m; - if (type.basetype != input_type && type.basetype != SPIRType::Boolean) + const char *prefix; + if (precision == Options::Mediump) { - expected_type.basetype = input_type; - auto expr = bitcast_glsl_op(type, expected_type); - expr += '('; - expr += join(cast_op0, " ", op, " ", cast_op1); - expr += ')'; - return expr; + set_decoration(alias_id, DecorationRelaxedPrecision); + prefix = "mp_copy_"; } else - return join("(", cast_op0, " ", op, " ", cast_op1, ")"); - } - else if (unary) - { - if (cop.arguments.size() < 1) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + { + unset_decoration(alias_id, DecorationRelaxedPrecision); + prefix = "hp_copy_"; + } - // Auto-bitcast to result type as needed. - // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. 
- return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); + auto alias_name = join(prefix, to_name(id)); + ParsedIR::sanitize_underscores(alias_name); + set_name(alias_id, alias_name); + + emit_op(type_id, alias_id, to_expression(id), true); + temporary_to_mirror_precision_alias[id] = alias_id; + forced_temporaries.insert(id); + forced_temporaries.insert(alias_id); + force_recompile_guarantee_forward_progress(); + id = alias_id; } - else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert) + else { - if (cop.arguments.size() < 1) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + id = itr->second; + } - auto &arg_type = expression_type(cop.arguments[0]); - if (arg_type.width < type.width && input_type != arg_type.basetype) + return id; +} + +void CompilerGLSL::handle_invalid_expression(uint32_t id) +{ + // We tried to read an invalidated expression. + // This means we need another pass at compilation, but next time, + // force temporary variables so that they cannot be invalidated. + force_temporary_and_recompile(id); + + // If the invalid expression happened as a result of a CompositeInsert + // overwrite, we must block this from happening next iteration. + if (composite_insert_overwritten.count(id)) + block_composite_insert_overwrite.insert(id); +} + +// Converts the format of the current expression from packed to unpacked, +// by wrapping the expression in a constructor of the appropriate type. +// GLSL does not support packed formats, so simply return the expression. +// Subclasses that do will override. +string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool) +{ + return expr_str; +} + +// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. 
+void CompilerGLSL::strip_enclosed_expression(string &expr) +{ + if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') + return; + + // Have to make sure that our first and last parens actually enclose everything inside it. + uint32_t paren_count = 0; + for (auto &c : expr) + { + if (c == '(') + paren_count++; + else if (c == ')') { - auto expected = arg_type; - expected.basetype = input_type; - return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")"); + paren_count--; + + // If we hit 0 and this is not the final char, our first and final parens actually don't + // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). + if (paren_count == 0 && &c != &expr.back()) + return; } - else - return join(op, "(", to_expression(cop.arguments[0]), ")"); - } - else - { - if (cop.arguments.size() < 1) - SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); - return join(op, "(", to_expression(cop.arguments[0]), ")"); } + expr.erase(expr.size() - 1, 1); + expr.erase(begin(expr)); } -string CompilerGLSL::constant_expression(const SPIRConstant &c) +string CompilerGLSL::enclose_expression(const string &expr) { - auto &type = get(c.constant_type); + bool need_parens = false; - if (type.pointer) + // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back + // unary expressions. + if (!expr.empty()) { - return backend.null_pointer_literal; + auto c = expr.front(); + if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') + need_parens = true; } - else if (!c.subconstants.empty()) + + if (!need_parens) { - // Handles Arrays and structures. 
- string res; - if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && - type.array.empty()) - { - res = type_to_glsl_constructor(type) + "{ "; - } - else if (backend.use_initializer_list) - { - res = "{ "; - } - else + uint32_t paren_count = 0; + for (auto c : expr) { - res = type_to_glsl_constructor(type) + "("; + if (c == '(' || c == '[') + paren_count++; + else if (c == ')' || c == ']') + { + assert(paren_count); + paren_count--; + } + else if (c == ' ' && paren_count == 0) + { + need_parens = true; + break; + } } + assert(paren_count == 0); + } - for (auto &elem : c.subconstants) - { - auto &subc = get(elem); - if (subc.specialization) - res += to_name(elem); - else - res += constant_expression(subc); + // If this expression contains any spaces which are not enclosed by parentheses, + // we need to enclose it so we can treat the whole string as an expression. + // This happens when two expressions have been part of a binary op earlier. + if (need_parens) + return join('(', expr, ')'); + else + return expr; +} - if (&elem != &c.subconstants.back()) - res += ", "; - } +string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr) +{ + // If this expression starts with an address-of operator ('&'), then + // just return the part after the operator. + // TODO: Strip parens if unnecessary? + if (expr.front() == '&') + return expr.substr(1); + else if (backend.native_pointers) + return join('*', expr); + else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct && + expr_type.pointer_depth == 1) + { + return join(enclose_expression(expr), ".value"); + } + else + return expr; +} - res += backend.use_initializer_list ? 
" }" : ")"; - return res; +string CompilerGLSL::address_of_expression(const std::string &expr) +{ + if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') + { + // If we have an expression which looks like (*foo), taking the address of it is the same as stripping + // the first two and last characters. We might have to enclose the expression. + // This doesn't work for cases like (*foo + 10), + // but this is an r-value expression which we cannot take the address of anyways. + return enclose_expression(expr.substr(2, expr.size() - 3)); } - else if (c.columns() == 1) + else if (expr.front() == '*') { - return constant_expression_vector(c, 0); + // If this expression starts with a dereference operator ('*'), then + // just return the part after the operator. + return expr.substr(1); } else - { - string res = type_to_glsl(get(c.constant_type)) + "("; - for (uint32_t col = 0; col < c.columns(); col++) - { - if (c.specialization_constant_id(col) != 0) - res += to_name(c.specialization_constant_id(col)); - else - res += constant_expression_vector(c, col); + return join('&', enclose_expression(expr)); +} - if (col + 1 < c.columns()) - res += ", "; - } - res += ")"; - return res; - } +// Just like to_expression except that we enclose the expression inside parentheses if needed. +string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) +{ + return enclose_expression(to_expression(id, register_expression_read)); } -#ifdef _MSC_VER -// sprintf warning. -// We cannot rely on snprintf existing because, ..., MSVC. -#pragma warning(push) -#pragma warning(disable : 4996) -#endif +// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans. +// need_transpose must be forced to false. 
+string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id) +{ + return unpack_expression_type(to_expression(id), expression_type(id), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true); +} -string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) { - string res; - float float_value = c.scalar_f16(col, row); + // If we need to transpose, it will also take care of unpacking rules. + auto *e = maybe_get(id); + bool need_transpose = e && e->need_transpose; + bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); - // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots - // of complicated workarounds, just value-cast to the half type always. 
- if (std::isnan(float_value) || std::isinf(float_value)) + if (!need_transpose && (is_remapped || is_packed)) { - SPIRType type; - type.basetype = SPIRType::Half; - type.vecsize = 1; - type.columns = 1; - - if (float_value == numeric_limits::infinity()) - res = join(type_to_glsl(type), "(1.0 / 0.0)"); - else if (float_value == -numeric_limits::infinity()) - res = join(type_to_glsl(type), "(-1.0 / 0.0)"); - else if (std::isnan(float_value)) - res = join(type_to_glsl(type), "(0.0 / 0.0)"); - else - SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + return unpack_expression_type(to_expression(id, register_expression_read), + get_pointee_type(expression_type_id(id)), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); } else - { - SPIRType type; - type.basetype = SPIRType::Half; - type.vecsize = 1; - type.columns = 1; - res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")"); - } + return to_expression(id, register_expression_read); +} - return res; +string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) +{ + return enclose_expression(to_unpacked_expression(id, register_expression_read)); } -string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) { - string res; - float float_value = c.scalar_f32(col, row); + auto &type = expression_type(id); + if (type.pointer && should_dereference(id)) + return dereference_expression(type, to_enclosed_expression(id, register_expression_read)); + else + return to_expression(id, register_expression_read); +} - if (std::isnan(float_value) || std::isinf(float_value)) +string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if 
(type.pointer && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_enclosed_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) +{ + auto expr = to_enclosed_expression(id); + if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked)) + return join(expr, "[", index, "]"); + else + return join(expr, ".", index_to_swizzle(index)); +} + +string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c, + const uint32_t *chain, uint32_t length) +{ + // It is kinda silly if application actually enter this path since they know the constant up front. + // It is useful here to extract the plain constant directly. + SPIRConstant tmp; + tmp.constant_type = result_type; + auto &composite_type = get(c.constant_type); + assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty()); + assert(!c.specialization); + + if (is_matrix(composite_type)) { - // Use special representation. 
- if (!is_legacy()) + if (length == 2) { - SPIRType out_type; - SPIRType in_type; - out_type.basetype = SPIRType::Float; - in_type.basetype = SPIRType::UInt; - out_type.vecsize = 1; - in_type.vecsize = 1; - out_type.width = 32; - in_type.width = 32; - - char print_buffer[32]; - sprintf(print_buffer, "0x%xu", c.scalar(col, row)); - res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); + tmp.m.c[0].vecsize = 1; + tmp.m.columns = 1; + tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]]; } else { - if (float_value == numeric_limits::infinity()) - { - if (backend.float_literal_suffix) - res = "(1.0f / 0.0f)"; - else - res = "(1.0 / 0.0)"; - } - else if (float_value == -numeric_limits::infinity()) - { - if (backend.float_literal_suffix) - res = "(-1.0f / 0.0f)"; - else - res = "(-1.0 / 0.0)"; - } - else if (std::isnan(float_value)) - { - if (backend.float_literal_suffix) - res = "(0.0f / 0.0f)"; - else - res = "(0.0 / 0.0)"; - } - else - SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + assert(length == 1); + tmp.m.c[0].vecsize = composite_type.vecsize; + tmp.m.columns = 1; + tmp.m.c[0] = c.m.c[chain[0]]; } } else { - res = convert_to_string(float_value, current_locale_radix_character); - if (backend.float_literal_suffix) - res += "f"; + assert(length == 1); + tmp.m.c[0].vecsize = 1; + tmp.m.columns = 1; + tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]]; } - return res; + return constant_expression(tmp); } -std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type) { - string res; - double double_value = c.scalar_f64(col, row); + uint32_t size = to_array_size_literal(type); + auto &parent = get(type.parent_type); + string expr = "{ "; - if (std::isnan(double_value) || std::isinf(double_value)) + for (uint32_t i = 0; i < size; i++) { - // Use special representation. 
- if (!is_legacy()) - { - SPIRType out_type; - SPIRType in_type; - out_type.basetype = SPIRType::Double; - in_type.basetype = SPIRType::UInt64; - out_type.vecsize = 1; - in_type.vecsize = 1; - out_type.width = 64; - in_type.width = 64; + auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); + if (parent.array.empty()) + expr += subexpr; + else + expr += to_rerolled_array_expression(subexpr, parent); - uint64_t u64_value = c.scalar_u64(col, row); + if (i + 1 < size) + expr += ", "; + } - if (options.es) - SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile."); - require_extension_internal("GL_ARB_gpu_shader_int64"); + expr += " }"; + return expr; +} - char print_buffer[64]; - sprintf(print_buffer, "0x%llx%s", static_cast(u64_value), - backend.long_long_literal_suffix ? "ull" : "ul"); - res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); - } - else - { - if (options.es) - SPIRV_CROSS_THROW("FP64 not supported in ES profile."); - if (options.version < 400) - require_extension_internal("GL_ARB_gpu_shader_fp64"); +string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool block_like_type) +{ + auto &type = expression_type(id); - if (double_value == numeric_limits::infinity()) - { - if (backend.double_literal_suffix) - res = "(1.0lf / 0.0lf)"; - else - res = "(1.0 / 0.0)"; - } - else if (double_value == -numeric_limits::infinity()) - { - if (backend.double_literal_suffix) - res = "(-1.0lf / 0.0lf)"; - else - res = "(-1.0 / 0.0)"; - } - else if (std::isnan(double_value)) - { - if (backend.double_literal_suffix) - res = "(0.0lf / 0.0lf)"; - else - res = "(0.0 / 0.0)"; - } - else - SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); - } - } - else + bool reroll_array = !type.array.empty() && + (!backend.array_is_value_type || + (block_like_type && !backend.array_is_value_type_in_buffer_blocks)); + + if (reroll_array) { - res = convert_to_string(double_value, 
current_locale_radix_character); - if (backend.double_literal_suffix) - res += "lf"; - } + // For this case, we need to "re-roll" an array initializer from a temporary. + // We cannot simply pass the array directly, since it decays to a pointer and it cannot + // participate in a struct initializer. E.g. + // float arr[2] = { 1.0, 2.0 }; + // Foo foo = { arr }; must be transformed to + // Foo foo = { { arr[0], arr[1] } }; + // The array sizes cannot be deduced from specialization constants since we cannot use any loops. - return res; + // We're only triggering one read of the array expression, but this is fine since arrays have to be declared + // as temporaries anyways. + return to_rerolled_array_expression(to_enclosed_expression(id), type); + } + else + return to_unpacked_expression(id); } -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) +string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id) { - auto type = get(c.constant_type); - type.columns = 1; + string expr = to_expression(id); - auto scalar_type = type; - scalar_type.vecsize = 1; + if (has_decoration(id, DecorationNonUniform)) + convert_non_uniform_expression(expr, id); - string res; - bool splat = backend.use_constructor_splatting && c.vector_size() > 1; - bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; + return expr; +} - if (!type_is_floating_point(type)) +string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) +{ + auto itr = invalid_expressions.find(id); + if (itr != end(invalid_expressions)) + handle_invalid_expression(id); + + if (ir.ids[id].get_type() == TypeExpression) { - // Cannot swizzle literal integers as a special case. - swizzle_splat = false; + // We might have a more complex chain of dependencies. + // A possible scenario is that we + // + // %1 = OpLoad + // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. 
+ // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. + // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. + // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. + // + // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, + // and see that we should not forward reads of the original variable. + auto &expr = get(id); + for (uint32_t dep : expr.expression_dependencies) + if (invalid_expressions.find(dep) != end(invalid_expressions)) + handle_invalid_expression(dep); } - if (splat || swizzle_splat) + if (register_expression_read) + track_expression_read(id); + + switch (ir.ids[id].get_type()) { - // Cannot use constant splatting if we have specialization constants somewhere in the vector. - for (uint32_t i = 0; i < c.vector_size(); i++) + case TypeExpression: + { + auto &e = get(id); + if (e.base_expression) + return to_enclosed_expression(e.base_expression) + e.expression; + else if (e.need_transpose) { - if (c.specialization_constant_id(vector, i) != 0) + // This should not be reached for access chains, since we always deal explicitly with transpose state + // when consuming an access chain expression. 
+ uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + return convert_row_major_matrix(e.expression, get(e.expression_type), physical_type_id, + is_packed); + } + else if (flattened_structs.count(id)) + { + return load_flattened_struct(e.expression, get(e.expression_type)); + } + else + { + if (is_forcing_recompilation()) { - splat = false; - swizzle_splat = false; - break; + // During first compilation phase, certain expression patterns can trigger exponential growth of memory. + // Avoid this by returning dummy expressions during this phase. + // Do not use empty expressions here, because those are sentinels for other cases. + return "_"; } + else + return e.expression; } } - if (splat || swizzle_splat) + case TypeConstant: { - if (type.width == 64) + auto &c = get(id); + auto &type = get(c.constant_type); + + // WorkGroupSize may be a constant. + if (has_decoration(c.self, DecorationBuiltIn)) + return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric); + else if (c.specialization) { - uint64_t ident = c.scalar_u64(vector, 0); - for (uint32_t i = 1; i < c.vector_size(); i++) + if (backend.workgroup_size_is_hidden) { - if (ident != c.scalar_u64(vector, i)) + int wg_index = get_constant_mapping_to_workgroup_component(c); + if (wg_index >= 0) { - splat = false; - swizzle_splat = false; - break; + auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index)); + if (type.basetype != SPIRType::UInt) + wg_size = bitcast_expression(type, SPIRType::UInt, wg_size); + return wg_size; } } + + if (expression_is_forwarded(id)) + return constant_expression(c); + + return to_name(id); } + else if (c.is_used_as_lut) + return to_name(id); + else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + return to_name(id); + else if 
(!type.array.empty() && !backend.can_declare_arrays_inline) + return to_name(id); else - { - uint32_t ident = c.scalar(vector, 0); - for (uint32_t i = 1; i < c.vector_size(); i++) - { - if (ident != c.scalar(vector, i)) - { - splat = false; - swizzle_splat = false; - } - } - } + return constant_expression(c); } - if (c.vector_size() > 1 && !swizzle_splat) - res += type_to_glsl(type) + "("; + case TypeConstantOp: + return to_name(id); - switch (type.basetype) + case TypeVariable: { - case SPIRType::Half: - if (splat || swizzle_splat) - { - res += convert_half_to_string(c, vector, 0); - if (swizzle_splat) - res = remap_swizzle(get(c.constant_type), 1, res); - } - else + auto &var = get(id); + // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, + // the variable has not been declared yet. + if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) { - for (uint32_t i = 0; i < c.vector_size(); i++) + // We might try to load from a loop variable before it has been initialized. + // Prefer static expression and fallback to initializer. + if (var.static_expression) + return to_expression(var.static_expression); + else if (var.initializer) + return to_expression(var.initializer); + else { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += convert_half_to_string(c, vector, i); - - if (i + 1 < c.vector_size()) - res += ", "; + // We cannot declare the variable yet, so have to fake it. 
+ uint32_t undef_id = ir.increase_bound_by(1); + return emit_uninitialized_temporary_expression(get_variable_data_type_id(var), undef_id).expression; } } - break; - - case SPIRType::Float: - if (splat || swizzle_splat) + else if (var.deferred_declaration) { - res += convert_float_to_string(c, vector, 0); - if (swizzle_splat) - res = remap_swizzle(get(c.constant_type), 1, res); + var.deferred_declaration = false; + return variable_decl(var); + } + else if (flattened_structs.count(id)) + { + return load_flattened_struct(to_name(id), get(var.basetype)); } else { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += convert_float_to_string(c, vector, i); - - if (i + 1 < c.vector_size()) - res += ", "; - } + auto &dec = ir.meta[var.self].decoration; + if (dec.builtin) + return builtin_to_glsl(dec.builtin_type, var.storage); + else + return to_name(id); } - break; + } - case SPIRType::Double: - if (splat || swizzle_splat) - { - res += convert_double_to_string(c, vector, 0); - if (swizzle_splat) - res = remap_swizzle(get(c.constant_type), 1, res); - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += convert_double_to_string(c, vector, i); + case TypeCombinedImageSampler: + // This type should never be taken the expression of directly. + // The intention is that texture sampling functions will extract the image and samplers + // separately and take their expressions as needed. + // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler + // expression ala sampler2D(texture, sampler). 
+ SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); - if (i + 1 < c.vector_size()) - res += ", "; - } - } - break; + case TypeAccessChain: + // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. + SPIRV_CROSS_THROW("Access chains have no default expression representation."); - case SPIRType::Int64: - if (splat) + default: + return to_name(id); + } +} + +SmallVector CompilerGLSL::get_composite_constant_ids(ConstantID const_id) +{ + if (auto *constant = maybe_get(const_id)) + { + const auto &type = get(constant->constant_type); + if (is_array(type) || type.basetype == SPIRType::Struct) + return constant->subconstants; + if (is_matrix(type)) + return SmallVector(constant->m.id); + if (is_vector(type)) + return SmallVector(constant->m.c[0].id); + SPIRV_CROSS_THROW("Unexpected scalar constant!"); + } + if (!const_composite_insert_ids.count(const_id)) + SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!"); + return const_composite_insert_ids[const_id]; +} + +void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id, + const SmallVector &initializers) +{ + auto &type = get(type_id); + constant.specialization = true; + if (is_array(type) || type.basetype == SPIRType::Struct) + { + constant.subconstants = initializers; + } + else if (is_matrix(type)) + { + constant.m.columns = type.columns; + for (uint32_t i = 0; i < type.columns; ++i) { - res += convert_to_string(c.scalar_i64(vector, 0)); - if (backend.long_long_literal_suffix) - res += "ll"; - else - res += "l"; + constant.m.id[i] = initializers[i]; + constant.m.c[i].vecsize = type.vecsize; } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar_i64(vector, i)); - if (backend.long_long_literal_suffix) - res += "ll"; - 
else - res += "l"; - } + } + else if (is_vector(type)) + { + constant.m.c[0].vecsize = type.vecsize; + for (uint32_t i = 0; i < type.vecsize; ++i) + constant.m.c[0].id[i] = initializers[i]; + } + else + SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!"); +} - if (i + 1 < c.vector_size()) - res += ", "; - } - } +void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id, + const SmallVector &initializers) +{ + if (maybe_get(const_id)) + { + const_composite_insert_ids[const_id] = initializers; + return; + } + + auto &constant = set(const_id, type_id); + fill_composite_constant(constant, type_id, initializers); + forwarded_temporaries.insert(const_id); +} + +TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx) +{ + auto &type = get(type_id); + if (is_array(type)) + return type.parent_type; + if (type.basetype == SPIRType::Struct) + return type.member_types[member_idx]; + if (is_matrix(type)) + return type.parent_type; + if (is_vector(type)) + return type.parent_type; + SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!"); +} + +string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) +{ + auto &type = get(cop.basetype); + bool binary = false; + bool unary = false; + string op; + + if (is_legacy() && is_unsigned_opcode(cop.opcode)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); + + // TODO: Find a clean way to reuse emit_instruction. 
+ switch (cop.opcode) + { + case OpSConvert: + case OpUConvert: + case OpFConvert: + op = type_to_glsl_constructor(type); break; - case SPIRType::UInt64: - if (splat) +#define GLSL_BOP(opname, x) \ + case Op##opname: \ + binary = true; \ + op = x; \ + break + +#define GLSL_UOP(opname, x) \ + case Op##opname: \ + unary = true; \ + op = x; \ + break + + GLSL_UOP(SNegate, "-"); + GLSL_UOP(Not, "~"); + GLSL_BOP(IAdd, "+"); + GLSL_BOP(ISub, "-"); + GLSL_BOP(IMul, "*"); + GLSL_BOP(SDiv, "/"); + GLSL_BOP(UDiv, "/"); + GLSL_BOP(UMod, "%"); + GLSL_BOP(SMod, "%"); + GLSL_BOP(ShiftRightLogical, ">>"); + GLSL_BOP(ShiftRightArithmetic, ">>"); + GLSL_BOP(ShiftLeftLogical, "<<"); + GLSL_BOP(BitwiseOr, "|"); + GLSL_BOP(BitwiseXor, "^"); + GLSL_BOP(BitwiseAnd, "&"); + GLSL_BOP(LogicalOr, "||"); + GLSL_BOP(LogicalAnd, "&&"); + GLSL_UOP(LogicalNot, "!"); + GLSL_BOP(LogicalEqual, "=="); + GLSL_BOP(LogicalNotEqual, "!="); + GLSL_BOP(IEqual, "=="); + GLSL_BOP(INotEqual, "!="); + GLSL_BOP(ULessThan, "<"); + GLSL_BOP(SLessThan, "<"); + GLSL_BOP(ULessThanEqual, "<="); + GLSL_BOP(SLessThanEqual, "<="); + GLSL_BOP(UGreaterThan, ">"); + GLSL_BOP(SGreaterThan, ">"); + GLSL_BOP(UGreaterThanEqual, ">="); + GLSL_BOP(SGreaterThanEqual, ">="); + + case OpSRem: + { + uint32_t op0 = cop.arguments[0]; + uint32_t op1 = cop.arguments[1]; + return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + } + + case OpSelect: + { + if (cop.arguments.size() < 3) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // This one is pretty annoying. It's triggered from + // uint(bool), int(bool) from spec constants. + // In order to preserve its compile-time constness in Vulkan GLSL, + // we need to reduce the OpSelect expression back to this simplified model. + // If we cannot, fail. 
+ if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) { - res += convert_to_string(c.scalar_u64(vector, 0)); - if (backend.long_long_literal_suffix) - res += "ull"; - else - res += "ul"; + // Implement as a simple cast down below. } else { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar_u64(vector, i)); - if (backend.long_long_literal_suffix) - res += "ull"; - else - res += "ul"; - } - - if (i + 1 < c.vector_size()) - res += ", "; - } + // Implement a ternary and pray the compiler understands it :) + return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]); } break; + } - case SPIRType::UInt: - if (splat) + case OpVectorShuffle: + { + string expr = type_to_glsl_constructor(type); + expr += "("; + + uint32_t left_components = expression_type(cop.arguments[0]).vecsize; + string left_arg = to_enclosed_expression(cop.arguments[0]); + string right_arg = to_enclosed_expression(cop.arguments[1]); + + for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) { - res += convert_to_string(c.scalar(vector, 0)); - if (is_legacy()) + uint32_t index = cop.arguments[i]; + if (index == 0xFFFFFFFF) { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
- if (c.scalar_i32(vector, 0) < 0) - SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); + SPIRConstant c; + c.constant_type = type.parent_type; + assert(type.parent_type != ID(0)); + expr += constant_expression(c); } - else if (backend.uint32_t_literal_suffix) - res += "u"; - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) + else if (index >= left_components) { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar(vector, i)); - if (is_legacy()) - { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. - if (c.scalar_i32(vector, i) < 0) - SPIRV_CROSS_THROW( - "Tried to convert uint literal into int, but this made the literal negative."); - } - else if (backend.uint32_t_literal_suffix) - res += "u"; - } - - if (i + 1 < c.vector_size()) - res += ", "; + expr += right_arg + "." + "xyzw"[index - left_components]; } - } - break; - - case SPIRType::Int: - if (splat) - res += convert_to_string(c.scalar_i32(vector, 0)); - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) + else { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += convert_to_string(c.scalar_i32(vector, i)); - if (i + 1 < c.vector_size()) - res += ", "; + expr += left_arg + "." + "xyzw"[index]; } + + if (i + 1 < uint32_t(cop.arguments.size())) + expr += ", "; } - break; - case SPIRType::UShort: - if (splat) - { - res += convert_to_string(c.scalar(vector, 0)); - if (is_legacy()) - { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
- if (c.scalar_i16(vector, 0) < 0) - SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); - } - else - res += backend.uint16_t_literal_suffix; - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar(vector, i)); - if (is_legacy()) - { - // Fake unsigned constant literals with signed ones if possible. - // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. - if (c.scalar_i16(vector, i) < 0) - SPIRV_CROSS_THROW( - "Tried to convert uint literal into int, but this made the literal negative."); - } - else - res += backend.uint16_t_literal_suffix; - } + expr += ")"; + return expr; + } - if (i + 1 < c.vector_size()) - res += ", "; - } - } - break; + case OpCompositeExtract: + { + auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + return expr; + } - case SPIRType::Short: - if (splat) - { - res += convert_to_string(c.scalar_i16(vector, 0)); - res += backend.int16_t_literal_suffix; - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += convert_to_string(c.scalar_i16(vector, i)); - res += backend.int16_t_literal_suffix; - } - if (i + 1 < c.vector_size()) - res += ", "; - } - } - break; + case OpCompositeInsert: + { + SmallVector new_init = get_composite_constant_ids(cop.arguments[1]); + uint32_t idx; + uint32_t target_id = cop.self; + uint32_t target_type_id = cop.basetype; + // We have to drill down to the part we want to modify, and create new + // constants for each containing part. 
+ for (idx = 2; idx < cop.arguments.size() - 1; ++idx) + { + uint32_t new_const = ir.increase_bound_by(1); + uint32_t old_const = new_init[cop.arguments[idx]]; + new_init[cop.arguments[idx]] = new_const; + set_composite_constant(target_id, target_type_id, new_init); + new_init = get_composite_constant_ids(old_const); + target_id = new_const; + target_type_id = get_composite_member_type(target_type_id, cop.arguments[idx]); + } + // Now replace the initializer with the one from this instruction. + new_init[cop.arguments[idx]] = cop.arguments[0]; + set_composite_constant(target_id, target_type_id, new_init); + SPIRConstant tmp_const(cop.basetype); + fill_composite_constant(tmp_const, cop.basetype, const_composite_insert_ids[cop.self]); + return constant_expression(tmp_const); + } - case SPIRType::UByte: - if (splat) - { - res += convert_to_string(c.scalar_u8(vector, 0)); - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += type_to_glsl(scalar_type); - res += "("; - res += convert_to_string(c.scalar_u8(vector, i)); - res += ")"; - } + default: + // Some opcodes are unimplemented here, these are currently not possible to test from glslang. 
+ SPIRV_CROSS_THROW("Unimplemented spec constant op."); + } - if (i + 1 < c.vector_size()) - res += ", "; - } - } - break; + uint32_t bit_width = 0; + if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert) + bit_width = expression_type(cop.arguments[0]).width; - case SPIRType::SByte: - if (splat) - { - res += convert_to_string(c.scalar_i8(vector, 0)); - } - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - { - res += type_to_glsl(scalar_type); - res += "("; - res += convert_to_string(c.scalar_i8(vector, i)); - res += ")"; - } + SPIRType::BaseType input_type; + bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); - if (i + 1 < c.vector_size()) - res += ", "; - } - } + switch (cop.opcode) + { + case OpIEqual: + case OpINotEqual: + input_type = to_signed_basetype(bit_width); break; - case SPIRType::Boolean: - if (splat) - res += c.scalar(vector, 0) ? "true" : "false"; - else - { - for (uint32_t i = 0; i < c.vector_size(); i++) - { - if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) - res += to_name(c.specialization_constant_id(vector, i)); - else - res += c.scalar(vector, i) ? 
"true" : "false"; + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + case OpSMod: + case OpSDiv: + case OpShiftRightArithmetic: + case OpSConvert: + case OpSNegate: + input_type = to_signed_basetype(bit_width); + break; - if (i + 1 < c.vector_size()) - res += ", "; - } - } + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: + case OpUMod: + case OpUDiv: + case OpShiftRightLogical: + case OpUConvert: + input_type = to_unsigned_basetype(bit_width); break; default: - SPIRV_CROSS_THROW("Invalid constant expression basetype."); + input_type = type.basetype; + break; } - if (c.vector_size() > 1 && !swizzle_splat) - res += ")"; - - return res; -} +#undef GLSL_BOP +#undef GLSL_UOP + if (binary) + { + if (cop.arguments.size() < 2) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); -SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) -{ - forced_temporaries.insert(id); - emit_uninitialized_temporary(type, id); - return set(id, to_name(id), type, true); -} + string cast_op0; + string cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], + cop.arguments[1], skip_cast_if_equal_type); -void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) -{ - // If we're declaring temporaries inside continue blocks, - // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. 
- if (current_continue_block && !hoisted_temporaries.count(result_id)) - { - auto &header = get(current_continue_block->loop_dominator); - if (find_if(begin(header.declare_temporary), end(header.declare_temporary), - [result_type, result_id](const pair &tmp) { - return tmp.first == result_type && tmp.second == result_id; - }) == end(header.declare_temporary)) + if (type.basetype != input_type && type.basetype != SPIRType::Boolean) { - header.declare_temporary.emplace_back(result_type, result_id); - hoisted_temporaries.insert(result_id); - force_recompile(); + expected_type.basetype = input_type; + auto expr = bitcast_glsl_op(type, expected_type); + expr += '('; + expr += join(cast_op0, " ", op, " ", cast_op1); + expr += ')'; + return expr; } + else + return join("(", cast_op0, " ", op, " ", cast_op1, ")"); } - else if (hoisted_temporaries.count(result_id) == 0) + else if (unary) { - auto &type = get(result_type); - auto &flags = ir.meta[result_id].decoration.decoration_flags; + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); - // The result_id has not been made into an expression yet, so use flags interface. - add_local_variable_name(result_id); - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); + // Auto-bitcast to result type as needed. + // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. + return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); } -} - -string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) -{ - auto &type = get(result_type); - auto &flags = ir.meta[result_id].decoration.decoration_flags; - - // If we're declaring temporaries inside continue blocks, - // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. 
- if (current_continue_block && !hoisted_temporaries.count(result_id)) + else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert) { - auto &header = get(current_continue_block->loop_dominator); - if (find_if(begin(header.declare_temporary), end(header.declare_temporary), - [result_type, result_id](const pair &tmp) { - return tmp.first == result_type && tmp.second == result_id; - }) == end(header.declare_temporary)) + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + auto &arg_type = expression_type(cop.arguments[0]); + if (arg_type.width < type.width && input_type != arg_type.basetype) { - header.declare_temporary.emplace_back(result_type, result_id); - hoisted_temporaries.insert(result_id); - force_recompile(); + auto expected = arg_type; + expected.basetype = input_type; + return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")"); } - - return join(to_name(result_id), " = "); - } - else if (hoisted_temporaries.count(result_id)) - { - // The temporary has already been declared earlier, so just "declare" the temporary by writing to it. - return join(to_name(result_id), " = "); + else + return join(op, "(", to_expression(cop.arguments[0]), ")"); } else { - // The result_id has not been made into an expression yet, so use flags interface. 
- add_local_variable_name(result_id); - return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + return join(op, "(", to_expression(cop.arguments[0]), ")"); } } -bool CompilerGLSL::expression_is_forwarded(uint32_t id) +string CompilerGLSL::constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope) { - return forwarded_temporaries.find(id) != end(forwarded_temporaries); -} + auto &type = get(c.constant_type); -SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, - bool suppress_usage_tracking) -{ - if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) + if (type.pointer) { - // Just forward it without temporary. - // If the forward is trivial, we do not force flushing to temporary for this expression. - if (!suppress_usage_tracking) - forwarded_temporaries.insert(result_id); - - return set(result_id, rhs, result_type, true); + return backend.null_pointer_literal; } - else + else if (!c.subconstants.empty()) { - // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). - statement(declare_temporary(result_type, result_id), rhs, ";"); - return set(result_id, to_name(result_id), result_type, true); - } -} + // Handles Arrays and structures. + string res; -void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) -{ - bool forward = should_forward(op0); - emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); - inherit_expression_dependencies(result_id, op0); -} + // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration. + // Outside a block-like struct declaration, we can always bind to a constant array with templated type. 
+ // Should look at ArrayStride here as well, but it's possible to declare a constant struct + // with Offset = 0, using no ArrayStride on the enclosed array type. + // A particular CTS test hits this scenario. + bool array_type_decays = inside_block_like_struct_scope && + !type.array.empty() && !backend.array_is_value_type_in_buffer_blocks; -void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) -{ - bool forward = should_forward(op0) && should_forward(op1); - emit_op(result_type, result_id, - join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); + // Allow Metal to use the array template to make arrays a value type + bool needs_trailing_tracket = false; + if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && + type.array.empty()) + { + res = type_to_glsl_constructor(type) + "{ "; + } + else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type && + !type.array.empty() && !array_type_decays) + { + res = type_to_glsl_constructor(type) + "({ "; + needs_trailing_tracket = true; + } + else if (backend.use_initializer_list) + { + res = "{ "; + } + else + { + res = type_to_glsl_constructor(type) + "("; + } - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); -} + uint32_t subconstant_index = 0; + for (auto &elem : c.subconstants) + { + if (auto *op = maybe_get(elem)) + { + res += constant_op_expression(*op); + } + else if (maybe_get(elem) != nullptr) + { + res += to_name(elem); + } + else + { + auto &subc = get(elem); + if (subc.specialization && !expression_is_forwarded(elem)) + res += to_name(elem); + else + { + if (type.array.empty() && type.basetype == SPIRType::Struct) + { + // When we get down to emitting struct members, override the block-like information. 
+ // For constants, we can freely mix and match block-like state. + inside_block_like_struct_scope = + has_member_decoration(type.self, subconstant_index, DecorationOffset); + } -void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) -{ - auto &type = get(result_type); - auto expr = type_to_glsl_constructor(type); - expr += '('; - for (uint32_t i = 0; i < type.vecsize; i++) - { - // Make sure to call to_expression multiple times to ensure - // that these expressions are properly flushed to temporaries if needed. - expr += op; - expr += to_extract_component_expression(operand, i); + res += constant_expression(subc, inside_block_like_struct_scope); + } + } - if (i + 1 < type.vecsize) - expr += ", "; - } - expr += ')'; - emit_op(result_type, result_id, expr, should_forward(operand)); + if (&elem != &c.subconstants.back()) + res += ", "; - inherit_expression_dependencies(result_id, operand); -} + subconstant_index++; + } -void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op) -{ - auto &type = get(result_type); - auto expr = type_to_glsl_constructor(type); - expr += '('; - for (uint32_t i = 0; i < type.vecsize; i++) - { - // Make sure to call to_expression multiple times to ensure - // that these expressions are properly flushed to temporaries if needed. - expr += to_extract_component_expression(op0, i); - expr += ' '; - expr += op; - expr += ' '; - expr += to_extract_component_expression(op1, i); + res += backend.use_initializer_list ? " }" : ")"; + if (needs_trailing_tracket) + res += ")"; - if (i + 1 < type.vecsize) - expr += ", "; + return res; } - expr += ')'; - emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0) + { + // Metal tessellation likes empty structs which are then constant expressions. 
+ if (backend.supports_empty_struct) + return "{ }"; + else if (backend.use_typed_initializer_list) + return join(type_to_glsl(get(c.constant_type)), "{ 0 }"); + else if (backend.use_initializer_list) + return "{ 0 }"; + else + return join(type_to_glsl(get(c.constant_type)), "(0)"); + } + else if (c.columns() == 1) + { + return constant_expression_vector(c, 0); + } + else + { + string res = type_to_glsl(get(c.constant_type)) + "("; + for (uint32_t col = 0; col < c.columns(); col++) + { + if (c.specialization_constant_id(col) != 0) + res += to_name(c.specialization_constant_id(col)); + else + res += constant_expression_vector(c, col); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + if (col + 1 < c.columns()) + res += ", "; + } + res += ")"; + return res; + } } -SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, - uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) -{ - auto &type0 = expression_type(op0); - auto &type1 = expression_type(op1); - - // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. - // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected - // since equality test is exactly the same. - bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); +#ifdef _MSC_VER +// snprintf does not exist or is buggy on older MSVC versions, some of them +// being used by MinGW. Use sprintf instead and disable corresponding warning. +#pragma warning(push) +#pragma warning(disable : 4996) +#endif - // Create a fake type so we can bitcast to it. - // We only deal with regular arithmetic types here like int, uints and so on. 
- SPIRType expected_type; - expected_type.basetype = input_type; - expected_type.vecsize = type0.vecsize; - expected_type.columns = type0.columns; - expected_type.width = type0.width; +string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_f16(col, row); - if (cast) + // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots + // of complicated workarounds, just value-cast to the half type always. + if (std::isnan(float_value) || std::isinf(float_value)) { - cast_op0 = bitcast_glsl(expected_type, op0); - cast_op1 = bitcast_glsl(expected_type, op1); + SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + + if (float_value == numeric_limits::infinity()) + res = join(type_to_glsl(type), "(1.0 / 0.0)"); + else if (float_value == -numeric_limits::infinity()) + res = join(type_to_glsl(type), "(-1.0 / 0.0)"); + else if (std::isnan(float_value)) + res = join(type_to_glsl(type), "(0.0 / 0.0)"); + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); } else { - // If we don't cast, our actual input type is that of the first (or second) argument. 
- cast_op0 = to_enclosed_unpacked_expression(op0); - cast_op1 = to_enclosed_unpacked_expression(op1); - input_type = type0.basetype; + SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")"); } - return expected_type; + return res; } -void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { - string cast_op0, cast_op1; - auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); - auto &out_type = get(result_type); + string res; + float float_value = c.scalar_f32(col, row); - // We might have casted away from the result type, so bitcast again. - // For example, arithmetic right shift with uint inputs. - // Special case boolean outputs since relational opcodes output booleans instead of int/uint. - string expr; - if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + if (std::isnan(float_value) || std::isinf(float_value)) { - expected_type.basetype = input_type; - expr = bitcast_glsl_op(out_type, expected_type); - expr += '('; - expr += join(cast_op0, " ", op, " ", cast_op1); - expr += ')'; + // Use special representation. 
+ if (!is_legacy()) + { + SPIRType out_type; + SPIRType in_type; + out_type.basetype = SPIRType::Float; + in_type.basetype = SPIRType::UInt; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 32; + in_type.width = 32; + + char print_buffer[32]; +#ifdef _WIN32 + sprintf(print_buffer, "0x%xu", c.scalar(col, row)); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%xu", c.scalar(col, row)); +#endif + + const char *comment = "inf"; + if (float_value == -numeric_limits::infinity()) + comment = "-inf"; + else if (std::isnan(float_value)) + comment = "nan"; + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)"); + } + else + { + if (float_value == numeric_limits::infinity()) + { + if (backend.float_literal_suffix) + res = "(1.0f / 0.0f)"; + else + res = "(1.0 / 0.0)"; + } + else if (float_value == -numeric_limits::infinity()) + { + if (backend.float_literal_suffix) + res = "(-1.0f / 0.0f)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(float_value)) + { + if (backend.float_literal_suffix) + res = "(0.0f / 0.0f)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } } else - expr += join(cast_op0, " ", op, " ", cast_op1); + { + res = convert_to_string(float_value, current_locale_radix_character); + if (backend.float_literal_suffix) + res += "f"; + } - emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + return res; } -void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { - bool forward = should_forward(op0); - emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward); - inherit_expression_dependencies(result_id, 
op0); -} + string res; + double double_value = c.scalar_f64(col, row); -void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op) -{ - bool forward = should_forward(op0) && should_forward(op1); - emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"), - forward); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); -} - -void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, - SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) -{ - auto &out_type = get(result_type); - auto &expr_type = expression_type(op0); - auto expected_type = out_type; - - // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. - expected_type.basetype = input_type; - expected_type.width = expr_type.width; - string cast_op = expr_type.basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); - - string expr; - if (out_type.basetype != expected_result_type) - { - expected_type.basetype = expected_result_type; - expected_type.width = out_type.width; - expr = bitcast_glsl_op(out_type, expected_type); - expr += '('; - expr += join(op, "(", cast_op, ")"); - expr += ')'; - } - else + if (std::isnan(double_value) || std::isinf(double_value)) { - expr += join(op, "(", cast_op, ")"); - } - - emit_op(result_type, result_id, expr, should_forward(op0)); - inherit_expression_dependencies(result_id, op0); -} + // Use special representation. 
+ if (!is_legacy()) + { + SPIRType out_type; + SPIRType in_type; + out_type.basetype = SPIRType::Double; + in_type.basetype = SPIRType::UInt64; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 64; + in_type.width = 64; -void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - uint32_t op2, const char *op, SPIRType::BaseType input_type) -{ - auto &out_type = get(result_type); - auto expected_type = out_type; - expected_type.basetype = input_type; - string cast_op0 = - expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); - string cast_op1 = - expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1); - string cast_op2 = - expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2); + uint64_t u64_value = c.scalar_u64(col, row); - string expr; - if (out_type.basetype != input_type) - { - expr = bitcast_glsl_op(out_type, expected_type); - expr += '('; - expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); - expr += ')'; - } - else - { - expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); - } + if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310. + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); + require_extension_internal("GL_ARB_gpu_shader_int64"); - emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); - inherit_expression_dependencies(result_id, op2); -} + char print_buffer[64]; +#ifdef _WIN32 + sprintf(print_buffer, "0x%llx%s", static_cast(u64_value), + backend.long_long_literal_suffix ? 
"ull" : "ul"); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%llx%s", static_cast(u64_value), + backend.long_long_literal_suffix ? "ull" : "ul"); +#endif -void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) -{ - string cast_op0, cast_op1; - auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); - auto &out_type = get(result_type); + const char *comment = "inf"; + if (double_value == -numeric_limits::infinity()) + comment = "-inf"; + else if (std::isnan(double_value)) + comment = "nan"; + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)"); + } + else + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (options.version < 400) + require_extension_internal("GL_ARB_gpu_shader_fp64"); - // Special case boolean outputs since relational opcodes output booleans instead of int/uint. 
- string expr; - if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) - { - expected_type.basetype = input_type; - expr = bitcast_glsl_op(out_type, expected_type); - expr += '('; - expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); - expr += ')'; + if (double_value == numeric_limits::infinity()) + { + if (backend.double_literal_suffix) + res = "(1.0lf / 0.0lf)"; + else + res = "(1.0 / 0.0)"; + } + else if (double_value == -numeric_limits::infinity()) + { + if (backend.double_literal_suffix) + res = "(-1.0lf / 0.0lf)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(double_value)) + { + if (backend.double_literal_suffix) + res = "(0.0lf / 0.0lf)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } } else { - expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + res = convert_to_string(double_value, current_locale_radix_character); + if (backend.double_literal_suffix) + res += "lf"; } - emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + return res; } -void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - uint32_t op2, const char *op) +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) { - bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); - emit_op(result_type, result_id, - join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", - to_unpacked_expression(op2), ")"), - forward); + auto type = get(c.constant_type); + type.columns = 1; - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); - inherit_expression_dependencies(result_id, op2); -} + auto scalar_type = type; + 
scalar_type.vecsize = 1; -void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - uint32_t op2, uint32_t op3, const char *op) -{ - bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); - emit_op(result_type, result_id, - join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", - to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"), - forward); + string res; + bool splat = backend.use_constructor_splatting && c.vector_size() > 1; + bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); - inherit_expression_dependencies(result_id, op2); - inherit_expression_dependencies(result_id, op3); -} + if (!type_is_floating_point(type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } -// EXT_shader_texture_lod only concerns fragment shaders so lod tex functions -// are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex -// functions in vertex shaders so we revert those back to plain calls when -// the lod is a constant value of zero. -bool CompilerGLSL::check_explicit_lod_allowed(uint32_t lod) -{ - auto &execution = get_entry_point(); - bool allowed = !is_legacy_es() || execution.model == ExecutionModelFragment; - if (!allowed && lod != 0) + if (splat || swizzle_splat) { - auto *lod_constant = maybe_get(lod); - if (!lod_constant || lod_constant->scalar_f32() != 0.0f) + // Cannot use constant splatting if we have specialization constants somewhere in the vector. 
+ for (uint32_t i = 0; i < c.vector_size(); i++) { - SPIRV_CROSS_THROW("Explicit lod not allowed in legacy ES non-fragment shaders."); + if (c.specialization_constant_id(vector, i) != 0) + { + splat = false; + swizzle_splat = false; + break; + } } } - return allowed; -} -string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t tex) -{ - const char *type; - switch (imgtype.image.dim) + if (splat || swizzle_splat) { - case spv::Dim1D: - type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; - break; - case spv::Dim2D: - type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; - break; - case spv::Dim3D: - type = "3D"; - break; - case spv::DimCube: - type = "Cube"; - break; - case spv::DimRect: - type = "2DRect"; - break; - case spv::DimBuffer: - type = "Buffer"; - break; - case spv::DimSubpassData: - type = "2D"; - break; - default: - type = ""; - break; + if (type.width == 64) + { + uint64_t ident = c.scalar_u64(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar_u64(vector, i)) + { + splat = false; + swizzle_splat = false; + break; + } + } + } + else + { + uint32_t ident = c.scalar(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar(vector, i)) + { + splat = false; + swizzle_splat = false; + } + } + } } - bool use_explicit_lod = check_explicit_lod_allowed(lod); + if (c.vector_size() > 1 && !swizzle_splat) + res += type_to_glsl(type) + "("; - if (op == "textureLod" || op == "textureProjLod" || op == "textureGrad" || op == "textureProjGrad") + switch (type.basetype) { - if (is_legacy_es()) + case SPIRType::Half: + if (splat || swizzle_splat) { - if (use_explicit_lod) - require_extension_internal("GL_EXT_shader_texture_lod"); + res += convert_half_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); } - else if (is_legacy()) - require_extension_internal("GL_ARB_shader_texture_lod"); 
- } - - if (op == "textureLodOffset" || op == "textureProjLodOffset") - { - if (is_legacy_es()) - SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES")); + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_half_to_string(c, vector, i); - require_extension_internal("GL_EXT_gpu_shader4"); - } + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; - // GLES has very limited support for shadow samplers. - // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, - // everything else can just throw - if (image_is_comparison(imgtype, tex) && is_legacy_es()) - { - if (op == "texture" || op == "textureProj") - require_extension_internal("GL_EXT_shadow_samplers"); + case SPIRType::Float: + if (splat || swizzle_splat) + { + res += convert_float_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); + } else - SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES")); - } + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_float_to_string(c, vector, i); - bool is_es_and_depth = is_legacy_es() && image_is_comparison(imgtype, tex); - std::string type_prefix = image_is_comparison(imgtype, tex) ? "shadow" : "texture"; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; - if (op == "texture") - return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type); - else if (op == "textureLod") - { - if (use_explicit_lod) - return join(type_prefix, type, is_legacy_es() ? "LodEXT" : "Lod"); - else - return join(type_prefix, type); - } - else if (op == "textureProj") - return join(type_prefix, type, is_es_and_depth ? 
"ProjEXT" : "Proj"); - else if (op == "textureGrad") - return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad"); - else if (op == "textureProjLod") - { - if (use_explicit_lod) - return join(type_prefix, type, is_legacy_es() ? "ProjLodEXT" : "ProjLod"); - else - return join(type_prefix, type, "Proj"); - } - else if (op == "textureLodOffset") - { - if (use_explicit_lod) - return join(type_prefix, type, "LodOffset"); + case SPIRType::Double: + if (splat || swizzle_splat) + { + res += convert_double_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); + } else - return join(type_prefix, type); - } - else if (op == "textureProjGrad") - return join(type_prefix, type, - is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad"); - else if (op == "textureProjLodOffset") + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_double_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int64: { - if (use_explicit_lod) - return join(type_prefix, type, "ProjLodOffset"); + auto tmp = type; + tmp.vecsize = 1; + tmp.columns = 1; + auto int64_type = type_to_glsl(tmp); + + if (splat) + { + res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix); + } else - return join(type_prefix, type, "ProjOffset"); - } - else - { - SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); - } -} + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix); -bool 
CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) -{ - auto *cleft = maybe_get(left); - auto *cright = maybe_get(right); - auto &lerptype = expression_type(lerp); + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + } - // If our targets aren't constants, we cannot use construction. - if (!cleft || !cright) - return false; + case SPIRType::UInt64: + if (splat) + { + res += convert_to_string(c.scalar_u64(vector, 0)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar_u64(vector, i)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } - // If our targets are spec constants, we cannot use construction. - if (cleft->specialization || cright->specialization) - return false; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; - // We can only use trivial construction if we have a scalar - // (should be possible to do it for vectors as well, but that is overkill for now). - if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1) - return false; + case SPIRType::UInt: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
+ if (c.scalar_i32(vector, 0) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar(vector, i)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. + if (c.scalar_i32(vector, i) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made " + "the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } - // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. - bool ret = false; - switch (type.basetype) - { - case SPIRType::Short: - case SPIRType::UShort: - ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1; + if (i + 1 < c.vector_size()) + res += ", "; + } + } break; case SPIRType::Int: - case SPIRType::UInt: - ret = cleft->scalar() == 0 && cright->scalar() == 1; + if (splat) + res += convert_to_string(c.scalar_i32(vector, 0)); + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_to_string(c.scalar_i32(vector, i)); + if (i + 1 < c.vector_size()) + res += ", "; + } + } break; - case SPIRType::Half: - ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f; - break; + case SPIRType::UShort: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && 
c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + if (*backend.uint16_t_literal_suffix) + { + res += convert_to_string(c.scalar_u16(vector, i)); + res += backend.uint16_t_literal_suffix; + } + else + { + // If backend doesn't have a literal suffix, we need to value cast. + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u16(vector, i)); + res += ")"; + } + } - case SPIRType::Float: - ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f; + if (i + 1 < c.vector_size()) + res += ", "; + } + } break; - case SPIRType::Double: - ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0; - break; + case SPIRType::Short: + if (splat) + { + res += convert_to_string(c.scalar_i16(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + if (*backend.int16_t_literal_suffix) + { + res += convert_to_string(c.scalar_i16(vector, i)); + res += backend.int16_t_literal_suffix; + } + else + { + // If backend doesn't have a literal suffix, we need to value cast. 
+ res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i16(vector, i)); + res += ")"; + } + } - case SPIRType::Int64: - case SPIRType::UInt64: - ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1; + if (i + 1 < c.vector_size()) + res += ", "; + } + } break; - default: - break; - } + case SPIRType::UByte: + if (splat) + { + res += convert_to_string(c.scalar_u8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u8(vector, i)); + res += ")"; + } - if (ret) - op = type_to_glsl_constructor(type); - return ret; -} + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; -string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, - uint32_t false_value) -{ - string expr; - auto &lerptype = expression_type(select); + case SPIRType::SByte: + if (splat) + { + res += convert_to_string(c.scalar_i8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i8(vector, i)); + res += ")"; + } - if (lerptype.vecsize == 1) - expr = join(to_enclosed_expression(select), " ? 
", to_enclosed_pointer_expression(true_value), " : ", - to_enclosed_pointer_expression(false_value)); - else - { - auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); }; + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; - expr = type_to_glsl_constructor(restype); - expr += "("; - for (uint32_t i = 0; i < restype.vecsize; i++) + case SPIRType::Boolean: + if (splat) + res += c.scalar(vector, 0) ? "true" : "false"; + else { - expr += swiz(select, i); - expr += " ? "; - expr += swiz(true_value, i); - expr += " : "; - expr += swiz(false_value, i); - if (i + 1 < restype.vecsize) - expr += ", "; + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += c.scalar(vector, i) ? "true" : "false"; + + if (i + 1 < c.vector_size()) + res += ", "; + } } - expr += ")"; + break; + + default: + SPIRV_CROSS_THROW("Invalid constant expression basetype."); } - return expr; + if (c.vector_size() > 1 && !swizzle_splat) + res += ")"; + + return res; } -void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) +SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) { - auto &lerptype = expression_type(lerp); - auto &restype = get(result_type); + forced_temporaries.insert(id); + emit_uninitialized_temporary(type, id); + return set(id, to_name(id), type, true); +} - // If this results in a variable pointer, assume it may be written through. - if (restype.pointer) +void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) +{ + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. 
+ if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id)) { - register_write(left); - register_write(right); + auto &header = get(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) + { + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile(); + } } + else if (hoisted_temporaries.count(result_id) == 0) + { + auto &type = get(result_type); + auto &flags = get_decoration_bitset(result_id); - string mix_op; - bool has_boolean_mix = backend.boolean_mix_support && - ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); - bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); + // The result_id has not been made into an expression yet, so use flags interface. + add_local_variable_name(result_id); - // Cannot use boolean mix when the lerp argument is just one boolean, - // fall back to regular trinary statements. - if (lerptype.vecsize == 1) - has_boolean_mix = false; + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(result_type)); - // If we can reduce the mix to a simple cast, do so. - // This helps for cases like int(bool), uint(bool) which is implemented with - // OpSelect bool 1 0. - if (trivial_mix) - { - emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); - } - else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) - { - // Boolean mix not supported on desktop without extension. - // Was added in OpenGL 4.5 with ES 3.1 compat. - // - // Could use GL_EXT_shader_integer_mix on desktop at least, - // but Apple doesn't support it. 
:( - // Just implement it as ternary expressions. - auto expr = to_ternary_expression(get(result_type), lerp, right, left); - emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); - inherit_expression_dependencies(id, left); - inherit_expression_dependencies(id, right); - inherit_expression_dependencies(id, lerp); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";"); } - else - emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); } -string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id) +string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) { - // Keep track of the array indices we have used to load the image. - // We'll need to use the same array index into the combined image sampler array. - auto image_expr = to_expression(image_id); - string array_expr; - auto array_index = image_expr.find_first_of('['); - if (array_index != string::npos) - array_expr = image_expr.substr(array_index, string::npos); - - auto &args = current_function->arguments; - - // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect - // all possible combinations into new sampler2D uniforms. 
- auto *image = maybe_get_backing_variable(image_id); - auto *samp = maybe_get_backing_variable(samp_id); - if (image) - image_id = image->self; - if (samp) - samp_id = samp->self; - - auto image_itr = find_if(begin(args), end(args), - [image_id](const SPIRFunction::Parameter ¶m) { return param.id == image_id; }); - - auto sampler_itr = find_if(begin(args), end(args), - [samp_id](const SPIRFunction::Parameter ¶m) { return param.id == samp_id; }); + auto &type = get(result_type); - if (image_itr != end(args) || sampler_itr != end(args)) + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. + if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id)) { - // If any parameter originates from a parameter, we will find it in our argument list. - bool global_image = image_itr == end(args); - bool global_sampler = sampler_itr == end(args); - uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args)); - uint32_t sid = global_sampler ? 
samp_id : uint32_t(sampler_itr - begin(args)); - - auto &combined = current_function->combined_parameters; - auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { - return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && - p.sampler_id == sid; - }); - - if (itr != end(combined)) - return to_expression(itr->id) + array_expr; - else + auto &header = get(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) { - SPIRV_CROSS_THROW( - "Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used " - "before compile() was called?"); + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile_guarantee_forward_progress(); } + + return join(to_name(result_id), " = "); + } + else if (hoisted_temporaries.count(result_id)) + { + // The temporary has already been declared earlier, so just "declare" the temporary by writing to it. + return join(to_name(result_id), " = "); } else { - // For global sampler2D, look directly at the global remapping table. - auto &mapping = combined_image_samplers; - auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { - return combined.image_id == image_id && combined.sampler_id == samp_id; - }); - - if (itr != end(combined_image_samplers)) - return to_expression(itr->combined_id) + array_expr; - else - { - SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " - "before compile() was called?"); - } + // The result_id has not been made into an expression yet, so use flags interface. 
+ add_local_variable_name(result_id); + auto &flags = get_decoration_bitset(result_id); + return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); } } -void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) +bool CompilerGLSL::expression_is_forwarded(uint32_t id) const { - if (options.vulkan_semantics && combined_image_samplers.empty()) + return forwarded_temporaries.count(id) != 0; +} + +bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const +{ + return suppressed_usage_tracking.count(id) != 0; +} + +bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const +{ + auto *expr = maybe_get(id); + if (!expr) + return false; + + // If we're emitting code at a deeper loop level than when we emitted the expression, + // we're probably reading the same expression over and over. + return current_loop_level > expr->emitted_loop_level; +} + +SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, + bool suppress_usage_tracking) +{ + if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) { - emit_binary_func_op(result_type, result_id, image_id, samp_id, - type_to_glsl(get(result_type), result_id).c_str()); + // Just forward it without temporary. + // If the forward is trivial, we do not force flushing to temporary for this expression. + forwarded_temporaries.insert(result_id); + if (suppress_usage_tracking) + suppressed_usage_tracking.insert(result_id); - // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. - forwarded_temporaries.erase(result_id); + return set(result_id, rhs, result_type, true); } else { - // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. 
- emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). + statement(declare_temporary(result_type, result_id), rhs, ";"); + return set(result_id, to_name(result_id), result_type, true); } } -static inline bool image_opcode_is_sample_no_dref(Op op) +void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { - switch (op) - { - case OpImageSampleExplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjExplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageFetch: - case OpImageRead: - case OpImageSparseSampleExplicitLod: - case OpImageSparseSampleImplicitLod: - case OpImageSparseSampleProjExplicitLod: - case OpImageSparseSampleProjImplicitLod: - case OpImageSparseFetch: - case OpImageSparseRead: - return true; - - default: - return false; - } + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); + inherit_expression_dependencies(result_id, op0); } -void CompilerGLSL::emit_texture_op(const Instruction &i) +void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { - auto *ops = stream(i); - auto op = static_cast(i.op); - uint32_t length = i.length; + auto &type = get(result_type); + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), forward); + inherit_expression_dependencies(result_id, op0); +} - SmallVector inherited_expressions; +void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) +{ + // Various FP arithmetic opcodes such as add, sub, mul will hit this. 
+ bool force_temporary_precise = backend.support_precise_qualifier && + has_decoration(result_id, DecorationNoContraction) && + type_is_floating_point(get(result_type)); + bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise; - uint32_t result_type_id = ops[0]; - uint32_t id = ops[1]; - uint32_t img = ops[2]; - uint32_t coord = ops[3]; - uint32_t dref = 0; - uint32_t comp = 0; - bool gather = false; - bool proj = false; - bool fetch = false; - const uint32_t *opt = nullptr; + emit_op(result_type, result_id, + join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); - auto &result_type = get(result_type_id); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} - inherited_expressions.push_back(coord); +void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) +{ + auto &type = get(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. + expr += op; + expr += to_extract_component_expression(operand, i); - // Make sure non-uniform decoration is back-propagated to where it needs to be. 
- if (has_decoration(img, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(img); + if (i + 1 < type.vecsize) + expr += ", "; + } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(operand)); - switch (op) - { - case OpImageSampleDrefImplicitLod: - case OpImageSampleDrefExplicitLod: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - break; + inherit_expression_dependencies(result_id, operand); +} - case OpImageSampleProjDrefImplicitLod: - case OpImageSampleProjDrefExplicitLod: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - proj = true; - break; +void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, bool negate, SPIRType::BaseType expected_type) +{ + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); - case OpImageDrefGather: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - gather = true; - break; + SPIRType target_type0 = type0; + SPIRType target_type1 = type1; + target_type0.basetype = expected_type; + target_type1.basetype = expected_type; + target_type0.vecsize = 1; + target_type1.vecsize = 1; - case OpImageGather: - comp = ops[4]; - opt = &ops[5]; - length -= 5; - gather = true; - break; + auto &type = get(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. 
+ if (negate) + expr += "!("; - case OpImageFetch: - case OpImageRead: // Reads == fetches in Metal (other langs will not get here) - opt = &ops[4]; - length -= 4; - fetch = true; - break; + if (expected_type != SPIRType::Unknown && type0.basetype != expected_type) + expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i)); + else + expr += to_extract_component_expression(op0, i); - case OpImageSampleProjImplicitLod: - case OpImageSampleProjExplicitLod: - opt = &ops[4]; - length -= 4; - proj = true; - break; + expr += ' '; + expr += op; + expr += ' '; - default: - opt = &ops[4]; - length -= 4; - break; - } + if (expected_type != SPIRType::Unknown && type1.basetype != expected_type) + expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i)); + else + expr += to_extract_component_expression(op1, i); - // Bypass pointers because we need the real image struct - auto &type = expression_type(img); - auto &imgtype = get(type.self); + if (negate) + expr += ")"; - uint32_t coord_components = 0; - switch (imgtype.image.dim) - { - case spv::Dim1D: - coord_components = 1; - break; - case spv::Dim2D: - coord_components = 2; - break; - case spv::Dim3D: - coord_components = 3; - break; - case spv::DimCube: - coord_components = 3; - break; - case spv::DimBuffer: - coord_components = 1; - break; - default: - coord_components = 2; - break; + if (i + 1 < type.vecsize) + expr += ", "; } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); - if (dref) - inherited_expressions.push_back(dref); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} - if (proj) - coord_components++; - if (imgtype.image.arrayed) - coord_components++; +SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, + uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) +{ + 
auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); - uint32_t bias = 0; - uint32_t lod = 0; - uint32_t grad_x = 0; - uint32_t grad_y = 0; - uint32_t coffset = 0; - uint32_t offset = 0; - uint32_t coffsets = 0; - uint32_t sample = 0; - uint32_t minlod = 0; - uint32_t flags = 0; + // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. + // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected + // since equality test is exactly the same. + bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); - if (length) + // Create a fake type so we can bitcast to it. + // We only deal with regular arithmetic types here like int, uints and so on. + SPIRType expected_type; + expected_type.basetype = input_type; + expected_type.vecsize = type0.vecsize; + expected_type.columns = type0.columns; + expected_type.width = type0.width; + + if (cast) { - flags = *opt++; - length--; + cast_op0 = bitcast_glsl(expected_type, op0); + cast_op1 = bitcast_glsl(expected_type, op1); + } + else + { + // If we don't cast, our actual input type is that of the first (or second) argument. 
+ cast_op0 = to_enclosed_unpacked_expression(op0); + cast_op1 = to_enclosed_unpacked_expression(op1); + input_type = type0.basetype; } - auto test = [&](uint32_t &v, uint32_t flag) { - if (length && (flags & flag)) - { - v = *opt++; - inherited_expressions.push_back(v); - length--; - } - }; + return expected_type; +} - test(bias, ImageOperandsBiasMask); - test(lod, ImageOperandsLodMask); - test(grad_x, ImageOperandsGradMask); - test(grad_y, ImageOperandsGradMask); - test(coffset, ImageOperandsConstOffsetMask); - test(offset, ImageOperandsOffsetMask); - test(coffsets, ImageOperandsConstOffsetsMask); - test(sample, ImageOperandsSampleMask); - test(minlod, ImageOperandsMinLodMask); +bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) +{ + // Some bitcasts may require complex casting sequences, and are implemented here. + // Otherwise a simply unary function will do with bitcast_glsl_op. + auto &output_type = get(result_type); + auto &input_type = expression_type(op0); string expr; - bool forward = false; - expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset), - (!!grad_x || !!grad_y), !!dref, lod, minlod); - expr += "("; - expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod, - coffset, offset, bias, comp, sample, minlod, &forward); - expr += ")"; - // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. 
- if (is_legacy() && image_is_comparison(imgtype, img)) - expr += ".r"; + if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1) + expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))"); + else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half && + input_type.vecsize == 2) + expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))"); + else + return false; - // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. - // Remap back to 4 components as sampling opcodes expect. - if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) - { - bool image_is_depth = false; - const auto *combined = maybe_get(img); - uint32_t image_id = combined ? combined->image : img; + emit_op(result_type, id, expr, should_forward(op0)); + return true; +} - if (combined && image_is_comparison(imgtype, combined->image)) - image_is_depth = true; - else if (image_is_comparison(imgtype, img)) - image_is_depth = true; +void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, + bool skip_cast_if_equal_type, + bool implicit_integer_promotion) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get(result_type); - // We must also check the backing variable for the image. - // We might have loaded an OpImage, and used that handle for two different purposes. - // Once with comparison, once without. - auto *image_variable = maybe_get_backing_variable(image_id); - if (image_variable && image_is_comparison(get(image_variable->basetype), image_variable->self)) - image_is_depth = true; + // We might have casted away from the result type, so bitcast again. 
+ // For example, arithmetic right shift with uint inputs. + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + auto bitop = join(cast_op0, " ", op, " ", cast_op1); + string expr; - if (image_is_depth) - expr = remap_swizzle(result_type, 1, expr); + if (implicit_integer_promotion) + { + // Simple value cast. + expr = join(type_to_glsl(out_type), '(', bitop, ')'); } - - if (!backend.support_small_type_sampling_result && result_type.width < 32) + else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { - // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. - // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. - expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); + expected_type.basetype = input_type; + expr = join(bitcast_glsl_op(out_type, expected_type), '(', bitop, ')'); } - - // Deals with reads from MSL. We might need to downconvert to fewer components. 
- if (op == OpImageRead) - expr = remap_swizzle(result_type, 4, expr); - - emit_op(result_type_id, id, expr, forward); - for (auto &inherit : inherited_expressions) - inherit_expression_dependencies(id, inherit); - - switch (op) + else { - case OpImageSampleDrefImplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleProjDrefImplicitLod: - register_control_dependent_expression(id); - break; - - default: - break; + expr = std::move(bitop); } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); } -bool CompilerGLSL::expression_is_constant_null(uint32_t id) const +void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { - auto *c = maybe_get(id); - if (!c) - return false; - return c->constant_is_null(); + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward); + inherit_expression_dependencies(result_id, op0); } -// Returns the function name for a texture sampling function for the specified image and sampling characteristics. -// For some subclasses, the function is a method on the specified image. -string CompilerGLSL::to_function_name(uint32_t tex, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool, - uint32_t lod, uint32_t minlod) +void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) { - if (minlod != 0) - SPIRV_CROSS_THROW("Sparse texturing not yet supported."); - - string fname; - - // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. - // To emulate this, we will have to use textureGrad with a constant gradient of 0. 
- // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. - // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. - bool workaround_lod_array_shadow_as_grad = false; - if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && - image_is_comparison(imgtype, tex) && lod) - { - if (!expression_is_constant_null(lod)) - { - SPIRV_CROSS_THROW( - "textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be expressed in GLSL."); - } - workaround_lod_array_shadow_as_grad = true; - } - - if (is_fetch) - fname += "texelFetch"; - else - { - fname += "texture"; - - if (is_gather) - fname += "Gather"; - if (has_array_offsets) - fname += "Offsets"; - if (is_proj) - fname += "Proj"; - if (has_grad || workaround_lod_array_shadow_as_grad) - fname += "Grad"; - if (!!lod && !workaround_lod_array_shadow_as_grad) - fname += "Lod"; - } - - if (has_offset) - fname += "Offset"; - - return is_legacy() ? legacy_tex_op(fname, imgtype, lod, tex) : fname; + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"), + forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); } -std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) +void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) { - auto *var = maybe_get_backing_variable(id); - - // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. - // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. 
- if (var) + auto &type = get(result_type); + if (type_is_floating_point(type)) { - auto &type = get(var->basetype); - if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) - { - if (options.vulkan_semantics) - { - // Newer glslang supports this extension to deal with texture2D as argument to texture functions. - if (dummy_sampler_id) - SPIRV_CROSS_THROW("Vulkan GLSL should not have a dummy sampler for combining."); - require_extension_internal("GL_EXT_samplerless_texture_functions"); - } - else - { - if (!dummy_sampler_id) - SPIRV_CROSS_THROW( - "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?"); - - return to_combined_image_sampler(id, dummy_sampler_id); - } - } + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics."); + if (options.es) + SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL."); + require_extension_internal("GL_EXT_shader_atomic_float"); } - return to_expression(id); + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, + join(op, "(", to_non_uniform_aware_expression(op0), ", ", + to_unpacked_expression(op1), ")"), false); + flush_all_atomic_capable_variables(); } -// Returns the function args for a texture sampling function for the specified image and sampling characteristics. 
-string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, - uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, - uint32_t bias, uint32_t comp, uint32_t sample, uint32_t /*minlod*/, - bool *p_forward) +void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, + uint32_t op0, uint32_t op1, uint32_t op2, + const char *op) { - string farg_str; - if (is_fetch) - farg_str = convert_separate_image_to_expression(img); - else - farg_str = to_expression(img); - - bool swizz_func = backend.swizzle_is_function; - auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { - if (comps == in_comps) - return ""; - - switch (comps) - { - case 1: - return ".x"; - case 2: - return swizz_func ? ".xy()" : ".xy"; - case 3: - return swizz_func ? ".xyz()" : ".xyz"; - default: - return ""; - } - }; + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, + join(op, "(", to_non_uniform_aware_expression(op0), ", ", + to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false); + flush_all_atomic_capable_variables(); +} - bool forward = should_forward(coord); +void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, + SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) +{ + auto &out_type = get(result_type); + auto &expr_type = expression_type(op0); + auto expected_type = out_type; - // The IR can give us more components than we need, so chop them off as needed. - auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize); - // Only enclose the UV expression if needed. - auto coord_expr = (*swizzle_expr == '\0') ? 
to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr); + // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. + expected_type.basetype = input_type; + expected_type.width = expr_type.width; - // texelFetch only takes int, not uint. - auto &coord_type = expression_type(coord); - if (coord_type.basetype == SPIRType::UInt) + string cast_op; + if (expr_type.basetype != input_type) { - auto expected_type = coord_type; - expected_type.vecsize = coord_components; - expected_type.basetype = SPIRType::Int; - coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); + if (expr_type.basetype == SPIRType::Boolean) + cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")"); + else + cast_op = bitcast_glsl(expected_type, op0); } + else + cast_op = to_unpacked_expression(op0); - // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. - // To emulate this, we will have to use textureGrad with a constant gradient of 0. - // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. - // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. - bool workaround_lod_array_shadow_as_grad = - ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && - image_is_comparison(imgtype, img) && lod; - - if (dref) + string expr; + if (out_type.basetype != expected_result_type) { - forward = forward && should_forward(dref); - - // SPIR-V splits dref and coordinate. - if (is_gather || coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. - { - farg_str += ", "; - farg_str += to_expression(coord); - farg_str += ", "; - farg_str += to_expression(dref); - } - else if (is_proj) - { - // Have to reshuffle so we get vec4(coord, dref, proj), special case. 
- // Other shading languages splits up the arguments for coord and compare value like SPIR-V. - // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. - farg_str += ", vec4("; - - if (imgtype.image.dim == Dim1D) - { - // Could reuse coord_expr, but we will mess up the temporary usage checking. - farg_str += to_enclosed_expression(coord) + ".x"; - farg_str += ", "; - farg_str += "0.0, "; - farg_str += to_expression(dref); - farg_str += ", "; - farg_str += to_enclosed_expression(coord) + ".y)"; - } - else if (imgtype.image.dim == Dim2D) - { - // Could reuse coord_expr, but we will mess up the temporary usage checking. - farg_str += to_enclosed_expression(coord) + (swizz_func ? ".xy()" : ".xy"); - farg_str += ", "; - farg_str += to_expression(dref); - farg_str += ", "; - farg_str += to_enclosed_expression(coord) + ".z)"; - } - else - SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); - } + expected_type.basetype = expected_result_type; + expected_type.width = out_type.width; + if (out_type.basetype == SPIRType::Boolean) + expr = type_to_glsl(out_type); else - { - // Create a composite which merges coord/dref into a single vector. 
- auto type = expression_type(coord); - type.vecsize = coord_components + 1; - farg_str += ", "; - farg_str += type_to_glsl_constructor(type); - farg_str += "("; - farg_str += coord_expr; - farg_str += ", "; - farg_str += to_expression(dref); - farg_str += ")"; - } + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op, ")"); + expr += ')'; } else { - farg_str += ", "; - farg_str += coord_expr; + expr += join(op, "(", cast_op, ")"); } - if (grad_x || grad_y) - { - forward = forward && should_forward(grad_x); - forward = forward && should_forward(grad_y); - farg_str += ", "; - farg_str += to_expression(grad_x); - farg_str += ", "; - farg_str += to_expression(grad_y); - } + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} - if (lod) - { - if (workaround_lod_array_shadow_as_grad) - { - // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. - // Implementing this as plain texture() is not safe on some implementations. - if (imgtype.image.dim == Dim2D) - farg_str += ", vec2(0.0), vec2(0.0)"; - else if (imgtype.image.dim == DimCube) - farg_str += ", vec3(0.0), vec3(0.0)"; - } - else - { - if (check_explicit_lod_allowed(lod)) - { - forward = forward && should_forward(lod); - farg_str += ", "; +// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs +// and different vector sizes all at once. Need a special purpose method here. 
+void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, + SPIRType::BaseType expected_result_type, + SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, + SPIRType::BaseType input_type2) +{ + auto &out_type = get(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type0; - auto &lod_expr_type = expression_type(lod); + string cast_op0 = + expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); - // Lod expression for TexelFetch in GLSL must be int, and only int. - if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms && - lod_expr_type.basetype != SPIRType::Int) - { - farg_str += join("int(", to_expression(lod), ")"); - } - else - { - farg_str += to_expression(lod); - } - } - } - } - else if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) - { - // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. - farg_str += ", 0"; - } + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); - if (coffset) - { - forward = forward && should_forward(coffset); - farg_str += ", "; - farg_str += to_expression(coffset); - } - else if (offset) - { - forward = forward && should_forward(offset); - farg_str += ", "; - farg_str += to_expression(offset); - } + // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit. + expected_type.basetype = input_type1; + expected_type.vecsize = 1; + string cast_op1 = expression_type(op1).basetype != input_type1 ? 
+ join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") : + op1_expr; - if (bias) - { - forward = forward && should_forward(bias); - farg_str += ", "; - farg_str += to_expression(bias); - } + expected_type.basetype = input_type2; + expected_type.vecsize = 1; + string cast_op2 = expression_type(op2).basetype != input_type2 ? + join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") : + op2_expr; - if (comp) + string expr; + if (out_type.basetype != expected_result_type) { - forward = forward && should_forward(comp); - farg_str += ", "; - farg_str += to_expression(comp); + expected_type.vecsize = out_type.vecsize; + expected_type.basetype = expected_result_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; } - - if (sample) + else { - farg_str += ", "; - farg_str += to_expression(sample); + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); } - *p_forward = forward; - - return farg_str; -} + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} -void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) +void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, SPIRType::BaseType input_type) { - auto op = static_cast(eop); + auto &out_type = get(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op0 = + expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + string cast_op1 = + expression_type(op1).basetype != input_type ? 
bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1); + string cast_op2 = + expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2); - if (is_legacy() && is_unsigned_glsl_opcode(op)) - SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets."); + string expr; + if (out_type.basetype != input_type) + { + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + } - // If we need to do implicit bitcasts, make sure we do it with the correct type. - uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length); - auto int_type = to_signed_basetype(integer_width); - auto uint_type = to_unsigned_basetype(integer_width); + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} - switch (op) - { - // FP fiddling - case GLSLstd450Round: - emit_unary_func_op(result_type, id, args[0], "round"); - break; +void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0, + uint32_t op1, const char *op, SPIRType::BaseType input_type) +{ + // Special purpose method for implementing clustered subgroup opcodes. + // Main difference is that op1 does not participate in any casting, it needs to be a literal. + auto &out_type = get(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op0 = + expression_type(op0).basetype != input_type ? 
bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); - case GLSLstd450RoundEven: - if ((options.es && options.version >= 300) || (!options.es && options.version >= 130)) - emit_unary_func_op(result_type, id, args[0], "roundEven"); - else - SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up."); - break; + string expr; + if (out_type.basetype != input_type) + { + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); + } - case GLSLstd450Trunc: - emit_unary_func_op(result_type, id, args[0], "trunc"); - break; - case GLSLstd450SAbs: - emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type); - break; - case GLSLstd450FAbs: - emit_unary_func_op(result_type, id, args[0], "abs"); - break; - case GLSLstd450SSign: - emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type); - break; - case GLSLstd450FSign: - emit_unary_func_op(result_type, id, args[0], "sign"); - break; - case GLSLstd450Floor: - emit_unary_func_op(result_type, id, args[0], "floor"); - break; - case GLSLstd450Ceil: - emit_unary_func_op(result_type, id, args[0], "ceil"); - break; - case GLSLstd450Fract: - emit_unary_func_op(result_type, id, args[0], "fract"); - break; - case GLSLstd450Radians: - emit_unary_func_op(result_type, id, args[0], "radians"); - break; - case GLSLstd450Degrees: - emit_unary_func_op(result_type, id, args[0], "degrees"); - break; - case GLSLstd450Fma: - if ((!options.es && options.version < 400) || (options.es && options.version < 320)) - { - auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ", - to_enclosed_expression(args[2])); + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} - emit_op(result_type, id, expr, - 
should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); - for (uint32_t i = 0; i < 3; i++) - inherit_expression_dependencies(id, args[i]); - } - else - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); - break; - case GLSLstd450Modf: - register_call_out_argument(args[1]); - forced_temporaries.insert(id); - emit_binary_func_op(result_type, id, args[0], args[1], "modf"); - break; +void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get(result_type); - case GLSLstd450ModfStruct: + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + string expr; + if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { - forced_temporaries.insert(id); - auto &type = get(result_type); - emit_uninitialized_temporary_expression(result_type, id); - statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", - to_expression(id), ".", to_member_name(type, 1), ");"); - break; + expected_type.basetype = input_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); } - // Minmax - case GLSLstd450UMin: - emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false); - break; + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} - case GLSLstd450SMin: - emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", 
int_type, false); - break; +void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ")"), + forward); - case GLSLstd450FMin: - emit_binary_func_op(result_type, id, args[0], args[1], "min"); - break; + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} - case GLSLstd450FMax: - emit_binary_func_op(result_type, id, args[0], args[1], "max"); - break; +void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"), + forward); - case GLSLstd450UMax: - emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false); - break; + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} - case GLSLstd450SMax: - emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false); - break; +void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op, + SPIRType::BaseType offset_count_type) +{ + // Only need to cast offset/count arguments. 
Types of base/insert must be same as result type, + // and bitfieldInsert is sign invariant. + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); - case GLSLstd450FClamp: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); - break; + auto op0_expr = to_unpacked_expression(op0); + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); + auto op3_expr = to_unpacked_expression(op3); - case GLSLstd450UClamp: - emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type); - break; + SPIRType target_type; + target_type.vecsize = 1; + target_type.basetype = offset_count_type; - case GLSLstd450SClamp: - emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type); - break; + if (expression_type(op2).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. + op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")"); + } - // Trig - case GLSLstd450Sin: - emit_unary_func_op(result_type, id, args[0], "sin"); - break; - case GLSLstd450Cos: - emit_unary_func_op(result_type, id, args[0], "cos"); + if (expression_type(op3).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. + op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")"); + } + + emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} + +string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex) +{ + const char *type; + switch (imgtype.image.dim) + { + case spv::Dim1D: + // Force 2D path for ES. 
+ if (options.es) + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; + else + type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; break; - case GLSLstd450Tan: - emit_unary_func_op(result_type, id, args[0], "tan"); + case spv::Dim2D: + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; break; - case GLSLstd450Asin: - emit_unary_func_op(result_type, id, args[0], "asin"); + case spv::Dim3D: + type = "3D"; break; - case GLSLstd450Acos: - emit_unary_func_op(result_type, id, args[0], "acos"); + case spv::DimCube: + type = "Cube"; break; - case GLSLstd450Atan: - emit_unary_func_op(result_type, id, args[0], "atan"); + case spv::DimRect: + type = "2DRect"; break; - case GLSLstd450Sinh: - emit_unary_func_op(result_type, id, args[0], "sinh"); + case spv::DimBuffer: + type = "Buffer"; break; - case GLSLstd450Cosh: - emit_unary_func_op(result_type, id, args[0], "cosh"); + case spv::DimSubpassData: + type = "2D"; break; - case GLSLstd450Tanh: - emit_unary_func_op(result_type, id, args[0], "tanh"); - break; - case GLSLstd450Asinh: - emit_unary_func_op(result_type, id, args[0], "asinh"); - break; - case GLSLstd450Acosh: - emit_unary_func_op(result_type, id, args[0], "acosh"); - break; - case GLSLstd450Atanh: - emit_unary_func_op(result_type, id, args[0], "atanh"); - break; - case GLSLstd450Atan2: - emit_binary_func_op(result_type, id, args[0], args[1], "atan"); - break; - - // Exponentials - case GLSLstd450Pow: - emit_binary_func_op(result_type, id, args[0], args[1], "pow"); - break; - case GLSLstd450Exp: - emit_unary_func_op(result_type, id, args[0], "exp"); - break; - case GLSLstd450Log: - emit_unary_func_op(result_type, id, args[0], "log"); - break; - case GLSLstd450Exp2: - emit_unary_func_op(result_type, id, args[0], "exp2"); - break; - case GLSLstd450Log2: - emit_unary_func_op(result_type, id, args[0], "log2"); - break; - case GLSLstd450Sqrt: - emit_unary_func_op(result_type, id, args[0], "sqrt"); - break; - case 
GLSLstd450InverseSqrt: - emit_unary_func_op(result_type, id, args[0], "inversesqrt"); + default: + type = ""; break; + } - // Matrix math - case GLSLstd450Determinant: - emit_unary_func_op(result_type, id, args[0], "determinant"); - break; - case GLSLstd450MatrixInverse: - emit_unary_func_op(result_type, id, args[0], "inverse"); - break; + // In legacy GLSL, an extension is required for textureLod in the fragment + // shader or textureGrad anywhere. + bool legacy_lod_ext = false; + auto &execution = get_entry_point(); + if (op == "textureGrad" || op == "textureProjGrad" || + ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex)) + { + if (is_legacy_es()) + { + legacy_lod_ext = true; + require_extension_internal("GL_EXT_shader_texture_lod"); + } + else if (is_legacy_desktop()) + require_extension_internal("GL_ARB_shader_texture_lod"); + } - // Lerping - case GLSLstd450FMix: - case GLSLstd450IMix: + if (op == "textureLodOffset" || op == "textureProjLodOffset") { - emit_mix_op(result_type, id, args[0], args[1], args[2]); - break; + if (is_legacy_es()) + SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES")); + + require_extension_internal("GL_EXT_gpu_shader4"); } - case GLSLstd450Step: - emit_binary_func_op(result_type, id, args[0], args[1], "step"); - break; - case GLSLstd450SmoothStep: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); - break; - // Packing - case GLSLstd450Frexp: - register_call_out_argument(args[1]); - forced_temporaries.insert(id); - emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); - break; + // GLES has very limited support for shadow samplers. 
+ // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, + // everything else can just throw + bool is_comparison = is_depth_image(imgtype, tex); + if (is_comparison && is_legacy_es()) + { + if (op == "texture" || op == "textureProj") + require_extension_internal("GL_EXT_shadow_samplers"); + else + SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES")); + } - case GLSLstd450FrexpStruct: + if (op == "textureSize") { - forced_temporaries.insert(id); - auto &type = get(result_type); - emit_uninitialized_temporary_expression(result_type, id); - statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", - to_expression(id), ".", to_member_name(type, 1), ");"); - break; + if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize not supported in legacy ES"); + if (is_comparison) + SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL"); + require_extension_internal("GL_EXT_gpu_shader4"); } - case GLSLstd450Ldexp: - emit_binary_func_op(result_type, id, args[0], args[1], "ldexp"); - break; - case GLSLstd450PackSnorm4x8: - emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); - break; - case GLSLstd450PackUnorm4x8: - emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); - break; - case GLSLstd450PackSnorm2x16: - emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); - break; - case GLSLstd450PackUnorm2x16: - emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); - break; - case GLSLstd450PackHalf2x16: - emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); - break; - case GLSLstd450UnpackSnorm4x8: - emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); - break; - case GLSLstd450UnpackUnorm4x8: - emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); - break; - case GLSLstd450UnpackSnorm2x16: - emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); - break; - case GLSLstd450UnpackUnorm2x16: - 
emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); - break; - case GLSLstd450UnpackHalf2x16: - emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); - break; + if (op == "texelFetch" && is_legacy_es()) + SPIRV_CROSS_THROW("texelFetch not supported in legacy ES"); - case GLSLstd450PackDouble2x32: - emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); - break; - case GLSLstd450UnpackDouble2x32: - emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); - break; + bool is_es_and_depth = is_legacy_es() && is_comparison; + std::string type_prefix = is_comparison ? "shadow" : "texture"; - // Vector math - case GLSLstd450Length: - emit_unary_func_op(result_type, id, args[0], "length"); - break; - case GLSLstd450Distance: - emit_binary_func_op(result_type, id, args[0], args[1], "distance"); - break; - case GLSLstd450Cross: - emit_binary_func_op(result_type, id, args[0], args[1], "cross"); - break; - case GLSLstd450Normalize: - emit_unary_func_op(result_type, id, args[0], "normalize"); - break; - case GLSLstd450FaceForward: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); - break; - case GLSLstd450Reflect: - emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); - break; - case GLSLstd450Refract: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); - break; + if (op == "texture") + return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type); + else if (op == "textureLod") + return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod"); + else if (op == "textureProj") + return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj"); + else if (op == "textureGrad") + return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad"); + else if (op == "textureProjLod") + return join(type_prefix, type, legacy_lod_ext ? 
"ProjLodEXT" : "ProjLod"); + else if (op == "textureLodOffset") + return join(type_prefix, type, "LodOffset"); + else if (op == "textureProjGrad") + return join(type_prefix, type, + is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad"); + else if (op == "textureProjLodOffset") + return join(type_prefix, type, "ProjLodOffset"); + else if (op == "textureSize") + return join("textureSize", type); + else if (op == "texelFetch") + return join("texelFetch", type); + else + { + SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); + } +} - // Bit-fiddling - case GLSLstd450FindILsb: - emit_unary_func_op(result_type, id, args[0], "findLSB"); - break; +bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) +{ + auto *cleft = maybe_get(left); + auto *cright = maybe_get(right); + auto &lerptype = expression_type(lerp); - case GLSLstd450FindSMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type); - break; + // If our targets aren't constants, we cannot use construction. + if (!cleft || !cright) + return false; - case GLSLstd450FindUMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type, - int_type); // findMSB always returns int. - break; + // If our targets are spec constants, we cannot use construction. 
+ if (cleft->specialization || cright->specialization) + return false; - // Multisampled varying - case GLSLstd450InterpolateAtCentroid: - emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); - break; - case GLSLstd450InterpolateAtSample: - emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); - break; - case GLSLstd450InterpolateAtOffset: - emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); - break; + auto &value_type = get(cleft->constant_type); - case GLSLstd450NMin: - case GLSLstd450NMax: - { - emit_nminmax_op(result_type, id, args[0], args[1], op); - break; - } + if (lerptype.basetype != SPIRType::Boolean) + return false; + if (value_type.basetype == SPIRType::Struct || is_array(value_type)) + return false; + if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize) + return false; - case GLSLstd450NClamp: + // Only valid way in SPIR-V 1.4 to use matrices in select is a scalar select. + // matrix(scalar) constructor fills in diagnonals, so gets messy very quickly. + // Just avoid this case. + if (value_type.columns > 1) + return false; + + // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. + bool ret = true; + for (uint32_t row = 0; ret && row < value_type.vecsize; row++) { - // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. - // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. - uint32_t &max_id = extra_sub_expressions[id | 0x80000000u]; - if (!max_id) - max_id = ir.increase_bound_by(1); + switch (type.basetype) + { + case SPIRType::Short: + case SPIRType::UShort: + ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1; + break; - // Inherit precision qualifiers. 
- ir.meta[max_id] = ir.meta[id]; + case SPIRType::Int: + case SPIRType::UInt: + ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1; + break; - emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); - emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); - break; - } + case SPIRType::Half: + ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f; + break; - default: - statement("// unimplemented GLSL op ", eop); - break; + case SPIRType::Float: + ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f; + break; + + case SPIRType::Double: + ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0; + break; + + case SPIRType::Int64: + case SPIRType::UInt64: + ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1; + break; + + default: + ret = false; + break; + } } + + if (ret) + op = type_to_glsl_constructor(type); + return ret; } -void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) +string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, + uint32_t false_value) { - // Need to emulate this call. - uint32_t &ids = extra_sub_expressions[id]; - if (!ids) - { - ids = ir.increase_bound_by(5); - auto btype = get(result_type); - btype.basetype = SPIRType::Boolean; - set(ids, btype); - } + string expr; + auto &lerptype = expression_type(select); - uint32_t btype_id = ids + 0; - uint32_t left_nan_id = ids + 1; - uint32_t right_nan_id = ids + 2; - uint32_t tmp_id = ids + 3; - uint32_t mixed_first_id = ids + 4; + if (lerptype.vecsize == 1) + expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ", + to_enclosed_pointer_expression(false_value)); + else + { + auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); }; - // Inherit precision qualifiers. 
- ir.meta[tmp_id] = ir.meta[id]; - ir.meta[mixed_first_id] = ir.meta[id]; + expr = type_to_glsl_constructor(restype); + expr += "("; + for (uint32_t i = 0; i < restype.vecsize; i++) + { + expr += swiz(select, i); + expr += " ? "; + expr += swiz(true_value, i); + expr += " : "; + expr += swiz(false_value, i); + if (i + 1 < restype.vecsize) + expr += ", "; + } + expr += ")"; + } - emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); - emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); - emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); - emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); - emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); + return expr; } -void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, - uint32_t) +void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) { - require_extension_internal("GL_AMD_shader_ballot"); + auto &lerptype = expression_type(lerp); + auto &restype = get(result_type); - enum AMDShaderBallot + // If this results in a variable pointer, assume it may be written through. + if (restype.pointer) { - SwizzleInvocationsAMD = 1, - SwizzleInvocationsMaskedAMD = 2, - WriteInvocationAMD = 3, - MbcntAMD = 4 - }; + register_write(left); + register_write(right); + } - auto op = static_cast(eop); + string mix_op; + bool has_boolean_mix = *backend.boolean_mix_function && + ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); + bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); - switch (op) + // Cannot use boolean mix when the lerp argument is just one boolean, + // fall back to regular trinary statements. + if (lerptype.vecsize == 1) + has_boolean_mix = false; + + // If we can reduce the mix to a simple cast, do so. 
+ // This helps for cases like int(bool), uint(bool) which is implemented with + // OpSelect bool 1 0. + if (trivial_mix) { - case SwizzleInvocationsAMD: - emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); - register_control_dependent_expression(id); - break; + emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); + } + else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) + { + // Boolean mix not supported on desktop without extension. + // Was added in OpenGL 4.5 with ES 3.1 compat. + // + // Could use GL_EXT_shader_integer_mix on desktop at least, + // but Apple doesn't support it. :( + // Just implement it as ternary expressions. + auto expr = to_ternary_expression(get(result_type), lerp, right, left); + emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); + inherit_expression_dependencies(id, left); + inherit_expression_dependencies(id, right); + inherit_expression_dependencies(id, lerp); + } + else if (lerptype.basetype == SPIRType::Boolean) + emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function); + else + emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); +} - case SwizzleInvocationsMaskedAMD: - emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); - register_control_dependent_expression(id); - break; +string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id) +{ + // Keep track of the array indices we have used to load the image. + // We'll need to use the same array index into the combined image sampler array. 
+ auto image_expr = to_non_uniform_aware_expression(image_id); + string array_expr; + auto array_index = image_expr.find_first_of('['); + if (array_index != string::npos) + array_expr = image_expr.substr(array_index, string::npos); - case WriteInvocationAMD: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); - register_control_dependent_expression(id); - break; + auto &args = current_function->arguments; - case MbcntAMD: - emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); - register_control_dependent_expression(id); - break; + // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect + // all possible combinations into new sampler2D uniforms. + auto *image = maybe_get_backing_variable(image_id); + auto *samp = maybe_get_backing_variable(samp_id); + if (image) + image_id = image->self; + if (samp) + samp_id = samp->self; - default: - statement("// unimplemented SPV AMD shader ballot op ", eop); - break; - } -} + auto image_itr = find_if(begin(args), end(args), + [image_id](const SPIRFunction::Parameter ¶m) { return image_id == param.id; }); -void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, - const uint32_t *args, uint32_t) -{ - require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); + auto sampler_itr = find_if(begin(args), end(args), + [samp_id](const SPIRFunction::Parameter ¶m) { return samp_id == param.id; }); - enum AMDShaderExplicitVertexParameter + if (image_itr != end(args) || sampler_itr != end(args)) { - InterpolateAtVertexAMD = 1 - }; + // If any parameter originates from a parameter, we will find it in our argument list. + bool global_image = image_itr == end(args); + bool global_sampler = sampler_itr == end(args); + VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args))); + VariableID sid = global_sampler ? 
samp_id : VariableID(uint32_t(sampler_itr - begin(args))); - auto op = static_cast(eop); + auto &combined = current_function->combined_parameters; + auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { + return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && + p.sampler_id == sid; + }); - switch (op) + if (itr != end(combined)) + return to_expression(itr->id) + array_expr; + else + { + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was " + "build_combined_image_samplers() used " + "before compile() was called?"); + } + } + else { - case InterpolateAtVertexAMD: - emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD"); - break; + // For global sampler2D, look directly at the global remapping table. + auto &mapping = combined_image_samplers; + auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { + return combined.image_id == image_id && combined.sampler_id == samp_id; + }); - default: - statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); - break; + if (itr != end(combined_image_samplers)) + return to_expression(itr->combined_id) + array_expr; + else + { + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " + "before compile() was called?"); + } } } -void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, - const uint32_t *args, uint32_t) +bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op) { - require_extension_internal("GL_AMD_shader_trinary_minmax"); - - enum AMDShaderTrinaryMinMax - { - FMin3AMD = 1, - UMin3AMD = 2, - SMin3AMD = 3, - FMax3AMD = 4, - UMax3AMD = 5, - SMax3AMD = 6, - FMid3AMD = 7, - UMid3AMD = 8, - SMid3AMD = 9 - }; - - auto op = static_cast(eop); - switch (op) { - case FMin3AMD: - case UMin3AMD: - case 
SMin3AMD: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); - break; - - case FMax3AMD: - case UMax3AMD: - case SMax3AMD: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); - break; - - case FMid3AMD: - case UMid3AMD: - case SMid3AMD: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); - break; - + case OpGroupNonUniformElect: + case OpGroupNonUniformBallot: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpControlBarrier: + case OpMemoryBarrier: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformInverseBallot: + return true; default: - statement("// unimplemented SPV AMD shader trinary minmax op ", eop); - break; + return false; } } -void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, - uint32_t) +void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) { - require_extension_internal("GL_AMD_gcn_shader"); - - enum AMDGCNShader + if (options.vulkan_semantics && combined_image_samplers.empty()) { - CubeFaceIndexAMD = 1, - CubeFaceCoordAMD = 2, - TimeAMD = 3 - }; + emit_binary_func_op(result_type, result_id, image_id, samp_id, + type_to_glsl(get(result_type), result_id).c_str()); + } + else + { + // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. + emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + } - auto op = static_cast(eop); + // Make sure to suppress usage tracking and any expression invalidation. + // It is illegal to create temporaries of opaque types. 
+ forwarded_temporaries.erase(result_id); +} +static inline bool image_opcode_is_sample_no_dref(Op op) +{ switch (op) { - case CubeFaceIndexAMD: - emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); - break; - case CubeFaceCoordAMD: - emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); - break; - case TimeAMD: - { - string expr = "timeAMD()"; - emit_op(result_type, id, expr, true); - register_control_dependent_expression(id); - break; - } + case OpImageSampleExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageFetch: + case OpImageRead: + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseFetch: + case OpImageSparseRead: + return true; default: - statement("// unimplemented SPV AMD gcn shader op ", eop); - break; + return false; } } -void CompilerGLSL::emit_subgroup_op(const Instruction &i) +void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, + uint32_t &texel_id) { - const uint32_t *ops = stream(i); - auto op = static_cast(i.op); + // Need to allocate two temporaries. 
+ if (options.es) + SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL."); + require_extension_internal("GL_ARB_sparse_texture2"); - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); + auto &temps = extra_sub_expressions[id]; + if (temps == 0) + temps = ir.increase_bound_by(2); - switch (op) - { - case OpGroupNonUniformElect: - require_extension_internal("GL_KHR_shader_subgroup_basic"); - break; + feedback_id = temps + 0; + texel_id = temps + 1; - case OpGroupNonUniformBroadcast: - case OpGroupNonUniformBroadcastFirst: - case OpGroupNonUniformBallot: - case OpGroupNonUniformInverseBallot: - case OpGroupNonUniformBallotBitExtract: - case OpGroupNonUniformBallotBitCount: - case OpGroupNonUniformBallotFindLSB: - case OpGroupNonUniformBallotFindMSB: - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - break; + auto &return_type = get(result_type_id); + if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2) + SPIRV_CROSS_THROW("Invalid return type for sparse feedback."); + emit_uninitialized_temporary(return_type.member_types[0], feedback_id); + emit_uninitialized_temporary(return_type.member_types[1], texel_id); +} - case OpGroupNonUniformShuffle: - case OpGroupNonUniformShuffleXor: - require_extension_internal("GL_KHR_shader_subgroup_shuffle"); - break; +uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const +{ + auto itr = extra_sub_expressions.find(id); + if (itr == extra_sub_expressions.end()) + return 0; + else + return itr->second + 1; +} - case OpGroupNonUniformShuffleUp: - case OpGroupNonUniformShuffleDown: - require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); - break; +void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse) +{ + auto *ops = stream(i); + auto op = static_cast(i.op); - case OpGroupNonUniformAll: - case OpGroupNonUniformAny: - case OpGroupNonUniformAllEqual: - 
require_extension_internal("GL_KHR_shader_subgroup_vote"); - break; + SmallVector inherited_expressions; - case OpGroupNonUniformFAdd: - case OpGroupNonUniformFMul: - case OpGroupNonUniformFMin: - case OpGroupNonUniformFMax: - case OpGroupNonUniformIAdd: - case OpGroupNonUniformIMul: - case OpGroupNonUniformSMin: - case OpGroupNonUniformSMax: - case OpGroupNonUniformUMin: - case OpGroupNonUniformUMax: - case OpGroupNonUniformBitwiseAnd: - case OpGroupNonUniformBitwiseOr: - case OpGroupNonUniformBitwiseXor: + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + auto &return_type = get(result_type_id); + + uint32_t sparse_code_id = 0; + uint32_t sparse_texel_id = 0; + if (sparse) + emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id); + + bool forward = false; + string expr = to_texture_op(i, sparse, &forward, inherited_expressions); + + if (sparse) { - auto operation = static_cast(ops[3]); - if (operation == GroupOperationClusteredReduce) - { - require_extension_internal("GL_KHR_shader_subgroup_clustered"); - } - else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || - operation == GroupOperationReduce) - { - require_extension_internal("GL_KHR_shader_subgroup_arithmetic"); - } - else - SPIRV_CROSS_THROW("Invalid group operation."); - break; + statement(to_expression(sparse_code_id), " = ", expr, ";"); + expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id), + ")"); + forward = true; + inherited_expressions.clear(); } - case OpGroupNonUniformQuadSwap: - case OpGroupNonUniformQuadBroadcast: - require_extension_internal("GL_KHR_shader_subgroup_quad"); + emit_op(result_type_id, id, expr, forward); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); + + // Do not register sparse ops as control dependent as they are always lowered to a temporary. 
+ switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(id); break; default: - SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + break; } +} - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; +std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions) +{ + auto *ops = stream(i); + auto op = static_cast(i.op); + uint32_t length = i.length; - auto scope = static_cast(get(ops[2]).scalar()); - if (scope != ScopeSubgroup) - SPIRV_CROSS_THROW("Only subgroup scope is supported."); + uint32_t result_type_id = ops[0]; + VariableID img = ops[2]; + uint32_t coord = ops[3]; + uint32_t dref = 0; + uint32_t comp = 0; + bool gather = false; + bool proj = false; + bool fetch = false; + bool nonuniform_expression = false; + const uint32_t *opt = nullptr; + + auto &result_type = get(result_type_id); + + inherited_expressions.push_back(coord); + if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img)) + nonuniform_expression = true; switch (op) { - case OpGroupNonUniformElect: - emit_op(result_type, id, "subgroupElect()", true); + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; break; - case OpGroupNonUniformBroadcast: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast"); + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + proj = true; break; - case OpGroupNonUniformBroadcastFirst: - emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst"); + case OpImageDrefGather: + 
case OpImageSparseDrefGather: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400."); break; - case OpGroupNonUniformBallot: - emit_unary_func_op(result_type, id, ops[3], "subgroupBallot"); + case OpImageGather: + case OpImageSparseGather: + comp = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + { + if (!expression_is_constant_null(comp)) + SPIRV_CROSS_THROW("textureGather with component requires GLSL 400."); + require_extension_internal("GL_ARB_texture_gather"); + } break; - case OpGroupNonUniformInverseBallot: - emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot"); + case OpImageFetch: + case OpImageSparseFetch: + case OpImageRead: // Reads == fetches in Metal (other langs will not get here) + opt = &ops[4]; + length -= 4; + fetch = true; break; - case OpGroupNonUniformBallotBitExtract: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract"); + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleProjExplicitLod: + opt = &ops[4]; + length -= 4; + proj = true; break; - case OpGroupNonUniformBallotFindLSB: - emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB"); + default: + opt = &ops[4]; + length -= 4; break; + } - case OpGroupNonUniformBallotFindMSB: - emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB"); - break; + // Bypass pointers because we need the real image struct + auto &type = expression_type(img); + auto &imgtype = get(type.self); - case OpGroupNonUniformBallotBitCount: + uint32_t coord_components = 0; + 
switch (imgtype.image.dim) { - auto operation = static_cast(ops[3]); - if (operation == GroupOperationReduce) - emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount"); - else if (operation == GroupOperationInclusiveScan) - emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount"); - else if (operation == GroupOperationExclusiveScan) - emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount"); - else - SPIRV_CROSS_THROW("Invalid BitCount operation."); + case spv::Dim1D: + coord_components = 1; break; - } - - case OpGroupNonUniformShuffle: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle"); + case spv::Dim2D: + coord_components = 2; break; - - case OpGroupNonUniformShuffleXor: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor"); + case spv::Dim3D: + coord_components = 3; break; - - case OpGroupNonUniformShuffleUp: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp"); + case spv::DimCube: + coord_components = 3; break; - - case OpGroupNonUniformShuffleDown: - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); + case spv::DimBuffer: + coord_components = 1; break; - - case OpGroupNonUniformAll: - emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); + default: + coord_components = 2; break; + } - case OpGroupNonUniformAny: - emit_unary_func_op(result_type, id, ops[3], "subgroupAny"); - break; + if (dref) + inherited_expressions.push_back(dref); - case OpGroupNonUniformAllEqual: - emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual"); - break; + if (proj) + coord_components++; + if (imgtype.image.arrayed) + coord_components++; - // clang-format off -#define GLSL_GROUP_OP(op, glsl_op) \ -case OpGroupNonUniform##op: \ - { \ - auto operation = static_cast(ops[3]); \ - if (operation == GroupOperationReduce) \ - emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ - else if (operation == 
GroupOperationInclusiveScan) \ - emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ - else if (operation == GroupOperationExclusiveScan) \ - emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ - else if (operation == GroupOperationClusteredReduce) \ - emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ - else \ - SPIRV_CROSS_THROW("Invalid group operation."); \ - break; \ - } - GLSL_GROUP_OP(FAdd, Add) - GLSL_GROUP_OP(FMul, Mul) - GLSL_GROUP_OP(FMin, Min) - GLSL_GROUP_OP(FMax, Max) - GLSL_GROUP_OP(IAdd, Add) - GLSL_GROUP_OP(IMul, Mul) - GLSL_GROUP_OP(SMin, Min) - GLSL_GROUP_OP(SMax, Max) - GLSL_GROUP_OP(UMin, Min) - GLSL_GROUP_OP(UMax, Max) - GLSL_GROUP_OP(BitwiseAnd, And) - GLSL_GROUP_OP(BitwiseOr, Or) - GLSL_GROUP_OP(BitwiseXor, Xor) -#undef GLSL_GROUP_OP - // clang-format on + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t minlod = 0; + uint32_t flags = 0; - case OpGroupNonUniformQuadSwap: + if (length) { - uint32_t direction = get(ops[4]).scalar(); - if (direction == 0) - emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal"); - else if (direction == 1) - emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical"); - else if (direction == 2) - emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal"); - else - SPIRV_CROSS_THROW("Invalid quad swap direction."); - break; + flags = *opt++; + length--; } - case OpGroupNonUniformQuadBroadcast: - { - emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast"); - break; - } + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + inherited_expressions.push_back(v); + length--; + } + }; - default: - SPIRV_CROSS_THROW("Invalid opcode for subgroup."); - } + test(bias, ImageOperandsBiasMask); 
+ test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + test(minlod, ImageOperandsMinLodMask); - register_control_dependent_expression(id); -} + TextureFunctionBaseArguments base_args = {}; + base_args.img = img; + base_args.imgtype = &imgtype; + base_args.is_fetch = fetch != 0; + base_args.is_gather = gather != 0; + base_args.is_proj = proj != 0; -string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) -{ - // OpBitcast can deal with pointers. - if (out_type.pointer || in_type.pointer) - return type_to_glsl(out_type); + string expr; + TextureFunctionNameArguments name_args = {}; + + name_args.base = base_args; + name_args.has_array_offsets = coffsets != 0; + name_args.has_offset = coffset != 0 || offset != 0; + name_args.has_grad = grad_x != 0 || grad_y != 0; + name_args.has_dref = dref != 0; + name_args.is_sparse_feedback = sparse; + name_args.has_min_lod = minlod != 0; + name_args.lod = lod; + expr += to_function_name(name_args); + expr += "("; - if (out_type.basetype == in_type.basetype) - return ""; + uint32_t sparse_texel_id = 0; + if (sparse) + sparse_texel_id = get_sparse_feedback_texel_id(ops[1]); + + TextureFunctionArguments args = {}; + args.base = base_args; + args.coord = coord; + args.coord_components = coord_components; + args.dref = dref; + args.grad_x = grad_x; + args.grad_y = grad_y; + args.lod = lod; + + if (coffsets) + args.offset = coffsets; + else if (coffset) + args.offset = coffset; + else + args.offset = offset; + + args.bias = bias; + args.component = comp; + args.sample = sample; + args.sparse_texel = sparse_texel_id; + args.min_lod = minlod; + args.nonuniform_expression = nonuniform_expression; + expr += to_function_args(args, forward); + expr += ")"; - 
assert(out_type.basetype != SPIRType::Boolean); - assert(in_type.basetype != SPIRType::Boolean); + // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here. + if (is_legacy() && !options.es && is_depth_image(imgtype, img)) + expr += ".r"; - bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); - bool same_size_cast = out_type.width == in_type.width; + // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. + // Remap back to 4 components as sampling opcodes expect. + if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) + { + bool image_is_depth = false; + const auto *combined = maybe_get(img); + VariableID image_id = combined ? combined->image : img; - // Trivial bitcast case, casts between integers. - if (integral_cast && same_size_cast) - return type_to_glsl(out_type); + if (combined && is_depth_image(imgtype, combined->image)) + image_is_depth = true; + else if (is_depth_image(imgtype, img)) + image_is_depth = true; - // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). - if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) - return "unpack8"; - else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) - return "pack16"; - else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) - return "pack32"; + // We must also check the backing variable for the image. + // We might have loaded an OpImage, and used that handle for two different purposes. + // Once with comparison, once without. + auto *image_variable = maybe_get_backing_variable(image_id); + if (image_variable && is_depth_image(get(image_variable->basetype), image_variable->self)) + image_is_depth = true; - // Floating <-> Integer special casts. Just have to enumerate all cases. :( - // 16-bit, 32-bit and 64-bit floats. 
- if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) - { - if (is_legacy_es()) - SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL."); - else if (!options.es && options.version < 330) - require_extension_internal("GL_ARB_shader_bit_encoding"); - return "floatBitsToUint"; - } - else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) - { - if (is_legacy_es()) - SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL."); - else if (!options.es && options.version < 330) - require_extension_internal("GL_ARB_shader_bit_encoding"); - return "floatBitsToInt"; - } - else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) - { - if (is_legacy_es()) - SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL."); - else if (!options.es && options.version < 330) - require_extension_internal("GL_ARB_shader_bit_encoding"); - return "uintBitsToFloat"; + if (image_is_depth) + expr = remap_swizzle(result_type, 1, expr); } - else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + + if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32) { - if (is_legacy_es()) - SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL."); - else if (!options.es && options.version < 330) - require_extension_internal("GL_ARB_shader_bit_encoding"); - return "intBitsToFloat"; + // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. + // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. 
+ expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); } - else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) - return "doubleBitsToInt64"; - else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) - return "doubleBitsToUint64"; - else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) - return "int64BitsToDouble"; - else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) - return "uint64BitsToDouble"; - else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) - return "float16BitsToInt16"; - else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) - return "float16BitsToUint16"; - else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) - return "int16BitsToFloat16"; - else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) - return "uint16BitsToFloat16"; + // Deals with reads from MSL. We might need to downconvert to fewer components. + if (op == OpImageRead) + expr = remap_swizzle(result_type, 4, expr); - // And finally, some even more special purpose casts. 
- if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) - return "packUint2x32"; - else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) - return "unpackFloat2x16"; - else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) - return "packFloat2x16"; - else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) - return "packInt2x16"; - else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) - return "unpackInt2x16"; - else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) - return "packUint2x16"; - else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) - return "unpackUint2x16"; - else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) - return "packInt4x16"; - else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) - return "unpackInt4x16"; - else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) - return "packUint4x16"; - else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) - return "unpackUint4x16"; + return expr; +} - return ""; +bool CompilerGLSL::expression_is_constant_null(uint32_t id) const +{ + auto *c = maybe_get(id); + if (!c) + return false; + return c->constant_is_null(); } -string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) +bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr) { - auto op = bitcast_glsl_op(result_type, expression_type(argument)); - if (op.empty()) - return to_enclosed_unpacked_expression(argument); - else - 
return join(op, "(", to_unpacked_expression(argument), ")"); + auto &type = expression_type(ptr); + if (type.array.empty()) + return false; + + if (!backend.array_is_value_type) + return true; + + auto *var = maybe_get_backing_variable(ptr); + if (!var) + return false; + + auto &backed_type = get(var->basetype); + return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct && + has_member_decoration(backed_type.self, 0, DecorationOffset); } -std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) +// Returns the function name for a texture sampling function for the specified image and sampling characteristics. +// For some subclasses, the function is a method on the specified image. +string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) { - auto expr = to_expression(arg); - auto &src_type = expression_type(arg); - if (src_type.basetype != target_type) + if (args.has_min_lod) { - auto target = src_type; - target.basetype = target_type; - expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); + if (options.es) + SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL."); + require_extension_internal("GL_ARB_sparse_texture_clamp"); } - return expr; -} + string fname; + auto &imgtype = *args.base.imgtype; + VariableID tex = args.base.img; -std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, - const std::string &expr) -{ - if (target_type.basetype == expr_type) - return expr; + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. 
+ bool workaround_lod_array_shadow_as_grad = false; + if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + is_depth_image(imgtype, tex) && args.lod && !args.base.is_fetch) + { + if (!expression_is_constant_null(args.lod)) + { + SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be " + "expressed in GLSL."); + } + workaround_lod_array_shadow_as_grad = true; + } - auto src_type = target_type; - src_type.basetype = expr_type; - return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); -} + if (args.is_sparse_feedback) + fname += "sparse"; -string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) -{ - switch (builtin) + if (args.base.is_fetch) + fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch"; + else { - case BuiltInPosition: - return "gl_Position"; - case BuiltInPointSize: - return "gl_PointSize"; - case BuiltInClipDistance: - return "gl_ClipDistance"; - case BuiltInCullDistance: - return "gl_CullDistance"; - case BuiltInVertexId: - if (options.vulkan_semantics) - SPIRV_CROSS_THROW( - "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics."); - return "gl_VertexID"; - case BuiltInInstanceId: - if (options.vulkan_semantics) - SPIRV_CROSS_THROW( - "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics."); - return "gl_InstanceID"; - case BuiltInVertexIndex: - if (options.vulkan_semantics) - return "gl_VertexIndex"; - else - return "gl_VertexID"; // gl_VertexID already has the base offset applied. - case BuiltInInstanceIndex: - if (options.vulkan_semantics) - return "gl_InstanceIndex"; - else if (options.vertex.support_nonzero_base_instance) - return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. 
- else - return "gl_InstanceID"; - case BuiltInPrimitiveId: - if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) - return "gl_PrimitiveIDIn"; - else - return "gl_PrimitiveID"; - case BuiltInInvocationId: - return "gl_InvocationID"; - case BuiltInLayer: - return "gl_Layer"; - case BuiltInViewportIndex: - return "gl_ViewportIndex"; - case BuiltInTessLevelOuter: - return "gl_TessLevelOuter"; - case BuiltInTessLevelInner: - return "gl_TessLevelInner"; - case BuiltInTessCoord: - return "gl_TessCoord"; - case BuiltInFragCoord: - return "gl_FragCoord"; - case BuiltInPointCoord: - return "gl_PointCoord"; - case BuiltInFrontFacing: - return "gl_FrontFacing"; - case BuiltInFragDepth: - return "gl_FragDepth"; - case BuiltInNumWorkgroups: - return "gl_NumWorkGroups"; - case BuiltInWorkgroupSize: - return "gl_WorkGroupSize"; - case BuiltInWorkgroupId: - return "gl_WorkGroupID"; - case BuiltInLocalInvocationId: - return "gl_LocalInvocationID"; - case BuiltInGlobalInvocationId: - return "gl_GlobalInvocationID"; - case BuiltInLocalInvocationIndex: - return "gl_LocalInvocationIndex"; - case BuiltInHelperInvocation: - return "gl_HelperInvocation"; - case BuiltInBaseVertex: - if (options.es) - SPIRV_CROSS_THROW("BaseVertex not supported in ES profile."); - if (options.version < 460) - { - require_extension_internal("GL_ARB_shader_draw_parameters"); - return "gl_BaseVertexARB"; - } - return "gl_BaseVertex"; - case BuiltInBaseInstance: - if (options.es) - SPIRV_CROSS_THROW("BaseInstance not supported in ES profile."); - if (options.version < 460) - { - require_extension_internal("GL_ARB_shader_draw_parameters"); - return "gl_BaseInstanceARB"; - } - return "gl_BaseInstance"; - case BuiltInDrawIndex: - if (options.es) - SPIRV_CROSS_THROW("DrawIndex not supported in ES profile."); - if (options.version < 460) - { - require_extension_internal("GL_ARB_shader_draw_parameters"); - return "gl_DrawIDARB"; - } - return "gl_DrawID"; - - case BuiltInSampleId: - 
if (options.es && options.version < 320) - require_extension_internal("GL_OES_sample_variables"); - if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400."); - return "gl_SampleID"; - - case BuiltInSampleMask: - if (options.es && options.version < 320) - require_extension_internal("GL_OES_sample_variables"); - if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400."); - - if (storage == StorageClassInput) - return "gl_SampleMaskIn"; - else - return "gl_SampleMask"; - - case BuiltInSamplePosition: - if (options.es && options.version < 320) - require_extension_internal("GL_OES_sample_variables"); - if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400."); - return "gl_SamplePosition"; + fname += args.is_sparse_feedback ? "Texture" : "texture"; - case BuiltInViewIndex: - if (options.vulkan_semantics) - { - require_extension_internal("GL_EXT_multiview"); - return "gl_ViewIndex"; - } - else - { - require_extension_internal("GL_OVR_multiview2"); - return "gl_ViewID_OVR"; - } + if (args.base.is_gather) + fname += "Gather"; + if (args.has_array_offsets) + fname += "Offsets"; + if (args.base.is_proj) + fname += "Proj"; + if (args.has_grad || workaround_lod_array_shadow_as_grad) + fname += "Grad"; + if (args.lod != 0 && !workaround_lod_array_shadow_as_grad) + fname += "Lod"; + } - case BuiltInNumSubgroups: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); - return "gl_NumSubgroups"; + if (args.has_offset) + fname += "Offset"; - case BuiltInSubgroupId: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); - return "gl_SubgroupID"; + if (args.has_min_lod) + fname += "Clamp"; - case BuiltInSubgroupSize: 
- if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); - return "gl_SubgroupSize"; + if (args.is_sparse_feedback || args.has_min_lod) + fname += "ARB"; - case BuiltInSubgroupLocalInvocationId: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_basic"); - return "gl_SubgroupInvocationID"; + return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname; +} - case BuiltInSubgroupEqMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupEqMask"; +std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) +{ + auto *var = maybe_get_backing_variable(id); - case BuiltInSubgroupGeMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupGeMask"; + // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. + // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. + if (var) + { + auto &type = get(var->basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + { + if (options.vulkan_semantics) + { + if (dummy_sampler_id) + { + // Don't need to consider Shadow state since the dummy sampler is always non-shadow. + auto sampled_type = type; + sampled_type.basetype = SPIRType::SampledImage; + return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ", + to_expression(dummy_sampler_id), ")"); + } + else + { + // Newer glslang supports this extension to deal with texture2D as argument to texture functions. 
+ require_extension_internal("GL_EXT_samplerless_texture_functions"); + } + } + else + { + if (!dummy_sampler_id) + SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was " + "build_dummy_sampler_for_combined_images() called?"); - case BuiltInSubgroupGtMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupGtMask"; + return to_combined_image_sampler(id, dummy_sampler_id); + } + } + } - case BuiltInSubgroupLeMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupLeMask"; + return to_non_uniform_aware_expression(id); +} - case BuiltInSubgroupLtMask: - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); - require_extension_internal("GL_KHR_shader_subgroup_ballot"); - return "gl_SubgroupLtMask"; +// Returns the function args for a texture sampling function for the specified image and sampling characteristics. 
+string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward) +{ + VariableID img = args.base.img; + auto &imgtype = *args.base.imgtype; - case BuiltInLaunchIdNV: - return "gl_LaunchIDNV"; - case BuiltInLaunchSizeNV: - return "gl_LaunchSizeNV"; - case BuiltInWorldRayOriginNV: - return "gl_WorldRayOriginNV"; - case BuiltInWorldRayDirectionNV: - return "gl_WorldRayDirectionNV"; - case BuiltInObjectRayOriginNV: - return "gl_ObjectRayOriginNV"; - case BuiltInObjectRayDirectionNV: - return "gl_ObjectRayDirectionNV"; - case BuiltInRayTminNV: - return "gl_RayTminNV"; - case BuiltInRayTmaxNV: - return "gl_RayTmaxNV"; - case BuiltInInstanceCustomIndexNV: - return "gl_InstanceCustomIndexNV"; - case BuiltInObjectToWorldNV: - return "gl_ObjectToWorldNV"; - case BuiltInWorldToObjectNV: - return "gl_WorldToObjectNV"; - case BuiltInHitTNV: - return "gl_HitTNV"; - case BuiltInHitKindNV: - return "gl_HitKindNV"; - case BuiltInIncomingRayFlagsNV: - return "gl_IncomingRayFlagsNV"; + string farg_str; + if (args.base.is_fetch) + farg_str = convert_separate_image_to_expression(img); + else + farg_str = to_non_uniform_aware_expression(img); - case BuiltInBaryCoordNV: + if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos) { - if (options.es && options.version < 320) - SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320."); - else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - return "gl_BaryCoordNV"; + // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way. 
+ farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")"); } - case BuiltInBaryCoordNoPerspNV: - { - if (options.es && options.version < 320) - SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320."); - else if (!options.es && options.version < 450) - SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450."); - require_extension_internal("GL_NV_fragment_shader_barycentric"); - return "gl_BaryCoordNoPerspNV"; - } + bool swizz_func = backend.swizzle_is_function; + auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { + if (comps == in_comps) + return ""; - case BuiltInFragStencilRefEXT: - { - if (!options.es) + switch (comps) { - require_extension_internal("GL_ARB_shader_stencil_export"); - return "gl_FragStencilRefARB"; + case 1: + return ".x"; + case 2: + return swizz_func ? ".xy()" : ".xy"; + case 3: + return swizz_func ? ".xyz()" : ".xyz"; + default: + return ""; } - else - SPIRV_CROSS_THROW("Stencil export not supported in GLES."); - } + }; - default: - return join("gl_BuiltIn_", convert_to_string(builtin)); - } -} + bool forward = should_forward(args.coord); -const char *CompilerGLSL::index_to_swizzle(uint32_t index) -{ - switch (index) + // The IR can give us more components than we need, so chop them off as needed. + auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize); + // Only enclose the UV expression if needed. + auto coord_expr = + (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr); + + // texelFetch only takes int, not uint. 
+ auto &coord_type = expression_type(args.coord); + if (coord_type.basetype == SPIRType::UInt) { - case 0: - return "x"; - case 1: - return "y"; - case 2: - return "z"; - case 3: - return "w"; - default: - SPIRV_CROSS_THROW("Swizzle index out of range"); + auto expected_type = coord_type; + expected_type.vecsize = args.coord_components; + expected_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); } -} - -string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, - AccessChainFlags flags, AccessChainMeta *meta) -{ - string expr; - bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; - bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; - bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; - bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. + bool workaround_lod_array_shadow_as_grad = + ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + is_depth_image(imgtype, img) && args.lod != 0 && !args.base.is_fetch; - if (!chain_only) - expr = to_enclosed_expression(base, register_expression_read); + if (args.dref) + { + forward = forward && should_forward(args.dref); - // Start traversing type hierarchy at the proper non-pointer types, - // but keep type_id referencing the original pointer for use below. 
- uint32_t type_id = expression_type_id(base); - - if (!backend.native_pointers) - { - if (ptr_chain) - SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain."); - - // Wrapped buffer reference pointer types will need to poke into the internal "value" member before - // continuing the access chain. - if (should_dereference(base)) + // SPIR-V splits dref and coordinate. + if (args.base.is_gather || + args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. { - auto &type = get(type_id); - expr = dereference_expression(type, expr); + farg_str += ", "; + farg_str += to_expression(args.coord); + farg_str += ", "; + farg_str += to_expression(args.dref); } - } - - const auto *type = &get_pointee_type(type_id); - - bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; - bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); - bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPacked); - uint32_t packed_type = get_extended_decoration(base, SPIRVCrossDecorationPackedType); - bool is_invariant = has_decoration(base, DecorationInvariant); - bool pending_array_enclose = false; - bool dimension_flatten = false; - - const auto append_index = [&](uint32_t index) { - expr += "["; - - // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier. 
- bool nonuniform_index = - has_decoration(index, DecorationNonUniformEXT) && - (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock)); - if (nonuniform_index) + else if (args.base.is_proj) { - expr += backend.nonuniform_qualifier; - expr += "("; - } - - if (index_is_literal) - expr += convert_to_string(index); - else - expr += to_expression(index, register_expression_read); - - if (nonuniform_index) - expr += ")"; - - expr += "]"; - }; - - for (uint32_t i = 0; i < count; i++) - { - uint32_t index = indices[i]; + // Have to reshuffle so we get vec4(coord, dref, proj), special case. + // Other shading languages splits up the arguments for coord and compare value like SPIR-V. + // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. + farg_str += ", vec4("; - // Pointer chains - if (ptr_chain && i == 0) - { - // If we are flattening multidimensional arrays, only create opening bracket on first - // array index. - if (options.flatten_multidimensional_arrays) + if (imgtype.image.dim == Dim1D) { - dimension_flatten = type->array.size() >= 1; - pending_array_enclose = dimension_flatten; - if (pending_array_enclose) - expr += "["; + // Could reuse coord_expr, but we will mess up the temporary usage checking. + farg_str += to_enclosed_expression(args.coord) + ".x"; + farg_str += ", "; + farg_str += "0.0, "; + farg_str += to_expression(args.dref); + farg_str += ", "; + farg_str += to_enclosed_expression(args.coord) + ".y)"; } - - if (options.flatten_multidimensional_arrays && dimension_flatten) + else if (imgtype.image.dim == Dim2D) { - // If we are flattening multidimensional arrays, do manual stride computation. 
- if (index_is_literal) - expr += convert_to_string(index); - else - expr += to_enclosed_expression(index, register_expression_read); - - for (auto j = uint32_t(type->array.size()); j; j--) - { - expr += " * "; - expr += enclose_expression(to_array_size(*type, j - 1)); - } - - if (type->array.empty()) - pending_array_enclose = false; - else - expr += " + "; - - if (!pending_array_enclose) - expr += "]"; + // Could reuse coord_expr, but we will mess up the temporary usage checking. + farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy"); + farg_str += ", "; + farg_str += to_expression(args.dref); + farg_str += ", "; + farg_str += to_enclosed_expression(args.coord) + ".z)"; } else - { - append_index(index); - } - - if (type->basetype == SPIRType::ControlPointArray) - { - type_id = type->parent_type; - type = &get(type_id); - } - - access_chain_is_arrayed = true; + SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); } - // Arrays - else if (!type->array.empty()) + else { - // If we are flattening multidimensional arrays, only create opening bracket on first - // array index. - if (options.flatten_multidimensional_arrays && !pending_array_enclose) - { - dimension_flatten = type->array.size() > 1; - pending_array_enclose = dimension_flatten; - if (pending_array_enclose) - expr += "["; - } - - assert(type->parent_type); + // Create a composite which merges coord/dref into a single vector. + auto type = expression_type(args.coord); + type.vecsize = args.coord_components + 1; + if (imgtype.image.dim == Dim1D && options.es) + type.vecsize++; + farg_str += ", "; + farg_str += type_to_glsl_constructor(type); + farg_str += "("; - auto *var = maybe_get(base); - if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && - !has_decoration(type->self, DecorationBlock)) + if (imgtype.image.dim == Dim1D && options.es) { - // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. 
- // Normally, these variables live in blocks when compiled from GLSL, - // but HLSL seems to just emit straight arrays here. - // We must pretend this access goes through gl_in/gl_out arrays - // to be able to access certain builtins as arrays. - auto builtin = ir.meta[base].decoration.builtin_type; - switch (builtin) + if (imgtype.image.arrayed) { - // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. - // case BuiltInClipDistance: - case BuiltInPosition: - case BuiltInPointSize: - if (var->storage == StorageClassInput) - expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); - else if (var->storage == StorageClassOutput) - expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); - else - append_index(index); - break; - - default: - append_index(index); - break; + farg_str += enclose_expression(coord_expr) + ".x"; + farg_str += ", 0.0, "; + farg_str += enclose_expression(coord_expr) + ".y"; } - } - else if (options.flatten_multidimensional_arrays && dimension_flatten) - { - // If we are flattening multidimensional arrays, do manual stride computation. 
- auto &parent_type = get(type->parent_type); - - if (index_is_literal) - expr += convert_to_string(index); else - expr += to_enclosed_expression(index, register_expression_read); - - for (auto j = uint32_t(parent_type.array.size()); j; j--) { - expr += " * "; - expr += enclose_expression(to_array_size(parent_type, j - 1)); + farg_str += coord_expr; + farg_str += ", 0.0"; } - - if (parent_type.array.empty()) - pending_array_enclose = false; - else - expr += " + "; - - if (!pending_array_enclose) - expr += "]"; } else - { - append_index(index); - } - - type_id = type->parent_type; - type = &get(type_id); + farg_str += coord_expr; - access_chain_is_arrayed = true; + farg_str += ", "; + farg_str += to_expression(args.dref); + farg_str += ")"; } - // For structs, the index refers to a constant, which indexes into the members. - // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. - else if (type->basetype == SPIRType::Struct) + } + else + { + if (imgtype.image.dim == Dim1D && options.es) { - if (!index_is_literal) - index = get(index).scalar(); - - if (index >= type->member_types.size()) - SPIRV_CROSS_THROW("Member index is out of bounds!"); - - BuiltIn builtin; - if (is_member_builtin(*type, index, &builtin)) + // Have to fake a second coordinate. + if (type_is_floating_point(coord_type)) { - // FIXME: We rely here on OpName on gl_in/gl_out to make this work properly. - // To make this properly work by omitting all OpName opcodes, - // we need to infer gl_in or gl_out based on the builtin, and stage. - if (access_chain_is_arrayed) + // Cannot mix proj and array. 
+ if (imgtype.image.arrayed || args.base.is_proj) { - expr += "."; - expr += builtin_to_glsl(builtin, type->storage); + coord_expr = join("vec3(", enclose_expression(coord_expr), ".x, 0.0, ", + enclose_expression(coord_expr), ".y)"); } else - expr = builtin_to_glsl(builtin, type->storage); + coord_expr = join("vec2(", coord_expr, ", 0.0)"); } else { - // If the member has a qualified name, use it as the entire chain - string qual_mbr_name = get_member_qualified_name(type_id, index); - if (!qual_mbr_name.empty()) - expr = qual_mbr_name; + if (imgtype.image.arrayed) + { + coord_expr = join("ivec3(", enclose_expression(coord_expr), + ".x, 0, ", + enclose_expression(coord_expr), ".y)"); + } else - expr += to_member_reference(base, *type, index, ptr_chain); + coord_expr = join("ivec2(", coord_expr, ", 0)"); } + } - if (has_member_decoration(type->self, index, DecorationInvariant)) - is_invariant = true; + farg_str += ", "; + farg_str += coord_expr; + } - is_packed = member_is_packed_type(*type, index); - if (is_packed) - packed_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPackedType); - else - packed_type = 0; + if (args.grad_x || args.grad_y) + { + forward = forward && should_forward(args.grad_x); + forward = forward && should_forward(args.grad_y); + farg_str += ", "; + farg_str += to_expression(args.grad_x); + farg_str += ", "; + farg_str += to_expression(args.grad_y); + } - row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); - type = &get(type->member_types[index]); + if (args.lod) + { + if (workaround_lod_array_shadow_as_grad) + { + // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. + // Implementing this as plain texture() is not safe on some implementations. 
+ if (imgtype.image.dim == Dim2D) + farg_str += ", vec2(0.0), vec2(0.0)"; + else if (imgtype.image.dim == DimCube) + farg_str += ", vec3(0.0), vec3(0.0)"; } - // Matrix -> Vector - else if (type->columns > 1) + else { - if (row_major_matrix_needs_conversion) - { - expr = convert_row_major_matrix(expr, *type, is_packed); - row_major_matrix_needs_conversion = false; - is_packed = false; - packed_type = 0; - } + forward = forward && should_forward(args.lod); + farg_str += ", "; - expr += "["; - if (index_is_literal) - expr += convert_to_string(index); + // Lod expression for TexelFetch in GLSL must be int, and only int. + if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) + farg_str += bitcast_expression(SPIRType::Int, args.lod); else - expr += to_expression(index, register_expression_read); - expr += "]"; - - type_id = type->parent_type; - type = &get(type_id); + farg_str += to_expression(args.lod); } - // Vector -> Scalar - else if (type->vecsize > 1) - { - if (index_is_literal && !is_packed) - { - expr += "."; - expr += index_to_swizzle(index); - } - else if (ir.ids[index].get_type() == TypeConstant && !is_packed) - { - auto &c = get(index); - expr += "."; - expr += index_to_swizzle(c.scalar()); - } - else if (index_is_literal) - { - // For packed vectors, we can only access them as an array, not by swizzle. - expr += join("[", index, "]"); - } - else - { - expr += "["; - expr += to_expression(index, register_expression_read); - expr += "]"; - } + } + else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) + { + // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. 
+ farg_str += ", 0"; + } - is_packed = false; - packed_type = 0; - type_id = type->parent_type; - type = &get(type_id); - } - else if (!backend.allow_truncated_access_chain) - SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + if (args.offset) + { + forward = forward && should_forward(args.offset); + farg_str += ", "; + farg_str += bitcast_expression(SPIRType::Int, args.offset); } - if (pending_array_enclose) + if (args.sample) { - SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " - "but the access chain was terminated in the middle of a multidimensional array. " - "This is not supported."); + farg_str += ", "; + farg_str += bitcast_expression(SPIRType::Int, args.sample); } - if (meta) + if (args.min_lod) { - meta->need_transpose = row_major_matrix_needs_conversion; - meta->storage_is_packed = is_packed; - meta->storage_is_invariant = is_invariant; - meta->storage_packed_type = packed_type; + farg_str += ", "; + farg_str += to_expression(args.min_lod); } - return expr; -} - -string CompilerGLSL::to_flattened_struct_member(const SPIRVariable &var, uint32_t index) -{ - auto &type = get(var.basetype); - return sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, index))); -} - -string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, - AccessChainMeta *meta, bool ptr_chain) -{ - if (flattened_buffer_blocks.count(base)) + if (args.sparse_texel) { - uint32_t matrix_stride = 0; - bool need_transpose = false; - flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride, - ptr_chain); - - if (meta) - { - meta->need_transpose = target_type.columns > 1 && need_transpose; - meta->storage_is_packed = false; - } - - return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose); + // Sparse texel output parameter comes after everything else, except it's before the optional, 
component/bias arguments. + farg_str += ", "; + farg_str += to_expression(args.sparse_texel); } - else if (flattened_structs.count(base) && count > 0) - { - AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; - if (ptr_chain) - flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; - auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1); - if (meta) - { - meta->need_transpose = false; - meta->storage_is_packed = false; - } - return sanitize_underscores(join(to_name(base), "_", chain)); - } - else + if (args.bias) { - AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; - if (ptr_chain) - flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; - return access_chain_internal(base, indices, count, flags, meta); + forward = forward && should_forward(args.bias); + farg_str += ", "; + farg_str += to_expression(args.bias); } -} -string CompilerGLSL::load_flattened_struct(SPIRVariable &var) -{ - auto expr = type_to_glsl_constructor(get(var.basetype)); - expr += '('; - - auto &type = get(var.basetype); - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + if (args.component && !expression_is_constant_null(args.component)) { - if (i) - expr += ", "; - - // Flatten the varyings. - // Apply name transformation for flattened I/O blocks. - expr += to_flattened_struct_member(var, i); + forward = forward && should_forward(args.component); + farg_str += ", "; + farg_str += bitcast_expression(SPIRType::Int, args.component); } - expr += ')'; - return expr; -} -void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value) -{ - // We're trying to store a structure which has been flattened. - // Need to copy members one by one. - auto rhs = to_expression(value); + *p_forward = forward; - // Store result locally. - // Since we're declaring a variable potentially multiple times here, - // store the variable in an isolated scope. 
- begin_scope(); - statement(variable_decl_function_local(var), " = ", rhs, ";"); + return farg_str; +} - auto &type = get(var.basetype); - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) +Op CompilerGLSL::get_remapped_spirv_op(Op op) const +{ + if (options.relax_nan_checks) { - // Flatten the varyings. - // Apply name transformation for flattened I/O blocks. + switch (op) + { + case OpFUnordLessThan: + op = OpFOrdLessThan; + break; + case OpFUnordLessThanEqual: + op = OpFOrdLessThanEqual; + break; + case OpFUnordGreaterThan: + op = OpFOrdGreaterThan; + break; + case OpFUnordGreaterThanEqual: + op = OpFOrdGreaterThanEqual; + break; + case OpFUnordEqual: + op = OpFOrdEqual; + break; + case OpFOrdNotEqual: + op = OpFUnordNotEqual; + break; - auto lhs = sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, i))); - rhs = join(to_name(var.self), ".", to_member_name(type, i)); - statement(lhs, " = ", rhs, ";"); + default: + break; + } } - end_scope(); -} -std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, - const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, - bool need_transpose) -{ - if (!target_type.array.empty()) - SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); - else if (target_type.basetype == SPIRType::Struct) - return flattened_access_chain_struct(base, indices, count, target_type, offset); - else if (target_type.columns > 1) - return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); - else - return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose); + return op; } -std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, - const SPIRType &target_type, uint32_t offset) +GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const { - std::string expr; 
- - expr += type_to_glsl_constructor(target_type); - expr += "("; - - for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) + // Relax to non-NaN aware opcodes. + if (options.relax_nan_checks) { - if (i != 0) - expr += ", "; - - const SPIRType &member_type = get(target_type.member_types[i]); - uint32_t member_offset = type_struct_member_offset(target_type, i); - - // The access chain terminates at the struct, so we need to find matrix strides and row-major information - // ahead of time. - bool need_transpose = false; - uint32_t matrix_stride = 0; - if (member_type.columns > 1) + switch (std450_op) { - need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor); - matrix_stride = type_struct_member_matrix_stride(target_type, i); + case GLSLstd450NClamp: + std450_op = GLSLstd450FClamp; + break; + case GLSLstd450NMin: + std450_op = GLSLstd450FMin; + break; + case GLSLstd450NMax: + std450_op = GLSLstd450FMax; + break; + default: + break; } - - auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, - need_transpose); - - // Cannot forward transpositions, so resolve them here. 
- if (need_transpose) - expr += convert_row_major_matrix(tmp, member_type, false); - else - expr += tmp; } - expr += ")"; - - return expr; + return std450_op; } -std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, - const SPIRType &target_type, uint32_t offset, - uint32_t matrix_stride, bool need_transpose) +void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) { - assert(matrix_stride); - SPIRType tmp_type = target_type; - if (need_transpose) - swap(tmp_type.vecsize, tmp_type.columns); - - std::string expr; - - expr += type_to_glsl_constructor(tmp_type); - expr += "("; - - for (uint32_t i = 0; i < tmp_type.columns; i++) - { - if (i != 0) - expr += ", "; - - expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, - /* need_transpose= */ false); - } - - expr += ")"; + auto op = static_cast(eop); - return expr; -} + if (is_legacy() && is_unsigned_glsl_opcode(op)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets."); -std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, - const SPIRType &target_type, uint32_t offset, - uint32_t matrix_stride, bool need_transpose) -{ - auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); - auto buffer_name = to_name(expression_type(base).self); + op = get_remapped_glsl_op(op); - if (need_transpose) + switch (op) { - std::string expr; - - if (target_type.vecsize > 1) + // FP fiddling + case GLSLstd450Round: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "round"); + else { - expr += type_to_glsl_constructor(target_type); - expr += "("; + auto op0 = to_enclosed_expression(args[0]); + auto &op0_type = expression_type(args[0]); + auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))"); + bool forward = should_forward(args[0]); + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); } + break; - for (uint32_t i = 0; i < target_type.vecsize; ++i) + case GLSLstd450RoundEven: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "roundEven"); + else if (!options.es) { - if (i != 0) - expr += ", "; - - uint32_t component_offset = result.second + i * matrix_stride; - - assert(component_offset % (target_type.width / 8) == 0); - uint32_t index = component_offset / (target_type.width / 8); - - expr += buffer_name; - expr += "["; - expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + - expr += convert_to_string(index / 4); - expr += "]"; - - expr += vector_swizzle(1, index % 4); + // This extension provides round() with round-to-even semantics. 
+ require_extension_internal("GL_EXT_gpu_shader4"); + emit_unary_func_op(result_type, id, args[0], "round"); } + else + SPIRV_CROSS_THROW("roundEven supported only in ESSL 300."); + break; - if (target_type.vecsize > 1) + case GLSLstd450Trunc: + emit_unary_func_op(result_type, id, args[0], "trunc"); + break; + case GLSLstd450SAbs: + emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type); + break; + case GLSLstd450FAbs: + emit_unary_func_op(result_type, id, args[0], "abs"); + break; + case GLSLstd450SSign: + emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type); + break; + case GLSLstd450FSign: + emit_unary_func_op(result_type, id, args[0], "sign"); + break; + case GLSLstd450Floor: + emit_unary_func_op(result_type, id, args[0], "floor"); + break; + case GLSLstd450Ceil: + emit_unary_func_op(result_type, id, args[0], "ceil"); + break; + case GLSLstd450Fract: + emit_unary_func_op(result_type, id, args[0], "fract"); + break; + case GLSLstd450Radians: + emit_unary_func_op(result_type, id, args[0], "radians"); + break; + case GLSLstd450Degrees: + emit_unary_func_op(result_type, id, args[0], "degrees"); + break; + case GLSLstd450Fma: + if ((!options.es && options.version < 400) || (options.es && options.version < 320)) { - expr += ")"; + auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ", + to_enclosed_expression(args[2])); + + emit_op(result_type, id, expr, + should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); + for (uint32_t i = 0; i < 3; i++) + inherit_expression_dependencies(id, args[i]); } + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); + break; + case GLSLstd450Modf: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "modf"); + break; - return expr; - } - else + case GLSLstd450ModfStruct: { - assert(result.second % (target_type.width / 8) 
== 0); - uint32_t index = result.second / (target_type.width / 8); - - std::string expr; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, id); + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + break; + } - expr += buffer_name; - expr += "["; + // Minmax + case GLSLstd450UMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false); + break; + + case GLSLstd450SMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false); + break; + + case GLSLstd450FMin: + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + break; + + case GLSLstd450FMax: + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + break; + + case GLSLstd450UMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false); + break; + + case GLSLstd450SMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false); + break; + + case GLSLstd450FClamp: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + break; + + case GLSLstd450UClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type); + break; + + case GLSLstd450SClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type); + break; + + // Trig + case GLSLstd450Sin: + emit_unary_func_op(result_type, id, args[0], "sin"); + break; + case GLSLstd450Cos: + emit_unary_func_op(result_type, id, args[0], "cos"); + break; + case GLSLstd450Tan: + emit_unary_func_op(result_type, id, args[0], "tan"); + break; + case GLSLstd450Asin: + emit_unary_func_op(result_type, id, args[0], "asin"); + break; + case GLSLstd450Acos: + emit_unary_func_op(result_type, id, args[0], "acos"); + break; + case GLSLstd450Atan: + emit_unary_func_op(result_type, id, args[0], "atan"); + break; + 
case GLSLstd450Sinh: + emit_unary_func_op(result_type, id, args[0], "sinh"); + break; + case GLSLstd450Cosh: + emit_unary_func_op(result_type, id, args[0], "cosh"); + break; + case GLSLstd450Tanh: + emit_unary_func_op(result_type, id, args[0], "tanh"); + break; + case GLSLstd450Asinh: + emit_unary_func_op(result_type, id, args[0], "asinh"); + break; + case GLSLstd450Acosh: + emit_unary_func_op(result_type, id, args[0], "acosh"); + break; + case GLSLstd450Atanh: + emit_unary_func_op(result_type, id, args[0], "atanh"); + break; + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan"); + break; + + // Exponentials + case GLSLstd450Pow: + emit_binary_func_op(result_type, id, args[0], args[1], "pow"); + break; + case GLSLstd450Exp: + emit_unary_func_op(result_type, id, args[0], "exp"); + break; + case GLSLstd450Log: + emit_unary_func_op(result_type, id, args[0], "log"); + break; + case GLSLstd450Exp2: + emit_unary_func_op(result_type, id, args[0], "exp2"); + break; + case GLSLstd450Log2: + emit_unary_func_op(result_type, id, args[0], "log2"); + break; + case GLSLstd450Sqrt: + emit_unary_func_op(result_type, id, args[0], "sqrt"); + break; + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "inversesqrt"); + break; + + // Matrix math + case GLSLstd450Determinant: + emit_unary_func_op(result_type, id, args[0], "determinant"); + break; + case GLSLstd450MatrixInverse: + emit_unary_func_op(result_type, id, args[0], "inverse"); + break; + + // Lerping + case GLSLstd450FMix: + case GLSLstd450IMix: + { + emit_mix_op(result_type, id, args[0], args[1], args[2]); + break; + } + case GLSLstd450Step: + emit_binary_func_op(result_type, id, args[0], args[1], "step"); + break; + case GLSLstd450SmoothStep: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); + break; + + // Packing + case GLSLstd450Frexp: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + 
emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); + break; + + case GLSLstd450FrexpStruct: + { + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, id); + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + break; + } + + case GLSLstd450Ldexp: + { + bool forward = should_forward(args[0]) && should_forward(args[1]); + + auto op0 = to_unpacked_expression(args[0]); + auto op1 = to_unpacked_expression(args[1]); + auto &op1_type = expression_type(args[1]); + if (op1_type.basetype != SPIRType::Int) + { + // Need a value cast here. + auto target_type = op1_type; + target_type.basetype = SPIRType::Int; + op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")"); + } + + auto expr = join("ldexp(", op0, ", ", op1, ")"); + + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + inherit_expression_dependencies(id, args[1]); + break; + } + + case GLSLstd450PackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); + break; + case GLSLstd450PackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); + break; + case GLSLstd450PackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); + break; + case GLSLstd450PackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); + break; + case GLSLstd450PackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); + break; + case GLSLstd450UnpackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); + break; + case GLSLstd450UnpackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); + break; + case GLSLstd450UnpackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); + break; + case GLSLstd450UnpackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); + 
break; + case GLSLstd450UnpackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); + break; + + case GLSLstd450PackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); + break; + case GLSLstd450UnpackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); + break; + + // Vector math + case GLSLstd450Length: + emit_unary_func_op(result_type, id, args[0], "length"); + break; + case GLSLstd450Distance: + emit_binary_func_op(result_type, id, args[0], args[1], "distance"); + break; + case GLSLstd450Cross: + emit_binary_func_op(result_type, id, args[0], args[1], "cross"); + break; + case GLSLstd450Normalize: + emit_unary_func_op(result_type, id, args[0], "normalize"); + break; + case GLSLstd450FaceForward: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); + break; + case GLSLstd450Reflect: + emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); + break; + case GLSLstd450Refract: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); + break; + + // Bit-fiddling + case GLSLstd450FindILsb: + // findLSB always returns int. + emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type); + break; + + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type); + break; + + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type, + int_type); // findMSB always returns int. 
+ break; + + // Multisampled varying + case GLSLstd450InterpolateAtCentroid: + emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); + break; + case GLSLstd450InterpolateAtSample: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); + break; + case GLSLstd450InterpolateAtOffset: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); + break; + + case GLSLstd450NMin: + case GLSLstd450NMax: + { + emit_nminmax_op(result_type, id, args[0], args[1], op); + break; + } + + case GLSLstd450NClamp: + { + // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. + // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. + uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX]; + if (!max_id) + max_id = ir.increase_bound_by(1); + + // Inherit precision qualifiers. + ir.meta[max_id] = ir.meta[id]; + + emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); + emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); + break; + } + + default: + statement("// unimplemented GLSL op ", eop); + break; + } +} + +void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) +{ + // Need to emulate this call. + uint32_t &ids = extra_sub_expressions[id]; + if (!ids) + { + ids = ir.increase_bound_by(5); + auto btype = get(result_type); + btype.basetype = SPIRType::Boolean; + set(ids, btype); + } + + uint32_t btype_id = ids + 0; + uint32_t left_nan_id = ids + 1; + uint32_t right_nan_id = ids + 2; + uint32_t tmp_id = ids + 3; + uint32_t mixed_first_id = ids + 4; + + // Inherit precision qualifiers. 
+ ir.meta[tmp_id] = ir.meta[id]; + ir.meta[mixed_first_id] = ir.meta[id]; + + emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); + emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); + emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); + emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); + emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); +} + +void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_shader_ballot"); + + enum AMDShaderBallot + { + SwizzleInvocationsAMD = 1, + SwizzleInvocationsMaskedAMD = 2, + WriteInvocationAMD = 3, + MbcntAMD = 4 + }; + + auto op = static_cast(eop); + + switch (op) + { + case SwizzleInvocationsAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); + register_control_dependent_expression(id); + break; + + case SwizzleInvocationsMaskedAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); + register_control_dependent_expression(id); + break; + + case WriteInvocationAMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); + register_control_dependent_expression(id); + break; + + case MbcntAMD: + emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); + register_control_dependent_expression(id); + break; + + default: + statement("// unimplemented SPV AMD shader ballot op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); + + enum AMDShaderExplicitVertexParameter + { + InterpolateAtVertexAMD = 1 + }; + + auto op = static_cast(eop); + + switch (op) + { + case InterpolateAtVertexAMD: + emit_binary_func_op(result_type, id, args[0], 
args[1], "interpolateAtVertexAMD"); + break; + + default: + statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_trinary_minmax"); + + enum AMDShaderTrinaryMinMax + { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 + }; + + auto op = static_cast(eop); + + switch (op) + { + case FMin3AMD: + case UMin3AMD: + case SMin3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); + break; + + case FMax3AMD: + case UMax3AMD: + case SMax3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); + break; + + case FMid3AMD: + case UMid3AMD: + case SMid3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); + break; + + default: + statement("// unimplemented SPV AMD shader trinary minmax op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_gcn_shader"); + + enum AMDGCNShader + { + CubeFaceIndexAMD = 1, + CubeFaceCoordAMD = 2, + TimeAMD = 3 + }; + + auto op = static_cast(eop); + + switch (op) + { + case CubeFaceIndexAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); + break; + case CubeFaceCoordAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); + break; + case TimeAMD: + { + string expr = "timeAMD()"; + emit_op(result_type, id, expr, true); + register_control_dependent_expression(id); + break; + } + + default: + statement("// unimplemented SPV AMD gcn shader op ", eop); + break; + } +} + +void CompilerGLSL::emit_subgroup_op(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = 
static_cast(i.op); + + if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op)) + SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics."); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + case OpGroupNonUniformElect: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect); + break; + + case OpGroupNonUniformBallotBitCount: + { + const GroupOperation operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount); + else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + } + break; + + case OpGroupNonUniformBallotBitExtract: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract); + break; + + case OpGroupNonUniformInverseBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + break; + + case OpGroupNonUniformBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot); + break; + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB); + break; + + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First); + break; + + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + require_extension_internal("GL_KHR_shader_subgroup_shuffle"); + break; + + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + 
require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); + break; + + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + { + const SPIRType &type = expression_type(ops[3]); + if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool); + else + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT); + } + break; + + case OpGroupNonUniformFAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformIMul: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + { + auto operation = static_cast(ops[3]); + if (operation == GroupOperationClusteredReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_clustered"); + } + else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || + operation == GroupOperationReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_arithmetic"); + } + else + SPIRV_CROSS_THROW("Invalid group operation."); + break; + } + + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + require_extension_internal("GL_KHR_shader_subgroup_quad"); + break; + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + switch (op) + { + case OpGroupNonUniformElect: + emit_op(result_type, id, "subgroupElect()", true); + break; + + case 
OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast"); + break; + + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst"); + break; + + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallot"); + break; + + case OpGroupNonUniformInverseBallot: + emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot"); + break; + + case OpGroupNonUniformBallotBitExtract: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract"); + break; + + case OpGroupNonUniformBallotFindLSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB"); + break; + + case OpGroupNonUniformBallotFindMSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB"); + break; + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount"); + else if (operation == GroupOperationInclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount"); + else if (operation == GroupOperationExclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount"); + else + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } + + case OpGroupNonUniformShuffle: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle"); + break; + + case OpGroupNonUniformShuffleXor: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor"); + break; + + case OpGroupNonUniformShuffleUp: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp"); + break; + + case OpGroupNonUniformShuffleDown: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); + break; + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); + break; + + case 
OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "subgroupAny"); + break; + + case OpGroupNonUniformAllEqual: + emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual"); + break; + + // clang-format off +#define GLSL_GROUP_OP(op, glsl_op) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + +#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \ + else if (operation == GroupOperationClusteredReduce) \ + emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + + GLSL_GROUP_OP(FAdd, Add) + GLSL_GROUP_OP(FMul, Mul) + GLSL_GROUP_OP(FMin, Min) + GLSL_GROUP_OP(FMax, Max) + GLSL_GROUP_OP(IAdd, Add) + GLSL_GROUP_OP(IMul, Mul) + GLSL_GROUP_OP_CAST(SMin, Min, int_type) + GLSL_GROUP_OP_CAST(SMax, Max, int_type) + 
GLSL_GROUP_OP_CAST(UMin, Min, uint_type) + GLSL_GROUP_OP_CAST(UMax, Max, uint_type) + GLSL_GROUP_OP(BitwiseAnd, And) + GLSL_GROUP_OP(BitwiseOr, Or) + GLSL_GROUP_OP(BitwiseXor, Xor) + GLSL_GROUP_OP(LogicalAnd, And) + GLSL_GROUP_OP(LogicalOr, Or) + GLSL_GROUP_OP(LogicalXor, Xor) +#undef GLSL_GROUP_OP +#undef GLSL_GROUP_OP_CAST + // clang-format on + + case OpGroupNonUniformQuadSwap: + { + uint32_t direction = evaluate_constant_u32(ops[4]); + if (direction == 0) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal"); + else if (direction == 1) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical"); + else if (direction == 2) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal"); + else + SPIRV_CROSS_THROW("Invalid quad swap direction."); + break; + } + + case OpGroupNonUniformQuadBroadcast: + { + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast"); + break; + } + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + register_control_dependent_expression(id); +} + +string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + // OpBitcast can deal with pointers. + if (out_type.pointer || in_type.pointer) + { + if (out_type.vecsize == 2 || in_type.vecsize == 2) + require_extension_internal("GL_EXT_buffer_reference_uvec2"); + return type_to_glsl(out_type); + } + + if (out_type.basetype == in_type.basetype) + return ""; + + assert(out_type.basetype != SPIRType::Boolean); + assert(in_type.basetype != SPIRType::Boolean); + + bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); + bool same_size_cast = out_type.width == in_type.width; + + // Trivial bitcast case, casts between integers. + if (integral_cast && same_size_cast) + return type_to_glsl(out_type); + + // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). 
+ if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) + return "unpack8"; + else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) + return "pack16"; + else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) + return "pack32"; + + // Floating <-> Integer special casts. Just have to enumerate all cases. :( + // 16-bit, 32-bit and 64-bit floats. + if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "floatBitsToUint"; + } + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "floatBitsToInt"; + } + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "uintBitsToFloat"; + } + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "intBitsToFloat"; + } + + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToInt64"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) + return 
"doubleBitsToUint64"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) + return "int64BitsToDouble"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) + return "uint64BitsToDouble"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) + return "float16BitsToInt16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) + return "float16BitsToUint16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) + return "int16BitsToFloat16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) + return "uint16BitsToFloat16"; + + // And finally, some even more special purpose casts. + if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) + return "packUint2x32"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2) + return "unpackUint2x32"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackFloat2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) + return "packFloat2x16"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) + return "packInt2x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) + return "unpackInt2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) + return "packUint2x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackUint2x16"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) 
+ return "packInt4x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) + return "unpackInt4x16"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) + return "packUint4x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) + return "unpackUint4x16"; + + return ""; +} + +string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) +{ + auto op = bitcast_glsl_op(result_type, expression_type(argument)); + if (op.empty()) + return to_enclosed_unpacked_expression(argument); + else + return join(op, "(", to_unpacked_expression(argument), ")"); +} + +std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) +{ + auto expr = to_expression(arg); + auto &src_type = expression_type(arg); + if (src_type.basetype != target_type) + { + auto target = src_type; + target.basetype = target_type; + expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); + } + + return expr; +} + +std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, + const std::string &expr) +{ + if (target_type.basetype == expr_type) + return expr; + + auto src_type = target_type; + src_type.basetype = expr_type; + return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); +} + +string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) +{ + switch (builtin) + { + case BuiltInPosition: + return "gl_Position"; + case BuiltInPointSize: + return "gl_PointSize"; + case BuiltInClipDistance: + return "gl_ClipDistance"; + case BuiltInCullDistance: + return "gl_CullDistance"; + case BuiltInVertexId: + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. 
This shader was created " + "with GL semantics."); + return "gl_VertexID"; + case BuiltInInstanceId: + if (options.vulkan_semantics) + { + auto model = get_entry_point().model; + switch (model) + { + case spv::ExecutionModelIntersectionKHR: + case spv::ExecutionModelAnyHitKHR: + case spv::ExecutionModelClosestHitKHR: + // gl_InstanceID is allowed in these shaders. + break; + + default: + SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was " + "created with GL semantics."); + } + } + if (!options.es && options.version < 140) + { + require_extension_internal("GL_ARB_draw_instanced"); + } + return "gl_InstanceID"; + case BuiltInVertexIndex: + if (options.vulkan_semantics) + return "gl_VertexIndex"; + else + return "gl_VertexID"; // gl_VertexID already has the base offset applied. + case BuiltInInstanceIndex: + if (options.vulkan_semantics) + return "gl_InstanceIndex"; + + if (!options.es && options.version < 140) + { + require_extension_internal("GL_ARB_draw_instanced"); + } + + if (options.vertex.support_nonzero_base_instance) + { + if (!options.vulkan_semantics) + { + // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported. + require_extension_internal("GL_ARB_shader_draw_parameters"); + } + return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. 
+ } + else + return "gl_InstanceID"; + case BuiltInPrimitiveId: + if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) + return "gl_PrimitiveIDIn"; + else + return "gl_PrimitiveID"; + case BuiltInInvocationId: + return "gl_InvocationID"; + case BuiltInLayer: + return "gl_Layer"; + case BuiltInViewportIndex: + return "gl_ViewportIndex"; + case BuiltInTessLevelOuter: + return "gl_TessLevelOuter"; + case BuiltInTessLevelInner: + return "gl_TessLevelInner"; + case BuiltInTessCoord: + return "gl_TessCoord"; + case BuiltInFragCoord: + return "gl_FragCoord"; + case BuiltInPointCoord: + return "gl_PointCoord"; + case BuiltInFrontFacing: + return "gl_FrontFacing"; + case BuiltInFragDepth: + return "gl_FragDepth"; + case BuiltInNumWorkgroups: + return "gl_NumWorkGroups"; + case BuiltInWorkgroupSize: + return "gl_WorkGroupSize"; + case BuiltInWorkgroupId: + return "gl_WorkGroupID"; + case BuiltInLocalInvocationId: + return "gl_LocalInvocationID"; + case BuiltInGlobalInvocationId: + return "gl_GlobalInvocationID"; + case BuiltInLocalInvocationIndex: + return "gl_LocalInvocationIndex"; + case BuiltInHelperInvocation: + return "gl_HelperInvocation"; + + case BuiltInBaseVertex: + if (options.es) + SPIRV_CROSS_THROW("BaseVertex not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseVertexARB"; + } + return "gl_BaseVertex"; + } + // On regular GL, this is soft-enabled and we emit ifdefs in code. 
+ require_extension_internal("GL_ARB_shader_draw_parameters"); + return "SPIRV_Cross_BaseVertex"; + + case BuiltInBaseInstance: + if (options.es) + SPIRV_CROSS_THROW("BaseInstance not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseInstanceARB"; + } + return "gl_BaseInstance"; + } + // On regular GL, this is soft-enabled and we emit ifdefs in code. + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "SPIRV_Cross_BaseInstance"; + + case BuiltInDrawIndex: + if (options.es) + SPIRV_CROSS_THROW("DrawIndex not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_DrawIDARB"; + } + return "gl_DrawID"; + } + // On regular GL, this is soft-enabled and we emit ifdefs in code. + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_DrawIDARB"; + + case BuiltInSampleId: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400."); + return "gl_SampleID"; + + case BuiltInSampleMask: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400."); + + if (storage == StorageClassInput) + return "gl_SampleMaskIn"; + else + return "gl_SampleMask"; + + case BuiltInSamplePosition: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400."); + return "gl_SamplePosition"; + + case BuiltInViewIndex: + if (options.vulkan_semantics) + return 
"gl_ViewIndex"; + else + return "gl_ViewID_OVR"; + + case BuiltInNumSubgroups: + request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups); + return "gl_NumSubgroups"; + + case BuiltInSubgroupId: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID); + return "gl_SubgroupID"; + + case BuiltInSubgroupSize: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize); + return "gl_SubgroupSize"; + + case BuiltInSubgroupLocalInvocationId: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID); + return "gl_SubgroupInvocationID"; + + case BuiltInSubgroupEqMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupEqMask"; + + case BuiltInSubgroupGeMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupGeMask"; + + case BuiltInSubgroupGtMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupGtMask"; + + case BuiltInSubgroupLeMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupLeMask"; + + case BuiltInSubgroupLtMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupLtMask"; + + case BuiltInLaunchIdKHR: + return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV"; + case BuiltInLaunchSizeKHR: + return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV"; + case BuiltInWorldRayOriginKHR: + return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV"; + case BuiltInWorldRayDirectionKHR: + return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV"; + case BuiltInObjectRayOriginKHR: + return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV"; + case BuiltInObjectRayDirectionKHR: + return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV"; + case BuiltInRayTminKHR: + return ray_tracing_is_khr ? 
"gl_RayTminEXT" : "gl_RayTminNV"; + case BuiltInRayTmaxKHR: + return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV"; + case BuiltInInstanceCustomIndexKHR: + return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV"; + case BuiltInObjectToWorldKHR: + return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV"; + case BuiltInWorldToObjectKHR: + return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV"; + case BuiltInHitTNV: + // gl_HitTEXT is an alias of RayTMax in KHR. + return "gl_HitTNV"; + case BuiltInHitKindKHR: + return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV"; + case BuiltInIncomingRayFlagsKHR: + return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV"; + + case BuiltInBaryCoordKHR: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordEXT"; + } + } + + case BuiltInBaryCoordNoPerspNV: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspEXT"; + } + } + + case BuiltInFragStencilRefEXT: + { + if (!options.es) + { + require_extension_internal("GL_ARB_shader_stencil_export"); + return "gl_FragStencilRefARB"; + } + else + SPIRV_CROSS_THROW("Stencil export not supported in 
GLES."); + } + + case BuiltInPrimitiveShadingRateKHR: + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL."); + require_extension_internal("GL_EXT_fragment_shading_rate"); + return "gl_PrimitiveShadingRateEXT"; + } + + case BuiltInShadingRateKHR: + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL."); + require_extension_internal("GL_EXT_fragment_shading_rate"); + return "gl_ShadingRateEXT"; + } + + case BuiltInDeviceIndex: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for device group support."); + require_extension_internal("GL_EXT_device_group"); + return "gl_DeviceIndex"; + + case BuiltInFullyCoveredEXT: + if (!options.es) + require_extension_internal("GL_NV_conservative_raster_underestimation"); + else + SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation."); + return "gl_FragFullyCoveredNV"; + + case BuiltInPrimitiveTriangleIndicesEXT: + return "gl_PrimitiveTriangleIndicesEXT"; + case BuiltInPrimitiveLineIndicesEXT: + return "gl_PrimitiveLineIndicesEXT"; + case BuiltInPrimitivePointIndicesEXT: + return "gl_PrimitivePointIndicesEXT"; + case BuiltInCullPrimitiveEXT: + return "gl_CullPrimitiveEXT"; + + default: + return join("gl_BuiltIn_", convert_to_string(builtin)); + } +} + +const char *CompilerGLSL::index_to_swizzle(uint32_t index) +{ + switch (index) + { + case 0: + return "x"; + case 1: + return "y"; + case 2: + return "z"; + case 3: + return "w"; + default: + return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec. 
+ } +} + +void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/, + AccessChainFlags flags, bool &access_chain_is_arrayed, + uint32_t index) +{ + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + + string idx_expr = index_is_literal ? convert_to_string(index) : to_unpacked_expression(index, register_expression_read); + + // For the case where the base of an OpPtrAccessChain already ends in [n], + // we need to use the index as an offset to the existing index, otherwise, + // we can just use the index directly. + if (ptr_chain && access_chain_is_arrayed) + { + size_t split_pos = expr.find_last_of(']'); + string expr_front = expr.substr(0, split_pos); + string expr_back = expr.substr(split_pos); + expr = expr_front + " + " + enclose_expression(idx_expr) + expr_back; + } + else + { + expr += "["; + expr += idx_expr; + expr += "]"; + } +} + +bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t) +{ + return true; +} + +string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, + AccessChainFlags flags, AccessChainMeta *meta) +{ + string expr; + + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0; + bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; + bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0; + + if (!chain_only) + { + // We handle transpose explicitly, so don't resolve that here. 
+ auto *e = maybe_get(base); + bool old_transpose = e && e->need_transpose; + if (e) + e->need_transpose = false; + expr = to_enclosed_expression(base, register_expression_read); + if (e) + e->need_transpose = old_transpose; + } + + // Start traversing type hierarchy at the proper non-pointer types, + // but keep type_id referencing the original pointer for use below. + uint32_t type_id = expression_type_id(base); + + if (!backend.native_pointers) + { + if (ptr_chain) + SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain."); + + // Wrapped buffer reference pointer types will need to poke into the internal "value" member before + // continuing the access chain. + if (should_dereference(base)) + { + auto &type = get(type_id); + expr = dereference_expression(type, expr); + } + } + + const auto *type = &get_pointee_type(type_id); + + bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; + bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); + bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked); + uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID); + bool is_invariant = has_decoration(base, DecorationInvariant); + bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision); + bool pending_array_enclose = false; + bool dimension_flatten = false; + + const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) { + AccessChainFlags mod_flags = flags; + if (!is_literal) + mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; + if (!is_ptr_chain) + mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT; + access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index); + check_physical_type_cast(expr, type, physical_type); + }; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + bool is_literal = index_is_literal; + if (is_literal 
&& msb_is_id && (index >> 31u) != 0u) + { + is_literal = false; + index &= 0x7fffffffu; + } + + // Pointer chains + if (ptr_chain && i == 0) + { + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays) + { + dimension_flatten = type->array.size() >= 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are flattening multidimensional arrays, do manual stride computation. + if (is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(type->array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(*type, j - 1)); + } + + if (type->array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + else + { + append_index(index, is_literal, true); + } + + if (type->basetype == SPIRType::ControlPointArray) + { + type_id = type->parent_type; + type = &get(type_id); + } + + access_chain_is_arrayed = true; + } + // Arrays + else if (!type->array.empty()) + { + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays && !pending_array_enclose) + { + dimension_flatten = type->array.size() > 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + assert(type->parent_type); + + auto *var = maybe_get(base); + if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && + !has_decoration(type->self, DecorationBlock)) + { + // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. 
+ // Normally, these variables live in blocks when compiled from GLSL, + // but HLSL seems to just emit straight arrays here. + // We must pretend this access goes through gl_in/gl_out arrays + // to be able to access certain builtins as arrays. + // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT. + auto builtin = ir.meta[base].decoration.builtin_type; + bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT; + + switch (builtin) + { + // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. + // case BuiltInClipDistance: + case BuiltInPosition: + case BuiltInPointSize: + if (mesh_shader) + expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassInput) + expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassOutput) + expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); + else + append_index(index, is_literal); + break; + + case BuiltInPrimitiveId: + case BuiltInLayer: + case BuiltInViewportIndex: + case BuiltInCullPrimitiveEXT: + case BuiltInPrimitiveShadingRateKHR: + if (mesh_shader) + expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + else + append_index(index, is_literal); + break; + + default: + append_index(index, is_literal); + break; + } + } + else if (backend.force_merged_mesh_block && i == 0 && var && + !is_builtin_variable(*var) && var->storage == StorageClassOutput) + { + if (is_per_primitive_variable(*var)) + expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + else + expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); + } + else if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are 
flattening multidimensional arrays, do manual stride computation. + auto &parent_type = get(type->parent_type); + + if (is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(parent_type.array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(parent_type, j - 1)); + } + + if (parent_type.array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. + // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. + else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) + { + append_index(index, is_literal); + } + + type_id = type->parent_type; + type = &get(type_id); + + access_chain_is_arrayed = true; + } + // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
+ else if (type->basetype == SPIRType::Struct) + { + if (!is_literal) + index = evaluate_constant_u32(index); + + if (index < uint32_t(type->member_type_index_redirection.size())) + index = type->member_type_index_redirection[index]; + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base)) + { + if (access_chain_is_arrayed) + { + expr += "."; + expr += builtin_to_glsl(builtin, type->storage); + } + else + expr = builtin_to_glsl(builtin, type->storage); + } + else + { + // If the member has a qualified name, use it as the entire chain + string qual_mbr_name = get_member_qualified_name(type_id, index); + if (!qual_mbr_name.empty()) + expr = qual_mbr_name; + else if (flatten_member_reference) + expr += join("_", to_member_name(*type, index)); + else + { + // Any pointer de-refences for values are handled in the first access chain. + // For pointer chains, the pointer-ness is resolved through an array access. + // The only time this is not true is when accessing array of SSBO/UBO. + // This case is explicitly handled. 
+ expr += to_member_reference(base, *type, index, ptr_chain || i != 0); + } + } + + if (has_member_decoration(type->self, index, DecorationInvariant)) + is_invariant = true; + if (has_member_decoration(type->self, index, DecorationRelaxedPrecision)) + relaxed_precision = true; + + is_packed = member_is_packed_physical_type(*type, index); + if (member_is_remapped_physical_type(*type, index)) + physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID); + else + physical_type = 0; + + row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); + type = &get(type->member_types[index]); + } + // Matrix -> Vector + else if (type->columns > 1) + { + // If we have a row-major matrix here, we need to defer any transpose in case this access chain + // is used to store a column. We can resolve it right here and now if we access a scalar directly, + // by flipping indexing order of the matrix. + + expr += "["; + if (is_literal) + expr += convert_to_string(index); + else + expr += to_unpacked_expression(index, register_expression_read); + expr += "]"; + + type_id = type->parent_type; + type = &get(type_id); + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + string deferred_index; + if (row_major_matrix_needs_conversion) + { + // Flip indexing order. + auto column_index = expr.find_last_of('['); + if (column_index != string::npos) + { + deferred_index = expr.substr(column_index); + expr.resize(column_index); + } + } + + // Internally, access chain implementation can also be used on composites, + // ignore scalar access workarounds in this case. + StorageClass effective_storage = StorageClassGeneric; + bool ignore_potential_sliced_writes = false; + if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0) + { + if (expression_type(base).pointer) + effective_storage = get_expression_effective_storage_class(base); + + // Special consideration for control points. 
+ // Control points can only be written by InvocationID, so there is no need + // to consider scalar access chains here. + // Cleans up some cases where it's very painful to determine the accurate storage class + // since blocks can be partially masked ... + auto *var = maybe_get_backing_variable(base); + if (var && var->storage == StorageClassOutput && + get_execution_model() == ExecutionModelTessellationControl && + !has_decoration(var->self, DecorationPatch)) + { + ignore_potential_sliced_writes = true; + } + } + else + ignore_potential_sliced_writes = true; + + if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) + { + // On some backends, we might not be able to safely access individual scalars in a vector. + // To work around this, we might have to cast the access chain reference to something which can, + // like a pointer to scalar, which we can then index into. + prepare_access_chain_for_scalar_access(expr, get(type->parent_type), effective_storage, + is_packed); + } + + if (is_literal) + { + bool out_of_bounds = (index >= type->vecsize); + + if (!is_packed && !row_major_matrix_needs_conversion) + { + expr += "."; + expr += index_to_swizzle(out_of_bounds ? 0 : index); + } + else + { + // For packed vectors, we can only access them as an array, not by swizzle. + expr += join("[", out_of_bounds ? 0 : index, "]"); + } + } + else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion) + { + auto &c = get(index); + bool out_of_bounds = (c.scalar() >= type->vecsize); + + if (c.specialization) + { + // If the index is a spec constant, we cannot turn extract into a swizzle. + expr += join("[", out_of_bounds ? "0" : to_expression(index), "]"); + } + else + { + expr += "."; + expr += index_to_swizzle(out_of_bounds ? 
0 : c.scalar()); + } + } + else + { + expr += "["; + expr += to_unpacked_expression(index, register_expression_read); + expr += "]"; + } + + if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) + { + prepare_access_chain_for_scalar_access(expr, get(type->parent_type), effective_storage, + is_packed); + } + + expr += deferred_index; + row_major_matrix_needs_conversion = false; + + is_packed = false; + physical_type = 0; + type_id = type->parent_type; + type = &get(type_id); + } + else if (!backend.allow_truncated_access_chain) + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (pending_array_enclose) + { + SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " + "but the access chain was terminated in the middle of a multidimensional array. " + "This is not supported."); + } + + if (meta) + { + meta->need_transpose = row_major_matrix_needs_conversion; + meta->storage_is_packed = is_packed; + meta->storage_is_invariant = is_invariant; + meta->storage_physical_type = physical_type; + meta->relaxed_precision = relaxed_precision; + } + + return expr; +} + +void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t) +{ +} + +void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &) +{ +} + +string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index) +{ + auto ret = join(basename, "_", to_member_name(type, index)); + ParsedIR::sanitize_underscores(ret); + return ret; +} + +string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, + AccessChainMeta *meta, bool ptr_chain) +{ + if (flattened_buffer_blocks.count(base)) + { + uint32_t matrix_stride = 0; + uint32_t array_stride = 0; + bool need_transpose = false; + flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride, + 
&array_stride, ptr_chain); + + if (meta) + { + meta->need_transpose = target_type.columns > 1 && need_transpose; + meta->storage_is_packed = false; + } + + return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride, + need_transpose); + } + else if (flattened_structs.count(base) && count > 0) + { + AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + + if (flattened_structs[base]) + { + flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT; + if (meta) + meta->flattened_struct = target_type.basetype == SPIRType::Struct; + } + + auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1); + if (meta) + { + meta->need_transpose = false; + meta->storage_is_packed = false; + } + + auto basename = to_flattened_access_chain_expression(base); + auto ret = join(basename, "_", chain); + ParsedIR::sanitize_underscores(ret); + return ret; + } + else + { + AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + return access_chain_internal(base, indices, count, flags, meta); + } +} + +string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type) +{ + auto expr = type_to_glsl_constructor(type); + expr += '('; + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + if (i) + expr += ", "; + + auto &member_type = get(type.member_types[i]); + if (member_type.basetype == SPIRType::Struct) + expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type); + else + expr += to_flattened_struct_member(basename, type, i); + } + expr += ')'; + return expr; +} + +std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id) +{ + // Do not use to_expression as that will unflatten access chains. 
+ string basename; + if (const auto *var = maybe_get(id)) + basename = to_name(var->self); + else if (const auto *expr = maybe_get(id)) + basename = expr->expression; + else + basename = to_expression(id); + + return basename; +} + +void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type, + const SmallVector &indices) +{ + SmallVector sub_indices = indices; + sub_indices.push_back(0); + + auto *member_type = &type; + for (auto &index : indices) + member_type = &get(member_type->member_types[index]); + + for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) + { + sub_indices.back() = i; + auto lhs = join(basename, "_", to_member_name(*member_type, i)); + ParsedIR::sanitize_underscores(lhs); + + if (get(member_type->member_types[i]).basetype == SPIRType::Struct) + { + store_flattened_struct(lhs, rhs_id, type, sub_indices); + } + else + { + auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices); + statement(lhs, " = ", rhs, ";"); + } + } +} + +void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value) +{ + auto &type = expression_type(lhs_id); + auto basename = to_flattened_access_chain_expression(lhs_id); + store_flattened_struct(basename, value, type, {}); +} + +std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + uint32_t /* array_stride */, bool need_transpose) +{ + if (!target_type.array.empty()) + SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); + else if (target_type.basetype == SPIRType::Struct) + return flattened_access_chain_struct(base, indices, count, target_type, offset); + else if (target_type.columns > 1) + return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); + else + return flattened_access_chain_vector(base, indices, count, 
target_type, offset, matrix_stride, need_transpose); +} + +std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset) +{ + std::string expr; + + if (backend.can_declare_struct_inline) + { + expr += type_to_glsl_constructor(target_type); + expr += "("; + } + else + expr += "{"; + + for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) + { + if (i != 0) + expr += ", "; + + const SPIRType &member_type = get(target_type.member_types[i]); + uint32_t member_offset = type_struct_member_offset(target_type, i); + + // The access chain terminates at the struct, so we need to find matrix strides and row-major information + // ahead of time. + bool need_transpose = false; + uint32_t matrix_stride = 0; + if (member_type.columns > 1) + { + need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor); + matrix_stride = type_struct_member_matrix_stride(target_type, i); + } + + auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, + 0 /* array_stride */, need_transpose); + + // Cannot forward transpositions, so resolve them here. + if (need_transpose) + expr += convert_row_major_matrix(tmp, member_type, 0, false); + else + expr += tmp; + } + + expr += backend.can_declare_struct_inline ? 
")" : "}"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + assert(matrix_stride); + SPIRType tmp_type = target_type; + if (need_transpose) + swap(tmp_type.vecsize, tmp_type.columns); + + std::string expr; + + expr += type_to_glsl_constructor(tmp_type); + expr += "("; + + for (uint32_t i = 0; i < tmp_type.columns; i++) + { + if (i != 0) + expr += ", "; + + expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, + /* need_transpose= */ false); + } + + expr += ")"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); + + auto buffer_name = to_name(expression_type(base).self); + + if (need_transpose) + { + std::string expr; + + if (target_type.vecsize > 1) + { + expr += type_to_glsl_constructor(target_type); + expr += "("; + } + + for (uint32_t i = 0; i < target_type.vecsize; ++i) + { + if (i != 0) + expr += ", "; + + uint32_t component_offset = result.second + i * matrix_stride; + + assert(component_offset % (target_type.width / 8) == 0); + uint32_t index = component_offset / (target_type.width / 8); + + expr += buffer_name; + expr += "["; + expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... 
that is either empty or ends with a + + expr += convert_to_string(index / 4); + expr += "]"; + + expr += vector_swizzle(1, index % 4); + } + + if (target_type.vecsize > 1) + { + expr += ")"; + } + + return expr; + } + else + { + assert(result.second % (target_type.width / 8) == 0); + uint32_t index = result.second / (target_type.width / 8); + + std::string expr; + + expr += buffer_name; + expr += "["; expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + expr += convert_to_string(index / 4); expr += "]"; - expr += vector_swizzle(target_type.vecsize, index % 4); + expr += vector_swizzle(target_type.vecsize, index % 4); + + return expr; + } +} + +std::pair CompilerGLSL::flattened_access_chain_offset( + const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, + bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain) +{ + // Start traversing type hierarchy at the proper non-pointer types. + const auto *type = &get_pointee_type(basetype); + + std::string expr; + + // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. + bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; + uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; + uint32_t array_stride = out_array_stride ? *out_array_stride : 0; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + // Pointers + if (ptr_chain && i == 0) + { + // Here, the pointer type will be decorated with an array stride. + array_stride = get_decoration(basetype.self, DecorationArrayStride); + if (!array_stride) + SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + + auto *constant = maybe_get(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. 
+ if (array_stride % word_stride) + { + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + } + // Arrays + else if (!type->array.empty()) + { + auto *constant = maybe_get(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. + if (array_stride % word_stride) + { + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + + uint32_t parent_type = type->parent_type; + type = &get(parent_type); + + if (!type->array.empty()) + array_stride = get_decoration(parent_type, DecorationArrayStride); + } + // For structs, the index refers to a constant, which indexes into the members. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
+ else if (type->basetype == SPIRType::Struct) + { + index = evaluate_constant_u32(index); + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + offset += type_struct_member_offset(*type, index); + + auto &struct_type = *type; + type = &get(type->member_types[index]); + + if (type->columns > 1) + { + matrix_stride = type_struct_member_matrix_stride(struct_type, index); + row_major_matrix_needs_conversion = + combined_decoration_for_member(struct_type, index).get(DecorationRowMajor); + } + else + row_major_matrix_needs_conversion = false; + + if (!type->array.empty()) + array_stride = type_struct_member_array_stride(struct_type, index); + } + // Matrix -> Vector + else if (type->columns > 1) + { + auto *constant = maybe_get(index); + if (constant) + { + index = evaluate_constant_u32(index); + offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; + // Dynamic array access. + if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a " + "4-component vector. " + "Likely culprit here is a row-major matrix being accessed dynamically. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + type = &get(type->parent_type); + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + auto *constant = maybe_get(index); + if (constant) + { + index = evaluate_constant_u32(index); + offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); + + // Dynamic array access. 
+ if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the " + "size of a 4-component vector. " + "This cannot be flattened in legacy targets."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + type = &get(type->parent_type); + } + else + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (need_transpose) + *need_transpose = row_major_matrix_needs_conversion; + if (out_matrix_stride) + *out_matrix_stride = matrix_stride; + if (out_array_stride) + *out_array_stride = array_stride; + + return std::make_pair(expr, offset); +} + +bool CompilerGLSL::should_dereference(uint32_t id) +{ + const auto &type = expression_type(id); + // Non-pointer expressions don't need to be dereferenced. + if (!type.pointer) + return false; + + // Handles shouldn't be dereferenced either. + if (!expression_is_lvalue(id)) + return false; + + // If id is a variable but not a phi variable, we should not dereference it. + if (auto *var = maybe_get(id)) + return var->phi_variable; + + if (auto *expr = maybe_get(id)) + { + // If id is an access chain, we should not dereference it. + if (expr->access_chain) + return false; + + // If id is a forwarded copy of a variable pointer, we should not dereference it. + SPIRVariable *var = nullptr; + while (expr->loaded_from && expression_is_forwarded(expr->self)) + { + auto &src_type = expression_type(expr->loaded_from); + // To be a copy, the pointer and its source expression must be the + // same type. Can't check type.self, because for some reason that's + // usually the base type with pointers stripped off. This check is + // complex enough that I've hoisted it out of the while condition. 
+ if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth || + src_type.parent_type != type.parent_type) + break; + if ((var = maybe_get(expr->loaded_from))) + break; + if (!(expr = maybe_get(expr->loaded_from))) + break; + } + + return !var || var->phi_variable; + } + + // Otherwise, we should dereference this pointer expression. + return true; +} + +bool CompilerGLSL::should_forward(uint32_t id) const +{ + // If id is a variable we will try to forward it regardless of force_temporary check below + // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL + + auto *var = maybe_get(id); + if (var) + { + // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. + return !(has_decoration(id, DecorationBuiltIn) && has_decoration(id, DecorationVolatile)); + } + + // For debugging emit temporary variables for all expressions + if (options.force_temporary) + return false; + + // If an expression carries enough dependencies we need to stop forwarding at some point, + // or we explode compilers. There are usually limits to how much we can nest expressions. + auto *expr = maybe_get(id); + const uint32_t max_expression_dependencies = 64; + if (expr && expr->expression_dependencies.size() >= max_expression_dependencies) + return false; + + if (expr && expr->loaded_from + && has_decoration(expr->loaded_from, DecorationBuiltIn) + && has_decoration(expr->loaded_from, DecorationVolatile)) + { + // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. + return false; + } + + // Immutable expression can always be forwarded. + if (is_immutable(id)) + return true; + + return false; +} + +bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const +{ + // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion. 
+ return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id); +} + +void CompilerGLSL::track_expression_read(uint32_t id) +{ + switch (ir.ids[id].get_type()) + { + case TypeExpression: + { + auto &e = get(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + case TypeAccessChain: + { + auto &e = get(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + default: + break; + } + + // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. + // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. + if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id)) + { + auto &v = expression_usage_counts[id]; + v++; + + // If we create an expression outside a loop, + // but access it inside a loop, we're implicitly reading it multiple times. + // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion + // working inside the backend compiler. + if (expression_read_implies_multiple_reads(id)) + v++; + + if (v >= 2) + { + //if (v == 2) + // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); + + // Force a recompile after this pass to avoid forwarding this variable. + force_temporary_and_recompile(id); + } + } +} + +bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) +{ + if (forced_temporaries.find(id) != end(forced_temporaries)) + return false; + + for (uint32_t i = 0; i < num_args; i++) + if (!should_forward(args[i])) + return false; + + // We need to forward globals as well. 
+ if (!pure) + { + for (auto global : global_variables) + if (!should_forward(global)) + return false; + for (auto aliased : aliased_variables) + if (!should_forward(aliased)) + return false; + } + + return true; +} + +void CompilerGLSL::register_impure_function_call() +{ + // Impure functions can modify globals and aliased variables, so invalidate them as well. + for (auto global : global_variables) + flush_dependees(get(global)); + for (auto aliased : aliased_variables) + flush_dependees(get(aliased)); +} + +void CompilerGLSL::register_call_out_argument(uint32_t id) +{ + register_write(id); + + auto *var = maybe_get(id); + if (var) + flush_variable_declaration(var->self); +} + +string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) +{ + // These variables are always function local, + // so make sure we emit the variable without storage qualifiers. + // Some backends will inject custom variables locally in a function + // with a storage qualifier which is not function-local. 
+ auto old_storage = var.storage; + var.storage = StorageClassFunction; + auto expr = variable_decl(var); + var.storage = old_storage; + return expr; +} + +void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) +{ + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self)) + { + auto &type = get(var.basetype); + auto &flags = get_decoration_bitset(var.self); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + flushed_phi_variables.insert(var.self); + } +} + +void CompilerGLSL::flush_variable_declaration(uint32_t id) +{ + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + auto *var = maybe_get(id); + if (var && var->deferred_declaration) + { + string initializer; + if (options.force_zero_initialized_variables && + (var->storage == StorageClassFunction || var->storage == StorageClassGeneric || + var->storage == StorageClassPrivate) && + !var->initializer && type_can_zero_initialize(get_variable_data_type(*var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var))); + } + + statement(variable_decl_function_local(*var), initializer, ";"); + var->deferred_declaration = false; + } + if (var) + { + emit_variable_temporary_copies(*var); + } +} + +bool CompilerGLSL::remove_duplicate_swizzle(string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. 
+ // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. + for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto prevpos = op.find_last_of('.', pos - 1); + if (prevpos == string::npos) + return false; + + prevpos++; + + // Make sure there are only swizzles here ... + for (auto i = prevpos; i < pos; i++) + { + if (op[i] < 'w' || op[i] > 'z') + { + // If swizzles are foo.xyz() like in C++ backend for example, check for that. + if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') + break; + return false; + } + } + + // If original swizzle is large enough, just carve out the components we need. + // E.g. foobar.wyx.xy will turn into foobar.wy. + if (pos - prevpos >= final_swiz.size()) + { + op.erase(prevpos + final_swiz.size(), string::npos); + + // Add back the function call ... + if (backend.swizzle_is_function) + op += "()"; + } + return true; +} + +// Optimizes away vector swizzles where we have something like +// vec3 foo; +// foo.xyz <-- swizzle expression does nothing. +// This is a very common pattern after OpCompositeCombine. +bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. + // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. 
+ for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto &type = expression_type(base); + + // Sanity checking ... + assert(type.columns == 1 && type.array.empty()); + + if (type.vecsize == final_swiz.size()) + op.erase(pos, string::npos); + return true; +} + +string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) +{ + ID base = 0; + string op; + string subop; + + // Can only merge swizzles for vectors. + auto &type = get(return_type); + bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; + bool swizzle_optimization = false; + + for (uint32_t i = 0; i < length; i++) + { + auto *e = maybe_get(elems[i]); + + // If we're merging another scalar which belongs to the same base + // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! + if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) + { + // Only supposed to be used for vector swizzle -> scalar. + assert(!e->expression.empty() && e->expression.front() == '.'); + subop += e->expression.substr(1, string::npos); + swizzle_optimization = true; + } + else + { + // We'll likely end up with duplicated swizzles, e.g. + // foobar.xyz.xyz from patterns like + // OpVectorShuffle + // OpCompositeExtract x 3 + // OpCompositeConstruct 3x + other scalar. + // Just modify op in-place. + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. + // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. + // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. 
+ // Essentially, we can only remove one set of swizzles, since that's what we have control over ... + // Case 1: + // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. + // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. + // Case 2: + // foo.xyz: Duplicate swizzle won't kick in. + // If foo is vec3, we can remove xyz, giving just foo. + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + swizzle_optimization = false; + op += subop; + } + else + op += subop; + + if (i) + op += ", "; + + bool uses_buffer_offset = + type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset); + subop = to_composite_constructor_expression(elems[i], uses_buffer_offset); + } + + base = e ? e->base_expression : ID(0); + } + + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + } + + op += subop; + return op; +} + +bool CompilerGLSL::skip_argument(uint32_t id) const +{ + if (!combined_image_samplers.empty() || !options.vulkan_semantics) + { + auto &type = expression_type(id); + if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) + return true; + } + return false; +} + +bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) +{ + // Do this with strings because we have a very clear pattern we can check for and it avoids + // adding lots of special cases to the code emission. + if (rhs.size() < lhs.size() + 3) + return false; + + // Do not optimize matrices. 
They are a bit awkward to reason about in general + // (in which order does operation happen?), and it does not work on MSL anyways. + if (type.vecsize > 1 && type.columns > 1) + return false; + + auto index = rhs.find(lhs); + if (index != 0) + return false; + + // TODO: Shift operators, but it's not important for now. + auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); + if (op != lhs.size() + 1) + return false; + + // Check that the op is followed by space. This excludes && and ||. + if (rhs[op + 1] != ' ') + return false; + + char bop = rhs[op]; + auto expr = rhs.substr(lhs.size() + 3); + // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. + // Find some common patterns which are equivalent. + if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) + statement(lhs, bop, bop, ";"); + else + statement(lhs, " ", bop, "= ", expr, ";"); + return true; +} + +void CompilerGLSL::register_control_dependent_expression(uint32_t expr) +{ + if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) + return; + + assert(current_emitting_block); + current_emitting_block->invalidate_expressions.push_back(expr); +} + +void CompilerGLSL::emit_block_instructions(SPIRBlock &block) +{ + current_emitting_block = █ + + if (backend.requires_relaxed_precision_analysis) + { + // If PHI variables are consumed in unexpected precision contexts, copy them here. + for (auto &phi : block.phi_variables) + { + auto itr = temporary_to_mirror_precision_alias.find(phi.function_variable); + if (itr != temporary_to_mirror_precision_alias.end()) + { + // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, + // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). 
+ EmbeddedInstruction inst; + inst.op = OpCopyObject; + inst.length = 3; + inst.ops.push_back(expression_type_id(itr->first)); + inst.ops.push_back(itr->second); + inst.ops.push_back(itr->first); + emit_instruction(inst); + } + } + } + + for (auto &op : block.ops) + { + auto temporary_copy = handle_instruction_precision(op); + emit_instruction(op); + if (temporary_copy.dst_id) + { + // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, + // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). + EmbeddedInstruction inst; + inst.op = OpCopyObject; + inst.length = 3; + inst.ops.push_back(expression_type_id(temporary_copy.src_id)); + inst.ops.push_back(temporary_copy.dst_id); + inst.ops.push_back(temporary_copy.src_id); + + // Never attempt to hoist mirrored temporaries. + // They are hoisted in lock-step with their parents. + block_temporary_hoisting = true; + emit_instruction(inst); + block_temporary_hoisting = false; + } + } + + current_emitting_block = nullptr; +} + +void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) +{ + // Allow trivially forwarded expressions like OpLoad or trivial shuffles, + // these will be marked as having suppressed usage tracking. + // Our only concern is to make sure arithmetic operations are done in similar ways. + if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) && + forced_invariant_temporaries.count(expr.self) == 0) + { + force_temporary_and_recompile(expr.self); + forced_invariant_temporaries.insert(expr.self); + + for (auto &dependent : expr.expression_dependencies) + disallow_forwarding_in_expression_chain(get(dependent)); + } +} + +void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) +{ + // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to + // this variable is consistent. 
The failure case for SPIRV-Cross is when an expression is forced to a temporary + // in one translation unit, but not another, e.g. due to multiple use of an expression. + // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent + // expressions to be temporaries. + // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough + // for all reasonable uses of invariant. + if (!has_decoration(store_id, DecorationInvariant)) + return; + + auto *expr = maybe_get(value_id); + if (!expr) + return; + + disallow_forwarding_in_expression_chain(*expr); +} + +void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) +{ + auto rhs = to_pointer_expression(rhs_expression); + + // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. + if (!rhs.empty()) + { + handle_store_to_invariant_variable(lhs_expression, rhs_expression); + + if (!unroll_array_to_complex_store(lhs_expression, rhs_expression)) + { + auto lhs = to_dereferenced_expression(lhs_expression); + if (has_decoration(lhs_expression, DecorationNonUniform)) + convert_non_uniform_expression(lhs, lhs_expression); + + // We might need to cast in order to store to a builtin. + cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression)); + + // Tries to optimize assignments like " = op expr". + // While this is purely cosmetic, this is important for legacy ESSL where loop + // variable increments must be in either i++ or i += const-expr. + // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. 
+ if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } + register_write(lhs_expression); + } +} + +uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const +{ + if (instr.length < 3) + return 32; + + auto *ops = stream(instr); + + switch (instr.op) + { + case OpSConvert: + case OpConvertSToF: + case OpUConvert: + case OpConvertUToF: + case OpIEqual: + case OpINotEqual: + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: + return expression_type(ops[2]).width; + + default: + { + // We can look at result type which is more robust. + auto *type = maybe_get(ops[0]); + if (type && type_is_integral(*type)) + return type->width; + else + return 32; + } + } +} + +uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const +{ + if (length < 1) + return 32; + + switch (op) + { + case GLSLstd450SAbs: + case GLSLstd450SSign: + case GLSLstd450UMin: + case GLSLstd450SMin: + case GLSLstd450UMax: + case GLSLstd450SMax: + case GLSLstd450UClamp: + case GLSLstd450SClamp: + case GLSLstd450FindSMsb: + case GLSLstd450FindUMsb: + return expression_type(ops[0]).width; + + default: + { + // We don't need to care about other opcodes, just return 32. + return 32; + } + } +} + +void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length) +{ + // Only GLSL supports RelaxedPrecision directly. + // We cannot implement this in HLSL or MSL because it is tied to the type system. + // In SPIR-V, everything must masquerade as 32-bit. + if (!backend.requires_relaxed_precision_analysis) + return; + + auto input_precision = analyze_expression_precision(args, length); + + // For expressions which are loaded or directly forwarded, we inherit mediump implicitly. 
+ // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id. + if (input_precision == Options::Mediump) + set_decoration(dst_id, DecorationRelaxedPrecision); +} + +CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const +{ + // Now, analyze the precision at which the arguments would run. + // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision + // for the inputs. Constants do not have inherent precision and do not contribute to this decision. + // If all inputs are constants, they inherit precision from outer expressions, including an l-value. + // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with + // correct precision. + bool expression_has_highp = false; + bool expression_has_mediump = false; - return expr; + for (uint32_t i = 0; i < length; i++) + { + uint32_t arg = args[i]; + + auto handle_type = ir.ids[arg].get_type(); + if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) + continue; + + if (has_decoration(arg, DecorationRelaxedPrecision)) + expression_has_mediump = true; + else + expression_has_highp = true; } + + if (expression_has_highp) + return Options::Highp; + else if (expression_has_mediump) + return Options::Mediump; + else + return Options::DontCare; } -std::pair CompilerGLSL::flattened_access_chain_offset( - const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, - bool *need_transpose, uint32_t *out_matrix_stride, bool ptr_chain) +void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length) { - // Start traversing type hierarchy at the proper non-pointer types. 
- const auto *type = &get_pointee_type(basetype); + if (!backend.requires_relaxed_precision_analysis) + return; - // This holds the type of the current pointer which we are traversing through. - // We always start out from a struct type which is the block. - // This is primarily used to reflect the array strides and matrix strides later. - // For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set - // accordingly as members of structs are accessed. - assert(type->basetype == SPIRType::Struct); - uint32_t type_id = 0; + auto &type = get(type_id); - std::string expr; + // RelaxedPrecision only applies to 32-bit values. + if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt) + return; - // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. - bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; - uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; + bool operation_is_highp = !has_decoration(dst_id, DecorationRelaxedPrecision); - for (uint32_t i = 0; i < count; i++) + auto input_precision = analyze_expression_precision(args, length); + if (input_precision == Options::DontCare) { - uint32_t index = indices[i]; + consume_temporary_in_precision_context(type_id, dst_id, input_precision); + return; + } - // Pointers - if (ptr_chain && i == 0) + // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined. + // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit. + // However, if the expression is not, inputs must be expanded to 32-bit first, + // since the operation must run at high precision. + // This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump, + // we might have to forcefully bind the source IDs to highp temporaries. 
This is done by clearing decorations + // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables. + if ((operation_is_highp && input_precision == Options::Mediump) || + (!operation_is_highp && input_precision == Options::Highp)) + { + auto precision = operation_is_highp ? Options::Highp : Options::Mediump; + for (uint32_t i = 0; i < length; i++) { - // Here, the pointer type will be decorated with an array stride. - uint32_t array_stride = get_decoration(basetype.self, DecorationArrayStride); - if (!array_stride) - SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + // Rewrites the opcode so that we consume an ID in correct precision context. + // This is pretty hacky, but it's the most straight forward way of implementing this without adding + // lots of extra passes to rewrite all code blocks. + args[i] = consume_temporary_in_precision_context(expression_type_id(args[i]), args[i], precision); + } + } +} - auto *constant = maybe_get(index); - if (constant) - { - // Constant array access. - offset += constant->scalar() * array_stride; - } - else - { - // Dynamic array access. - if (array_stride % word_stride) - { - SPIRV_CROSS_THROW( - "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " - "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " - "This cannot be flattened. Try using std140 layout instead."); - } +// This is probably not exhaustive ... 
+static bool opcode_is_precision_sensitive_operation(Op op) +{ + switch (op) + { + case OpFAdd: + case OpFSub: + case OpFMul: + case OpFNegate: + case OpIAdd: + case OpISub: + case OpIMul: + case OpSNegate: + case OpFMod: + case OpFDiv: + case OpFRem: + case OpSMod: + case OpSDiv: + case OpSRem: + case OpUMod: + case OpUDiv: + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + case OpMatrixTimesMatrix: + case OpDPdx: + case OpDPdy: + case OpDPdxCoarse: + case OpDPdyCoarse: + case OpDPdxFine: + case OpDPdyFine: + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + case OpVectorTimesScalar: + case OpMatrixTimesScalar: + case OpOuterProduct: + case OpFConvert: + case OpSConvert: + case OpUConvert: + case OpConvertSToF: + case OpConvertUToF: + case OpConvertFToU: + case OpConvertFToS: + return true; - expr += to_enclosed_expression(index); - expr += " * "; - expr += convert_to_string(array_stride / word_stride); - expr += " + "; - } - // Type ID is unchanged. - } - // Arrays - else if (!type->array.empty()) - { - // Here, the type_id will be a type ID for the array type itself. - uint32_t array_stride = get_decoration(type_id, DecorationArrayStride); - if (!array_stride) - SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + default: + return false; + } +} - auto *constant = maybe_get(index); - if (constant) - { - // Constant array access. - offset += constant->scalar() * array_stride; - } - else - { - // Dynamic array access. - if (array_stride % word_stride) - { - SPIRV_CROSS_THROW( - "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " - "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " - "This cannot be flattened. Try using std140 layout instead."); - } +// Instructions which just load data but don't do any arithmetic operation should just inherit the decoration. 
+// SPIR-V doesn't require this, but it's somewhat implied it has to work this way, relaxed precision is only +// relevant when operating on the IDs, not when shuffling things around. +static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count) +{ + switch (op) + { + case OpLoad: + case OpAccessChain: + case OpInBoundsAccessChain: + case OpCompositeExtract: + case OpVectorExtractDynamic: + case OpSampledImage: + case OpImage: + case OpCopyObject: - expr += to_enclosed_expression(index, false); - expr += " * "; - expr += convert_to_string(array_stride / word_stride); - expr += " + "; - } + case OpImageRead: + case OpImageFetch: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageGather: + case OpImageDrefGather: + case OpImageSparseRead: + case OpImageSparseFetch: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageSparseGather: + case OpImageSparseDrefGather: + arg_count = 1; + return true; - uint32_t parent_type = type->parent_type; - type = &get(parent_type); - type_id = parent_type; + case OpVectorShuffle: + arg_count = 2; + return true; - // Type ID now refers to the array type with one less dimension. - } - // For structs, the index refers to a constant, which indexes into the members. - // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
- else if (type->basetype == SPIRType::Struct) - { - index = get(index).scalar(); + case OpCompositeConstruct: + return true; - if (index >= type->member_types.size()) - SPIRV_CROSS_THROW("Member index is out of bounds!"); + default: + break; + } - offset += type_struct_member_offset(*type, index); - type_id = type->member_types[index]; + return false; +} - auto &struct_type = *type; - type = &get(type->member_types[index]); +CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction) +{ + auto ops = stream_mutable(instruction); + auto opcode = static_cast(instruction.op); + uint32_t length = instruction.length; - if (type->columns > 1) - { - matrix_stride = type_struct_member_matrix_stride(struct_type, index); - row_major_matrix_needs_conversion = - combined_decoration_for_member(struct_type, index).get(DecorationRowMajor); - } - else - row_major_matrix_needs_conversion = false; - } - // Matrix -> Vector - else if (type->columns > 1) + if (backend.requires_relaxed_precision_analysis) + { + if (length > 2) { - auto *constant = maybe_get(index); - if (constant) - { - index = get(index).scalar(); - offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); - } - else - { - uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; - // Dynamic array access. - if (indexing_stride % word_stride) - { - SPIRV_CROSS_THROW( - "Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. " - "Likely culprit here is a row-major matrix being accessed dynamically. " - "This cannot be flattened. 
Try using std140 layout instead."); - } + uint32_t forwarding_length = length - 2; - expr += to_enclosed_expression(index, false); - expr += " * "; - expr += convert_to_string(indexing_stride / word_stride); - expr += " + "; - } + if (opcode_is_precision_sensitive_operation(opcode)) + analyze_precision_requirements(ops[0], ops[1], &ops[2], forwarding_length); + else if (opcode == OpExtInst && length >= 5 && get(ops[2]).ext == SPIRExtension::GLSL) + analyze_precision_requirements(ops[0], ops[1], &ops[4], forwarding_length - 2); + else if (opcode_is_precision_forwarding_instruction(opcode, forwarding_length)) + forward_relaxed_precision(ops[1], &ops[2], forwarding_length); + } - uint32_t parent_type = type->parent_type; - type = &get(type->parent_type); - type_id = parent_type; + uint32_t result_type = 0, result_id = 0; + if (instruction_to_result_type(result_type, result_id, opcode, ops, length)) + { + auto itr = temporary_to_mirror_precision_alias.find(ops[1]); + if (itr != temporary_to_mirror_precision_alias.end()) + return { itr->second, itr->first }; } - // Vector -> Scalar - else if (type->vecsize > 1) + } + + return {}; +} + +void CompilerGLSL::emit_instruction(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto opcode = static_cast(instruction.op); + uint32_t length = instruction.length; + +#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \ + opcode_is_sign_invariant(opcode), implicit_integer_promotion) +#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) +#define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op) +#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) +#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define 
GLSL_BFOP_CAST(op, type) \ + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + // Handle C implicit integer promotion rules. + // If we get implicit promotion to int, need to make sure we cast by value to intended return type, + // otherwise, future sign-dependent operations and bitcasts will break. + bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules && + opcode_can_promote_integer_implicitly(opcode) && + get(ops[0]).vecsize == 1; + + opcode = get_remapped_spirv_op(opcode); + + switch (opcode) + { + // Dealing with memory + case OpLoad: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + + flush_variable_declaration(ptr); + + // If we're loading from memory that cannot be changed by the shader, + // just forward the expression directly to avoid needless temporaries. + // If an expression is mutable and forwardable, we speculate that it is immutable. + bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // If loading a non-native row-major matrix, mark the expression as need_transpose. + bool need_transpose = false; + bool old_need_transpose = false; + + auto *ptr_expression = maybe_get(ptr); + + if (forward) { - auto *constant = maybe_get(index); - if (constant) + // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while + // taking the expression. 
+ if (ptr_expression && ptr_expression->need_transpose) { - index = get(index).scalar(); - offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); + old_need_transpose = true; + ptr_expression->need_transpose = false; + need_transpose = true; } - else - { - uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); - - // Dynamic array access. - if (indexing_stride % word_stride) - { - SPIRV_CROSS_THROW( - "Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. " - "This cannot be flattened in legacy targets."); - } + else if (is_non_native_row_major_matrix(ptr)) + need_transpose = true; + } - expr += to_enclosed_expression(index, false); - expr += " * "; - expr += convert_to_string(indexing_stride / word_stride); - expr += " + "; - } + // If we are forwarding this load, + // don't register the read to access chain here, defer that to when we actually use the expression, + // using the add_implied_read_expression mechanism. + string expr; - uint32_t parent_type = type->parent_type; - type = &get(type->parent_type); - type_id = parent_type; + bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked); + bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID); + if (forward || (!is_packed && !is_remapped)) + { + // For the simple case, we do not need to deal with repacking. + expr = to_dereferenced_expression(ptr, false); } else - SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); - } + { + // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before + // storing the expression to a temporary. 
+ expr = to_unpacked_expression(ptr); + } - if (need_transpose) - *need_transpose = row_major_matrix_needs_conversion; - if (out_matrix_stride) - *out_matrix_stride = matrix_stride; + auto &type = get(result_type); + auto &expr_type = expression_type(ptr); - return std::make_pair(expr, offset); -} + // If the expression has more vector components than the result type, insert + // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might + // happen with e.g. the MSL backend replacing the type of an input variable. + if (expr_type.vecsize > type.vecsize) + expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0)); -bool CompilerGLSL::should_dereference(uint32_t id) -{ - const auto &type = expression_type(id); - // Non-pointer expressions don't need to be dereferenced. - if (!type.pointer) - return false; + if (forward && ptr_expression) + ptr_expression->need_transpose = old_need_transpose; - // Handles shouldn't be dereferenced either. - if (!expression_is_lvalue(id)) - return false; + // We might need to cast in order to load from a builtin. + cast_from_variable_load(ptr, expr, type); - // If id is a variable but not a phi variable, we should not dereference it. - if (auto *var = maybe_get(id)) - return var->phi_variable; + if (forward && ptr_expression) + ptr_expression->need_transpose = false; - // If id is an access chain, we should not dereference it. - if (auto *expr = maybe_get(id)) - return !expr->access_chain; + // We might be trying to load a gl_Position[N], where we should be + // doing float4[](gl_in[i].gl_Position, ...) instead. + // Similar workarounds are required for input arrays in tessellation. + // Also, loading from gl_SampleMask array needs special unroll. + unroll_array_from_complex_load(id, ptr, expr); - // Otherwise, we should dereference this pointer expression. 
- return true; -} + if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform)) + { + // If we're loading something non-opaque, we need to handle non-uniform descriptor access. + convert_non_uniform_expression(expr, ptr); + } -bool CompilerGLSL::should_forward(uint32_t id) -{ - // If id is a variable we will try to forward it regardless of force_temporary check below - // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL - auto *var = maybe_get(id); - if (var && var->forwardable) - return true; + if (forward && ptr_expression) + ptr_expression->need_transpose = old_need_transpose; - // For debugging emit temporary variables for all expressions - if (options.force_temporary) - return false; + bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0; - // Immutable expression can always be forwarded. - if (is_immutable(id)) - return true; + if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened) + rewrite_load_for_wrapped_row_major(expr, result_type, ptr); - return false; -} + // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. + // However, if we try to load a complex, composite object from a flattened buffer, + // we should avoid emitting the same code over and over and lower the result to a temporary. 
+ bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1)); -void CompilerGLSL::track_expression_read(uint32_t id) -{ - switch (ir.ids[id].get_type()) - { - case TypeExpression: - { - auto &e = get(id); - for (auto implied_read : e.implied_read_expressions) - track_expression_read(implied_read); - break; - } + SPIRExpression *e = nullptr; + if (!forward && expression_is_non_value_type_array(ptr)) + { + // Complicated load case where we need to make a copy of ptr, but we cannot, because + // it is an array, and our backend does not support arrays as value types. + // Emit the temporary, and copy it explicitly. + e = &emit_uninitialized_temporary_expression(result_type, id); + emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr)); + } + else + e = &emit_op(result_type, id, expr, forward, !usage_tracking); - case TypeAccessChain: - { - auto &e = get(id); - for (auto implied_read : e.implied_read_expressions) - track_expression_read(implied_read); - break; - } + e->need_transpose = need_transpose; + register_read(id, ptr, forward); - default: + if (forward) + { + // Pass through whether the result is of a packed type and the physical type ID. + if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked)) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)) + { + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, + get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)); + } + } + else + { + // This might have been set on an earlier compilation iteration, force it to be unset. 
+ unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + } + + inherit_expression_dependencies(id, ptr); + if (forward) + add_implied_read_expression(*e, ptr); break; } - // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. - // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. - if (expression_is_forwarded(id)) + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: { - auto &v = expression_usage_counts[id]; - v++; + auto *var = maybe_get(ops[2]); + if (var) + flush_variable_declaration(var->self); - if (v >= 2) - { - //if (v == 2) - // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); + // If the base is immutable, the access chain pointer must also be. + // If an expression is mutable and forwardable, we speculate that it is immutable. + AccessChainMeta meta; + bool ptr_chain = opcode == OpPtrAccessChain; + auto &target_type = get(ops[0]); + auto e = access_chain(ops[2], &ops[3], length - 3, target_type, &meta, ptr_chain); - forced_temporaries.insert(id); - // Force a recompile after this pass to avoid forwarding this variable. - force_recompile(); - } - } -} + // If the base is flattened UBO of struct type, the expression has to be a composite. + // In that case, backends which do not support inline syntax need it to be bound to a temporary. + // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted. 
+ bool requires_temporary = false; + if (flattened_buffer_blocks.count(ops[2]) && target_type.basetype == SPIRType::Struct) + requires_temporary = !backend.can_declare_struct_inline; -bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) -{ - if (forced_temporaries.find(id) != end(forced_temporaries)) - return false; + auto &expr = requires_temporary ? + emit_op(ops[0], ops[1], std::move(e), false) : + set(ops[1], std::move(e), ops[0], should_forward(ops[2])); - for (uint32_t i = 0; i < num_args; i++) - if (!should_forward(args[i])) - return false; + auto *backing_variable = maybe_get_backing_variable(ops[2]); + expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); + expr.need_transpose = meta.need_transpose; + expr.access_chain = true; - // We need to forward globals as well. - if (!pure) - { - for (auto global : global_variables) - if (!should_forward(global)) - return false; - for (auto aliased : aliased_variables) - if (!should_forward(aliased)) - return false; - } + // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. + if (meta.storage_is_packed) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); + if (meta.storage_is_invariant) + set_decoration(ops[1], DecorationInvariant); + if (meta.flattened_struct) + flattened_structs[ops[1]] = true; + if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) + set_decoration(ops[1], DecorationRelaxedPrecision); + + // If we have some expression dependencies in our access chain, this access chain is technically a forwarded + // temporary which could be subject to invalidation. + // Need to assume we're forwarded while calling inherit_expression_depdendencies. 
+ forwarded_temporaries.insert(ops[1]); + // The access chain itself is never forced to a temporary, but its dependencies might. + suppressed_usage_tracking.insert(ops[1]); - return true; -} + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(expr, ops[i]); + } -void CompilerGLSL::register_impure_function_call() -{ - // Impure functions can modify globals and aliased variables, so invalidate them as well. - for (auto global : global_variables) - flush_dependees(get(global)); - for (auto aliased : aliased_variables) - flush_dependees(get(aliased)); -} + // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, + // we're not forwarded after all. + if (expr.expression_dependencies.empty()) + forwarded_temporaries.erase(ops[1]); -void CompilerGLSL::register_call_out_argument(uint32_t id) -{ - register_write(id); + break; + } - auto *var = maybe_get(id); - if (var) - flush_variable_declaration(var->self); -} + case OpStore: + { + auto *var = maybe_get(ops[0]); -string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) -{ - // These variables are always function local, - // so make sure we emit the variable without storage qualifiers. - // Some backends will inject custom variables locally in a function - // with a storage qualifier which is not function-local. - auto old_storage = var.storage; - var.storage = StorageClassFunction; - auto expr = variable_decl(var); - var.storage = old_storage; - return expr; -} + if (var && var->statically_assigned) + var->static_expression = ops[1]; + else if (var && var->loop_variable && !var->loop_variable_enable) + var->static_expression = ops[1]; + else if (var && var->remapped_variable && var->static_expression) + { + // Skip the write. 
+ } + else if (flattened_structs.count(ops[0])) + { + store_flattened_struct(ops[0], ops[1]); + register_write(ops[0]); + } + else + { + emit_store_statement(ops[0], ops[1]); + } -void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) -{ - if (var.allocate_temporary_copy) + // Storing a pointer results in a variable pointer, so we must conservatively assume + // we can write through it. + if (expression_type(ops[1]).pointer) + register_write(ops[1]); + break; + } + + case OpArrayLength: { - auto &type = get(var.basetype); - auto &flags = get_decoration_bitset(var.self); - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + if (has_decoration(ops[2], DecorationNonUniform)) + convert_non_uniform_expression(e, ops[2]); + set(id, join(type_to_glsl(get(result_type)), "(", e, ".length())"), result_type, + true); + break; } -} -void CompilerGLSL::flush_variable_declaration(uint32_t id) -{ - auto *var = maybe_get(id); - if (var && var->deferred_declaration) - { - statement(variable_decl_function_local(*var), ";"); - emit_variable_temporary_copies(*var); - var->deferred_declaration = false; - } -} + // Function calls + case OpFunctionCall: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t func = ops[2]; + const auto *arg = &ops[3]; + length -= 3; + + auto &callee = get(func); + auto &return_type = get(callee.return_type); + bool pure = function_is_pure(callee); + + bool callee_has_out_variables = false; + bool emit_return_value_as_argument = false; -bool CompilerGLSL::remove_duplicate_swizzle(string &op) -{ - auto pos = op.find_last_of('.'); - if (pos == string::npos || pos == 0) - return false; + // Invalidate out variables passed to functions since they can be OpStore'd to. 
+ for (uint32_t i = 0; i < length; i++) + { + if (callee.arguments[i].write_count) + { + register_call_out_argument(arg[i]); + callee_has_out_variables = true; + } - string final_swiz = op.substr(pos + 1, string::npos); + flush_variable_declaration(arg[i]); + } - if (backend.swizzle_is_function) - { - if (final_swiz.size() < 2) - return false; + if (!return_type.array.empty() && !backend.can_return_array) + { + callee_has_out_variables = true; + emit_return_value_as_argument = true; + } - if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") - final_swiz.erase(final_swiz.size() - 2, string::npos); - else - return false; - } + if (!pure) + register_impure_function_call(); - // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. - // If so, and previous swizzle is of same length, - // we can drop the final swizzle altogether. - for (uint32_t i = 0; i < final_swiz.size(); i++) - { - static const char expected[] = { 'x', 'y', 'z', 'w' }; - if (i >= 4 || final_swiz[i] != expected[i]) - return false; - } + string funexpr; + SmallVector arglist; + funexpr += to_name(func) + "("; - auto prevpos = op.find_last_of('.', pos - 1); - if (prevpos == string::npos) - return false; + if (emit_return_value_as_argument) + { + statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); + arglist.push_back(to_name(id)); + } - prevpos++; + for (uint32_t i = 0; i < length; i++) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg[i])) + continue; - // Make sure there are only swizzles here ... - for (auto i = prevpos; i < pos; i++) - { - if (op[i] < 'w' || op[i] > 'z') + arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i])); + } + + for (auto &combined : callee.combined_parameters) { - // If swizzles are foo.xyz() like in C++ backend for example, check for that. 
- if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') - break; - return false; + auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]); + auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]); + arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); } - } - // If original swizzle is large enough, just carve out the components we need. - // E.g. foobar.wyx.xy will turn into foobar.wy. - if (pos - prevpos >= final_swiz.size()) - { - op.erase(prevpos + final_swiz.size(), string::npos); + append_global_func_args(callee, length, arglist); - // Add back the function call ... - if (backend.swizzle_is_function) - op += "()"; - } - return true; -} + funexpr += merge(arglist); + funexpr += ")"; -// Optimizes away vector swizzles where we have something like -// vec3 foo; -// foo.xyz <-- swizzle expression does nothing. -// This is a very common pattern after OpCompositeCombine. -bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) -{ - auto pos = op.find_last_of('.'); - if (pos == string::npos || pos == 0) - return false; + // Check for function call constraints. + check_function_call_constraints(arg, length); - string final_swiz = op.substr(pos + 1, string::npos); + if (return_type.basetype != SPIRType::Void) + { + // If the function actually writes to an out variable, + // take the conservative route and do not forward. + // The problem is that we might not read the function + // result (and emit the function) before an out variable + // is read (common case when return value is ignored! + // In order to avoid start tracking invalid variables, + // just avoid the forwarding problem altogether. 
+ bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && + (forced_temporaries.find(id) == end(forced_temporaries)); - if (backend.swizzle_is_function) - { - if (final_swiz.size() < 2) - return false; + if (emit_return_value_as_argument) + { + statement(funexpr, ";"); + set(id, to_name(id), result_type, true); + } + else + emit_op(result_type, id, funexpr, forward); - if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") - final_swiz.erase(final_swiz.size() - 2, string::npos); + // Function calls are implicit loads from all variables in question. + // Set dependencies for them. + for (uint32_t i = 0; i < length; i++) + register_read(id, arg[i], forward); + + // If we're going to forward the temporary result, + // put dependencies on every variable that must not change. + if (forward) + register_global_read_dependencies(callee, id); + } else - return false; - } + statement(funexpr, ";"); - // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. - // If so, and previous swizzle is of same length, - // we can drop the final swizzle altogether. - for (uint32_t i = 0; i < final_swiz.size(); i++) - { - static const char expected[] = { 'x', 'y', 'z', 'w' }; - if (i >= 4 || final_swiz[i] != expected[i]) - return false; + break; } - auto &type = expression_type(base); - - // Sanity checking ... 
- assert(type.columns == 1 && type.array.empty()); + // Composite munging + case OpCompositeConstruct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + const auto *const elems = &ops[2]; + length -= 2; - if (type.vecsize == final_swiz.size()) - op.erase(pos, string::npos); - return true; -} + bool forward = true; + for (uint32_t i = 0; i < length; i++) + forward = forward && should_forward(elems[i]); -string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) -{ - uint32_t base = 0; - string op; - string subop; + auto &out_type = get(result_type); + auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr; - // Can only merge swizzles for vectors. - auto &type = get(return_type); - bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; - bool swizzle_optimization = false; + // Only splat if we have vector constructors. + // Arrays and structs must be initialized properly in full. + bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; - for (uint32_t i = 0; i < length; i++) - { - auto *e = maybe_get(elems[i]); + bool splat = false; + bool swizzle_splat = false; - // If we're merging another scalar which belongs to the same base - // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! - if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) + if (in_type) { - // Only supposed to be used for vector swizzle -> scalar. 
- assert(!e->expression.empty() && e->expression.front() == '.'); - subop += e->expression.substr(1, string::npos); - swizzle_optimization = true; + splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; + swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; + + if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } } - else + + if (splat || swizzle_splat) { - // We'll likely end up with duplicated swizzles, e.g. - // foobar.xyz.xyz from patterns like - // OpVectorShuffle - // OpCompositeExtract x 3 - // OpCompositeConstruct 3x + other scalar. - // Just modify op in-place. - if (swizzle_optimization) + uint32_t input = elems[0]; + for (uint32_t i = 0; i < length; i++) { - if (backend.swizzle_is_function) - subop += "()"; + if (input != elems[i]) + { + splat = false; + swizzle_splat = false; + } + } + } - // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. - // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. - // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. - // Essentially, we can only remove one set of swizzles, since that's what we have control over ... - // Case 1: - // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. - // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. - // Case 2: - // foo.xyz: Duplicate swizzle won't kick in. - // If foo is vec3, we can remove xyz, giving just foo. 
- if (!remove_duplicate_swizzle(subop)) - remove_unity_swizzle(base, subop); + if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + forward = false; + if (!out_type.array.empty() && !backend.can_declare_arrays_inline) + forward = false; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + forward = false; - // Strips away redundant parens if we created them during component extraction. - strip_enclosed_expression(subop); - swizzle_optimization = false; - op += subop; + string constructor_op; + if (backend.use_initializer_list && composite) + { + bool needs_trailing_tracket = false; + // Only use this path if we are building composites. + // This path cannot be used for arithmetic. + if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) + constructor_op += type_to_glsl_constructor(get(result_type)); + else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty()) + { + // MSL path. Array constructor is baked into type here, do not use _constructor variant. 
+ constructor_op += type_to_glsl_constructor(get(result_type)) + "("; + needs_trailing_tracket = true; } - else - op += subop; + constructor_op += "{ "; - if (i) - op += ", "; - subop = to_composite_constructor_expression(elems[i]); + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_unpacked_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += " }"; + if (needs_trailing_tracket) + constructor_op += ")"; + } + else if (swizzle_splat && !composite) + { + constructor_op = remap_swizzle(get(result_type), 1, to_unpacked_expression(elems[0])); + } + else + { + constructor_op = type_to_glsl_constructor(get(result_type)) + "("; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_unpacked_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += ")"; } - base = e ? e->base_expression : 0; + if (!constructor_op.empty()) + { + emit_op(result_type, id, constructor_op, forward); + for (uint32_t i = 0; i < length; i++) + inherit_expression_dependencies(id, elems[i]); + } + break; } - if (swizzle_optimization) + case OpVectorInsertDynamic: { - if (backend.swizzle_is_function) - subop += "()"; - - if (!remove_duplicate_swizzle(subop)) - remove_unity_swizzle(base, subop); - // Strips away redundant parens if we created them during component extraction. 
- strip_enclosed_expression(subop); - } + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec = ops[2]; + uint32_t comp = ops[3]; + uint32_t index = ops[4]; - op += subop; - return op; -} + flush_variable_declaration(vec); -bool CompilerGLSL::skip_argument(uint32_t id) const -{ - if (!combined_image_samplers.empty() || !options.vulkan_semantics) - { - auto &type = expression_type(id); - if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) - return true; + // Make a copy, then use access chain to store the variable. + statement(declare_temporary(result_type, id), to_expression(vec), ";"); + set(id, to_name(id), result_type, true); + auto chain = access_chain_internal(id, &index, 1, 0, nullptr); + statement(chain, " = ", to_unpacked_expression(comp), ";"); + break; } - return false; -} - -bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) -{ - // Do this with strings because we have a very clear pattern we can check for and it avoids - // adding lots of special cases to the code emission. - if (rhs.size() < lhs.size() + 3) - return false; - // Do not optimize matrices. They are a bit awkward to reason about in general - // (in which order does operation happen?), and it does not work on MSL anyways. - if (type.vecsize > 1 && type.columns > 1) - return false; + case OpVectorExtractDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; - auto index = rhs.find(lhs); - if (index != 0) - return false; + auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr); + emit_op(result_type, id, expr, should_forward(ops[2])); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); + break; + } - // TODO: Shift operators, but it's not important for now. 
- auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); - if (op != lhs.size() + 1) - return false; + case OpCompositeExtract: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + length -= 3; - // Check that the op is followed by space. This excludes && and ||. - if (rhs[op + 1] != ' ') - return false; + auto &type = get(result_type); - char bop = rhs[op]; - auto expr = rhs.substr(lhs.size() + 3); - // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. - // Find some common patterns which are equivalent. - if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) - statement(lhs, bop, bop, ";"); - else - statement(lhs, " ", bop, "= ", expr, ";"); - return true; -} + // We can only split the expression here if our expression is forwarded as a temporary. + bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); -void CompilerGLSL::register_control_dependent_expression(uint32_t expr) -{ - if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) - return; + // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. + auto &composite_type = expression_type(ops[2]); + bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty(); + if (composite_type_is_complex) + allow_base_expression = false; - assert(current_emitting_block); - current_emitting_block->invalidate_expressions.push_back(expr); -} + // Packed expressions or physical ID mapped expressions cannot be split up. 
+ if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) || + has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID)) + allow_base_expression = false; -void CompilerGLSL::emit_block_instructions(SPIRBlock &block) -{ - current_emitting_block = █ - for (auto &op : block.ops) - emit_instruction(op); - current_emitting_block = nullptr; -} + // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern + // into the base expression. + if (is_non_native_row_major_matrix(ops[2])) + allow_base_expression = false; -void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) -{ - if (forwarded_temporaries.count(expr.self)) - { - forced_temporaries.insert(expr.self); - force_recompile(); - } + AccessChainMeta meta; + SPIRExpression *e = nullptr; + auto *c = maybe_get(ops[2]); - for (auto &dependent : expr.expression_dependencies) - disallow_forwarding_in_expression_chain(get(dependent)); -} + if (c && !c->specialization && !composite_type_is_complex) + { + auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length); + e = &emit_op(result_type, id, expr, true, true); + } + else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) + { + // Only apply this optimization if result is scalar. -void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) -{ - // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to - // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary - // in one translation unit, but not another, e.g. due to multiple use of an expression. - // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent - // expressions to be temporaries. 
- // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough - // for all reasonable uses of invariant. - if (!has_decoration(store_id, DecorationInvariant)) - return; + // We want to split the access chain from the base. + // This is so we can later combine different CompositeExtract results + // with CompositeConstruct without emitting code like + // + // vec3 temp = texture(...).xyz + // vec4(temp.x, temp.y, temp.z, 1.0). + // + // when we actually wanted to emit this + // vec4(texture(...).xyz, 1.0). + // + // Including the base will prevent this and would trigger multiple reads + // from expression causing it to be forced to an actual temporary in GLSL. + auto expr = access_chain_internal(ops[2], &ops[3], length, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT | + ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); + e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2])); + inherit_expression_dependencies(id, ops[2]); + e->base_expression = ops[2]; - auto *expr = maybe_get(value_id); - if (!expr) - return; + if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) + set_decoration(ops[1], DecorationRelaxedPrecision); + } + else + { + auto expr = access_chain_internal(ops[2], &ops[3], length, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); + e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2])); + inherit_expression_dependencies(id, ops[2]); + } - disallow_forwarding_in_expression_chain(*expr); -} + // Pass through some meta information to the loaded expression. + // We can still end up loading a buffer type to a variable, then CompositeExtract from it + // instead of loading everything through an access chain. 
+ e->need_transpose = meta.need_transpose; + if (meta.storage_is_packed) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); + if (meta.storage_is_invariant) + set_decoration(id, DecorationInvariant); -void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) -{ - auto rhs = to_pointer_expression(rhs_expression); + break; + } - // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. - if (!rhs.empty()) + case OpCompositeInsert: { - handle_store_to_invariant_variable(lhs_expression, rhs_expression); - - auto lhs = to_dereferenced_expression(lhs_expression); - - // We might need to bitcast in order to store to a builtin. - bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression)); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t obj = ops[2]; + uint32_t composite = ops[3]; + const auto *elems = &ops[4]; + length -= 4; - // Tries to optimize assignments like " = op expr". - // While this is purely cosmetic, this is important for legacy ESSL where loop - // variable increments must be in either i++ or i += const-expr. - // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. - if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) - statement(lhs, " = ", rhs, ";"); - register_write(lhs_expression); - } -} + flush_variable_declaration(composite); -uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const -{ - if (instr.length < 3) - return 32; + // CompositeInsert requires a copy + modification, but this is very awkward code in HLL. + // Speculate that the input composite is no longer used, and we can modify it in-place. + // There are various scenarios where this is not possible to satisfy. 
+ bool can_modify_in_place = true; + forced_temporaries.insert(id); - auto *ops = stream(instr); + // Cannot safely RMW PHI variables since they have no way to be invalidated, + // forcing temporaries is not going to help. + // This is similar for Constant and Undef inputs. + // The only safe thing to RMW is SPIRExpression. + // If the expression has already been used (i.e. used in a continue block), we have to keep using + // that loop variable, since we won't be able to override the expression after the fact. + // If the composite is hoisted, we might never be able to properly invalidate any usage + // of that composite in a subsequent loop iteration. + if (invalid_expressions.count(composite) || + block_composite_insert_overwrite.count(composite) || + hoisted_temporaries.count(id) || hoisted_temporaries.count(composite) || + maybe_get(composite) == nullptr) + { + can_modify_in_place = false; + } + else if (backend.requires_relaxed_precision_analysis && + has_decoration(composite, DecorationRelaxedPrecision) != + has_decoration(id, DecorationRelaxedPrecision) && + get(result_type).basetype != SPIRType::Struct) + { + // Similarly, if precision does not match for input and output, + // we cannot alias them. If we write a composite into a relaxed precision + // ID, we might get a false truncation. + can_modify_in_place = false; + } - switch (instr.op) - { - case OpSConvert: - case OpConvertSToF: - case OpUConvert: - case OpConvertUToF: - case OpIEqual: - case OpINotEqual: - case OpSLessThan: - case OpSLessThanEqual: - case OpSGreaterThan: - case OpSGreaterThanEqual: - return expression_type(ops[2]).width; + if (can_modify_in_place) + { + // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place. + if (!forced_temporaries.count(composite)) + force_temporary_and_recompile(composite); - default: - { - // We can look at result type which is more robust. 
- auto *type = maybe_get(ops[0]); - if (type && type_is_integral(*type)) - return type->width; + auto chain = access_chain_internal(composite, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + statement(chain, " = ", to_unpacked_expression(obj), ";"); + set(id, to_expression(composite), result_type, true); + invalid_expressions.insert(composite); + composite_insert_overwritten.insert(composite); + } else - return 32; - } - } -} - -uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const -{ - if (length < 1) - return 32; + { + if (maybe_get(composite) != nullptr) + { + emit_uninitialized_temporary_expression(result_type, id); + } + else + { + // Make a copy, then use access chain to store the variable. + statement(declare_temporary(result_type, id), to_expression(composite), ";"); + set(id, to_name(id), result_type, true); + } - switch (op) - { - case GLSLstd450SAbs: - case GLSLstd450SSign: - case GLSLstd450UMin: - case GLSLstd450SMin: - case GLSLstd450UMax: - case GLSLstd450SMax: - case GLSLstd450UClamp: - case GLSLstd450SClamp: - case GLSLstd450FindSMsb: - case GLSLstd450FindUMsb: - return expression_type(ops[0]).width; + auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + statement(chain, " = ", to_unpacked_expression(obj), ";"); + } - default: - { - // We don't need to care about other opcodes, just return 32. 
- return 32; - } + break; } -} -void CompilerGLSL::emit_instruction(const Instruction &instruction) -{ - auto ops = stream(instruction); - auto opcode = static_cast(instruction.op); - uint32_t length = instruction.length; + case OpCopyMemory: + { + uint32_t lhs = ops[0]; + uint32_t rhs = ops[1]; + if (lhs != rhs) + { + uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET]; + if (!tmp_id) + tmp_id = ir.increase_bound_by(1); + uint32_t tmp_type_id = expression_type(rhs).parent_type; -#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) -#define GLSL_BOP_CAST(op, type) \ - emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) -#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) -#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) -#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) -#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) -#define GLSL_BFOP_CAST(op, type) \ - emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) -#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) -#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + EmbeddedInstruction fake_load, fake_store; + fake_load.op = OpLoad; + fake_load.length = 3; + fake_load.ops.push_back(tmp_type_id); + fake_load.ops.push_back(tmp_id); + fake_load.ops.push_back(rhs); - // If we need to do implicit bitcasts, make sure we do it with the correct type. 
- uint32_t integer_width = get_integer_width_for_instruction(instruction); - auto int_type = to_signed_basetype(integer_width); - auto uint_type = to_unsigned_basetype(integer_width); + fake_store.op = OpStore; + fake_store.length = 2; + fake_store.ops.push_back(lhs); + fake_store.ops.push_back(tmp_id); - switch (opcode) - { - // Dealing with memory - case OpLoad: + // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible. + // Synthesize a fake Load and Store pair for CopyMemory. + emit_instruction(fake_load); + emit_instruction(fake_store); + } + break; + } + + case OpCopyLogical: { + // This is used for copying object of different types, arrays and structs. + // We need to unroll the copy, element-by-element. uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t ptr = ops[2]; - - flush_variable_declaration(ptr); + uint32_t rhs = ops[2]; - // If we're loading from memory that cannot be changed by the shader, - // just forward the expression directly to avoid needless temporaries. - // If an expression is mutable and forwardable, we speculate that it is immutable. - bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + emit_uninitialized_temporary_expression(result_type, id); + emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {}); + break; + } - // If loading a non-native row-major matrix, mark the expression as need_transpose. 
- bool need_transpose = false; - bool old_need_transpose = false; + case OpCopyObject: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t rhs = ops[2]; + bool pointer = get(result_type).pointer; - auto *ptr_expression = maybe_get(ptr); - if (ptr_expression && ptr_expression->need_transpose) + auto *chain = maybe_get(rhs); + auto *imgsamp = maybe_get(rhs); + if (chain) { - old_need_transpose = true; - ptr_expression->need_transpose = false; - need_transpose = true; + // Cannot lower to a SPIRExpression, just copy the object. + auto &e = set(id, *chain); + e.self = id; + } + else if (imgsamp) + { + // Cannot lower to a SPIRExpression, just copy the object. + // GLSL does not currently use this type and will never get here, but MSL does. + // Handled here instead of CompilerMSL for better integration and general handling, + // and in case GLSL or other subclasses require it in the future. + auto &e = set(id, *imgsamp); + e.self = id; + } + else if (expression_is_lvalue(rhs) && !pointer) + { + // Need a copy. + // For pointer types, we copy the pointer itself. + emit_op(result_type, id, to_unpacked_expression(rhs), false); } - else if (is_non_native_row_major_matrix(ptr)) - need_transpose = true; + else + { + // RHS expression is immutable, so just forward it. + // Copying these things really make no sense, but + // seems to be allowed anyways. + auto &e = emit_op(result_type, id, to_expression(rhs), true, true); + if (pointer) + { + auto *var = maybe_get_backing_variable(rhs); + e.loaded_from = var ? var->self : ID(0); + } - // If we are forwarding this load, - // don't register the read to access chain here, defer that to when we actually use the expression, - // using the add_implied_read_expression mechanism. - auto expr = to_dereferenced_expression(ptr, !forward); + // If we're copying an access chain, need to inherit the read expressions. 
+ auto *rhs_expr = maybe_get(rhs); + if (rhs_expr) + { + e.implied_read_expressions = rhs_expr->implied_read_expressions; + e.expression_dependencies = rhs_expr->expression_dependencies; + } + } + break; + } - // We might need to bitcast in order to load from a builtin. - bitcast_from_builtin_load(ptr, expr, get(result_type)); + case OpVectorShuffle: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec0 = ops[2]; + uint32_t vec1 = ops[3]; + const auto *elems = &ops[4]; + length -= 4; - // We might be trying to load a gl_Position[N], where we should be - // doing float4[](gl_in[i].gl_Position, ...) instead. - // Similar workarounds are required for input arrays in tessellation. - unroll_array_from_complex_load(id, ptr, expr); + auto &type0 = expression_type(vec0); - auto &type = get(result_type); - // Shouldn't need to check for ID, but current glslang codegen requires it in some cases - // when loading Image/Sampler descriptors. It does not hurt to check ID as well. - if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT)) - { - propagate_nonuniform_qualifier(ptr); - convert_non_uniform_expression(type, expr); - } + // If we have the undefined swizzle index -1, we need to swizzle in undefined data, + // or in our case, T(0). + bool shuffle = false; + for (uint32_t i = 0; i < length; i++) + if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) + shuffle = true; - if (ptr_expression) - ptr_expression->need_transpose = old_need_transpose; + // Cannot use swizzles with packed expressions, force shuffle path. + if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked)) + shuffle = true; - // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. - // However, if we try to load a complex, composite object from a flattened buffer, - // we should avoid emitting the same code over and over and lower the result to a temporary. 
- bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 && - (type.basetype == SPIRType::Struct || (type.columns > 1)); + string expr; + bool should_fwd, trivial_forward; - SPIRExpression *e = nullptr; - if (!backend.array_is_value_type && !type.array.empty() && !forward) + if (shuffle) { - // Complicated load case where we need to make a copy of ptr, but we cannot, because - // it is an array, and our backend does not support arrays as value types. - // Emit the temporary, and copy it explicitly. - e = &emit_uninitialized_temporary_expression(result_type, id); - emit_array_copy(to_expression(id), ptr); + should_fwd = should_forward(vec0) && should_forward(vec1); + trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1); + + // Constructor style and shuffling from two different vectors. + SmallVector args; + for (uint32_t i = 0; i < length; i++) + { + if (elems[i] == 0xffffffffu) + { + // Use a constant 0 here. + // We could use the first component or similar, but then we risk propagating + // a value we might not need, and bog down codegen. + SPIRConstant c; + c.constant_type = type0.parent_type; + assert(type0.parent_type != ID(0)); + args.push_back(constant_expression(c)); + } + else if (elems[i] >= type0.vecsize) + args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize)); + else + args.push_back(to_extract_component_expression(vec0, elems[i])); + } + expr += join(type_to_glsl_constructor(get(result_type)), "(", merge(args), ")"); } else - e = &emit_op(result_type, id, expr, forward, !usage_tracking); + { + should_fwd = should_forward(vec0); + trivial_forward = should_suppress_usage_tracking(vec0); - e->need_transpose = need_transpose; - register_read(id, ptr, forward); + // We only source from first vector, so can use swizzle. 
+ // If the vector is packed, unpack it before applying a swizzle (needed for MSL) + expr += to_enclosed_unpacked_expression(vec0); + expr += "."; + for (uint32_t i = 0; i < length; i++) + { + assert(elems[i] != 0xffffffffu); + expr += index_to_swizzle(elems[i]); + } - // Pass through whether the result is of a packed type. - if (has_extended_decoration(ptr, SPIRVCrossDecorationPacked)) - { - set_extended_decoration(id, SPIRVCrossDecorationPacked); - set_extended_decoration(id, SPIRVCrossDecorationPackedType, - get_extended_decoration(ptr, SPIRVCrossDecorationPackedType)); + if (backend.swizzle_is_function && length > 1) + expr += "()"; } - inherit_expression_dependencies(id, ptr); - if (forward) - add_implied_read_expression(*e, ptr); + // A shuffle is trivial in that it doesn't actually *do* anything. + // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. + + emit_op(result_type, id, expr, should_fwd, trivial_forward); + + inherit_expression_dependencies(id, vec0); + if (vec0 != vec1) + inherit_expression_dependencies(id, vec1); break; } - case OpInBoundsAccessChain: - case OpAccessChain: - case OpPtrAccessChain: - { - auto *var = maybe_get(ops[2]); - if (var) - flush_variable_declaration(var->self); + // ALU + case OpIsNan: + GLSL_UFOP(isnan); + break; - // If the base is immutable, the access chain pointer must also be. - // If an expression is mutable and forwardable, we speculate that it is immutable. 
- AccessChainMeta meta; - bool ptr_chain = opcode == OpPtrAccessChain; - auto e = access_chain(ops[2], &ops[3], length - 3, get(ops[0]), &meta, ptr_chain); + case OpIsInf: + GLSL_UFOP(isinf); + break; - auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); + case OpSNegate: + if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0]) + GLSL_UOP_CAST(-); + else + GLSL_UOP(-); + break; - auto *backing_variable = maybe_get_backing_variable(ops[2]); - expr.loaded_from = backing_variable ? backing_variable->self : ops[2]; - expr.need_transpose = meta.need_transpose; - expr.access_chain = true; + case OpFNegate: + GLSL_UOP(-); + break; - // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. - if (meta.storage_is_packed) - set_extended_decoration(ops[1], SPIRVCrossDecorationPacked); - if (meta.storage_packed_type != 0) - set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type); - if (meta.storage_is_invariant) - set_decoration(ops[1], DecorationInvariant); + case OpIAdd: + { + // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(+, type); + break; + } - for (uint32_t i = 2; i < length; i++) - { - inherit_expression_dependencies(ops[1], ops[i]); - add_implied_read_expression(expr, ops[i]); - } + case OpFAdd: + GLSL_BOP(+); + break; + + case OpISub: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(-, type); + break; + } + + case OpFSub: + GLSL_BOP(-); + break; + case OpIMul: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(*, type); break; } - case OpStore: + case OpVectorTimesMatrix: + case OpMatrixTimesVector: { - auto *var = maybe_get(ops[0]); + // If the matrix needs transpose, just flip the multiply order. + auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 
2 : 3]); + if (e && e->need_transpose) + { + e->need_transpose = false; + string expr; - if (has_decoration(ops[0], DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(ops[0]); + if (opcode == OpMatrixTimesVector) + expr = join(to_enclosed_unpacked_expression(ops[3]), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + else + expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + to_enclosed_unpacked_expression(ops[2])); - if (var && var->statically_assigned) - var->static_expression = ops[1]; - else if (var && var->loop_variable && !var->loop_variable_enable) - var->static_expression = ops[1]; - else if (var && var->remapped_variable) - { - // Skip the write. - } - else if (var && flattened_structs.count(ops[0])) - { - store_flattened_struct(*var, ops[1]); - register_write(ops[0]); + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + emit_op(ops[0], ops[1], expr, forward); + e->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); } else - { - emit_store_statement(ops[0], ops[1]); - } - - // Storing a pointer results in a variable pointer, so we must conservatively assume - // we can write through it. - if (expression_type(ops[1]).pointer) - register_write(ops[1]); + GLSL_BOP(*); break; } - case OpArrayLength: + case OpMatrixTimesMatrix: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); - set(id, join(type_to_glsl(get(result_type)), "(", e, ".length())"), result_type, - true); + auto *a = maybe_get(ops[2]); + auto *b = maybe_get(ops[3]); + + // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. + // a^T * b^T = (b * a)^T. 
+ if (a && b && a->need_transpose && b->need_transpose) + { + a->need_transpose = false; + b->need_transpose = false; + auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + b->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + GLSL_BOP(*); + break; } - // Function calls - case OpFunctionCall: + case OpFMul: + case OpMatrixTimesScalar: + case OpVectorTimesScalar: + GLSL_BOP(*); + break; + + case OpOuterProduct: + GLSL_BFOP(outerProduct); + break; + + case OpDot: + GLSL_BFOP(dot); + break; + + case OpTranspose: + if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 + { + // transpose() is not available, so instead, flip need_transpose, + // which can later be turned into an emulated transpose op by + // convert_row_major_matrix(), if necessary. + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t input = ops[2]; + + // Force need_transpose to false temporarily to prevent + // to_expression() from doing the transpose. + bool need_transpose = false; + auto *input_e = maybe_get(input); + if (input_e) + swap(need_transpose, input_e->need_transpose); + + bool forward = should_forward(input); + auto &e = emit_op(result_type, result_id, to_expression(input), forward); + e.need_transpose = !need_transpose; + + // Restore the old need_transpose flag. 
+ if (input_e) + input_e->need_transpose = need_transpose; + } + else + GLSL_UFOP(transpose); + break; + + case OpSRem: { uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t func = ops[2]; - const auto *arg = &ops[3]; - length -= 3; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; - auto &callee = get(func); - auto &return_type = get(callee.return_type); - bool pure = function_is_pure(callee); + // Needs special handling. + bool forward = should_forward(op0) && should_forward(op1); + auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); - bool callee_has_out_variables = false; - bool emit_return_value_as_argument = false; + if (implicit_integer_promotion) + expr = join(type_to_glsl(get(result_type)), '(', expr, ')'); - // Invalidate out variables passed to functions since they can be OpStore'd to. - for (uint32_t i = 0; i < length; i++) - { - if (callee.arguments[i].write_count) - { - register_call_out_argument(arg[i]); - callee_has_out_variables = true; - } + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } - flush_variable_declaration(arg[i]); - } + case OpSDiv: + GLSL_BOP_CAST(/, int_type); + break; - if (!return_type.array.empty() && !backend.can_return_array) - { - callee_has_out_variables = true; - emit_return_value_as_argument = true; - } + case OpUDiv: + GLSL_BOP_CAST(/, uint_type); + break; - if (!pure) - register_impure_function_call(); + case OpIAddCarry: + case OpISubBorrow: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); - string funexpr; - SmallVector arglist; - funexpr += 
to_name(func) + "("; + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow"; - if (emit_return_value_as_argument) - { - statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); - arglist.push_back(to_name(id)); - } + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", + to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");"); + break; + } - for (uint32_t i = 0; i < length; i++) - { - // Do not pass in separate images or samplers if we're remapping - // to combined image samplers. - if (skip_argument(arg[i])) - continue; + case OpUMulExtended: + case OpSMulExtended: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000."); - arglist.push_back(to_func_call_arg(arg[i])); - } + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; - for (auto &combined : callee.combined_parameters) - { - uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id]; - uint32_t sampler_id = combined.global_sampler ? 
combined.sampler_id : arg[combined.sampler_id]; - arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); - } + statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", + to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");"); + break; + } - append_global_func_args(callee, length, arglist); + case OpFDiv: + GLSL_BOP(/); + break; - funexpr += merge(arglist); - funexpr += ")"; + case OpShiftRightLogical: + GLSL_BOP_CAST(>>, uint_type); + break; - // Check for function call constraints. - check_function_call_constraints(arg, length); + case OpShiftRightArithmetic: + GLSL_BOP_CAST(>>, int_type); + break; - if (return_type.basetype != SPIRType::Void) - { - // If the function actually writes to an out variable, - // take the conservative route and do not forward. - // The problem is that we might not read the function - // result (and emit the function) before an out variable - // is read (common case when return value is ignored! - // In order to avoid start tracking invalid variables, - // just avoid the forwarding problem altogether. - bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && - (forced_temporaries.find(id) == end(forced_temporaries)); + case OpShiftLeftLogical: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(<<, type); + break; + } - if (emit_return_value_as_argument) - { - statement(funexpr, ";"); - set(id, to_name(id), result_type, true); - } - else - emit_op(result_type, id, funexpr, forward); + case OpBitwiseOr: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(|, type); + break; + } - // Function calls are implicit loads from all variables in question. - // Set dependencies for them. 
- for (uint32_t i = 0; i < length; i++) - register_read(id, arg[i], forward); + case OpBitwiseXor: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(^, type); + break; + } - // If we're going to forward the temporary result, - // put dependencies on every variable that must not change. - if (forward) - register_global_read_dependencies(callee, id); - } + case OpBitwiseAnd: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(&, type); + break; + } + + case OpNot: + if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0]) + GLSL_UOP_CAST(~); else - statement(funexpr, ";"); + GLSL_UOP(~); + break; + case OpUMod: + GLSL_BOP_CAST(%, uint_type); break; - } - // Composite munging - case OpCompositeConstruct: + case OpSMod: + GLSL_BOP_CAST(%, int_type); + break; + + case OpFMod: + GLSL_BFOP(mod); + break; + + case OpFRem: { + if (is_legacy()) + SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is " + "needed for legacy."); + uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - const auto *const elems = &ops[2]; - length -= 2; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; - bool forward = true; - for (uint32_t i = 0; i < length; i++) - forward = forward && should_forward(elems[i]); + // Needs special handling. + bool forward = should_forward(op0) && should_forward(op1); + auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); - auto &out_type = get(result_type); - auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr; + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } - // Only splat if we have vector constructors. - // Arrays and structs must be initialized properly in full. 
- bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; + // Relational + case OpAny: + GLSL_UFOP(any); + break; - bool splat = false; - bool swizzle_splat = false; + case OpAll: + GLSL_UFOP(all); + break; - if (in_type) - { - splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; - swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; + case OpSelect: + emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); + break; - if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type)) - { - // Cannot swizzle literal integers as a special case. - swizzle_splat = false; - } - } + case OpLogicalOr: + { + // No vector variant in GLSL for logical OR. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get(result_type); - if (splat || swizzle_splat) - { - uint32_t input = elems[0]; - for (uint32_t i = 0; i < length; i++) - { - if (input != elems[i]) - { - splat = false; - swizzle_splat = false; - } - } - } + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown); + else + GLSL_BOP(||); + break; + } - if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) - forward = false; - if (!out_type.array.empty() && !backend.can_declare_arrays_inline) - forward = false; - if (type_is_empty(out_type) && !backend.supports_empty_struct) - forward = false; + case OpLogicalAnd: + { + // No vector variant in GLSL for logical AND. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get(result_type); - string constructor_op; - if (backend.use_initializer_list && composite) - { - // Only use this path if we are building composites. - // This path cannot be used for arithmetic. 
- if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) - constructor_op += type_to_glsl_constructor(get(result_type)); - constructor_op += "{ "; - if (type_is_empty(out_type) && !backend.supports_empty_struct) - constructor_op += "0"; - else if (splat) - constructor_op += to_expression(elems[0]); - else - constructor_op += build_composite_combiner(result_type, elems, length); - constructor_op += " }"; - } - else if (swizzle_splat && !composite) - { - constructor_op = remap_swizzle(get(result_type), 1, to_expression(elems[0])); - } + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown); else - { - constructor_op = type_to_glsl_constructor(get(result_type)) + "("; - if (type_is_empty(out_type) && !backend.supports_empty_struct) - constructor_op += "0"; - else if (splat) - constructor_op += to_expression(elems[0]); - else - constructor_op += build_composite_combiner(result_type, elems, length); - constructor_op += ")"; - } + GLSL_BOP(&&); + break; + } - if (!constructor_op.empty()) - { - emit_op(result_type, id, constructor_op, forward); - for (uint32_t i = 0; i < length; i++) - inherit_expression_dependencies(id, elems[i]); - } + case OpLogicalNot: + { + auto &type = get(ops[0]); + if (type.vecsize > 1) + GLSL_UFOP(not ); + else + GLSL_UOP(!); break; } - case OpVectorInsertDynamic: + case OpIEqual: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t vec = ops[2]; - uint32_t comp = ops[3]; - uint32_t index = ops[4]; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(equal, int_type); + else + GLSL_BOP_CAST(==, int_type); + break; + } - flush_variable_declaration(vec); + case OpLogicalEqual: + case OpFOrdEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(equal); + else + GLSL_BOP(==); + break; + } - // Make a copy, then use access chain to store the variable. 
- statement(declare_temporary(result_type, id), to_expression(vec), ";"); - set(id, to_name(id), result_type, true); - auto chain = access_chain_internal(id, &index, 1, 0, nullptr); - statement(chain, " = ", to_expression(comp), ";"); + case OpINotEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(notEqual, int_type); + else + GLSL_BOP_CAST(!=, int_type); break; } - case OpVectorExtractDynamic: + case OpLogicalNotEqual: + case OpFOrdNotEqual: + case OpFUnordNotEqual: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + // GLSL is fuzzy on what to do with ordered vs unordered not equal. + // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE, + // but this means we have no easy way of implementing ordered not equal. + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(notEqual); + else + GLSL_BOP(!=); + break; + } - auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr); - emit_op(result_type, id, expr, should_forward(ops[2])); - inherit_expression_dependencies(id, ops[2]); - inherit_expression_dependencies(id, ops[3]); + case OpUGreaterThan: + case OpSGreaterThan: + { + auto type = opcode == OpUGreaterThan ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThan, type); + else + GLSL_BOP_CAST(>, type); break; } - case OpCompositeExtract: + case OpFOrdGreaterThan: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - length -= 3; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThan); + else + GLSL_BOP(>); + break; + } - auto &type = get(result_type); + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + { + auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThanEqual, type); + else + GLSL_BOP_CAST(>=, type); + break; + } - // We can only split the expression here if our expression is forwarded as a temporary. 
- bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); + case OpFOrdGreaterThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThanEqual); + else + GLSL_BOP(>=); + break; + } - // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. - auto &composite_type = expression_type(ops[2]); - if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty()) - allow_base_expression = false; + case OpULessThan: + case OpSLessThan: + { + auto type = opcode == OpULessThan ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThan, type); + else + GLSL_BOP_CAST(<, type); + break; + } - // Packed expressions cannot be split up. - if (has_extended_decoration(ops[2], SPIRVCrossDecorationPacked)) - allow_base_expression = false; + case OpFOrdLessThan: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThan); + else + GLSL_BOP(<); + break; + } - AccessChainMeta meta; - SPIRExpression *e = nullptr; + case OpULessThanEqual: + case OpSLessThanEqual: + { + auto type = opcode == OpULessThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThanEqual, type); + else + GLSL_BOP_CAST(<=, type); + break; + } - // Only apply this optimization if result is scalar. - if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) - { - // We want to split the access chain from the base. - // This is so we can later combine different CompositeExtract results - // with CompositeConstruct without emitting code like - // - // vec3 temp = texture(...).xyz - // vec4(temp.x, temp.y, temp.z, 1.0). - // - // when we actually wanted to emit this - // vec4(texture(...).xyz, 1.0). - // - // Including the base will prevent this and would trigger multiple reads - // from expression causing it to be forced to an actual temporary in GLSL. 
- auto expr = access_chain_internal(ops[2], &ops[3], length, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta); - e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2])); - inherit_expression_dependencies(id, ops[2]); - e->base_expression = ops[2]; - } + case OpFOrdLessThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThanEqual); else - { - auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); - e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2])); - inherit_expression_dependencies(id, ops[2]); - } + GLSL_BOP(<=); + break; + } - // Pass through some meta information to the loaded expression. - // We can still end up loading a buffer type to a variable, then CompositeExtract from it - // instead of loading everything through an access chain. - e->need_transpose = meta.need_transpose; - if (meta.storage_is_packed) - set_extended_decoration(id, SPIRVCrossDecorationPacked); - if (meta.storage_packed_type != 0) - set_extended_decoration(id, SPIRVCrossDecorationPackedType, meta.storage_packed_type); - if (meta.storage_is_invariant) - set_decoration(id, DecorationInvariant); + // Conversion + case OpSConvert: + case OpConvertSToF: + case OpUConvert: + case OpConvertUToF: + { + auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? 
int_type : uint_type; + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto &type = get(result_type); + auto &arg_type = expression_type(ops[2]); + auto func = type_to_glsl_constructor(type); + if (arg_type.width < type.width || type_is_floating_point(type)) + emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype); + else + emit_unary_func_op(result_type, id, ops[2], func.c_str()); break; } - case OpCompositeInsert: + case OpConvertFToU: + case OpConvertFToS: { + // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t obj = ops[2]; - uint32_t composite = ops[3]; - const auto *elems = &ops[4]; - length -= 4; + auto &type = get(result_type); + auto expected_type = type; + auto &float_type = expression_type(ops[2]); + expected_type.basetype = + opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width); - flush_variable_declaration(composite); + auto func = type_to_glsl_constructor(expected_type); + emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype); + break; + } - // Make a copy, then use access chain to store the variable. 
- statement(declare_temporary(result_type, id), to_expression(composite), ";"); - set(id, to_name(id), result_type, true); - auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); - statement(chain, " = ", to_expression(obj), ";"); + case OpFConvert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto func = type_to_glsl_constructor(get(result_type)); + emit_unary_func_op(result_type, id, ops[2], func.c_str()); break; } - case OpCopyMemory: + case OpBitcast: { - uint32_t lhs = ops[0]; - uint32_t rhs = ops[1]; - if (lhs != rhs) + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + if (!emit_complex_bitcast(result_type, id, arg)) { - flush_variable_declaration(lhs); - flush_variable_declaration(rhs); - statement(to_expression(lhs), " = ", to_expression(rhs), ";"); - register_write(lhs); + auto op = bitcast_glsl_op(get(result_type), expression_type(arg)); + emit_unary_func_op(result_type, id, arg, op.c_str()); } break; } - case OpCopyObject: + case OpQuantizeToF16: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t rhs = ops[2]; - bool pointer = get(result_type).pointer; + uint32_t arg = ops[2]; - auto *chain = maybe_get(rhs); - if (chain) + string op; + auto &type = get(result_type); + + switch (type.vecsize) { - // Cannot lower to a SPIRExpression, just copy the object. - auto &e = set(id, *chain); - e.self = id; - } - else if (expression_is_lvalue(rhs) && !pointer) + case 1: + op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); + break; + case 2: + op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); + break; + case 3: { - // Need a copy. - // For pointer types, we copy the pointer itself. 
- statement(declare_temporary(result_type, id), to_expression(rhs), ";"); - set(id, to_name(id), result_type, true); - inherit_expression_dependencies(id, rhs); + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); + op = join("vec3(", op0, ", ", op1, ")"); + break; } - else + case 4: { - // RHS expression is immutable, so just forward it. - // Copying these things really make no sense, but - // seems to be allowed anyways. - auto &e = set(id, to_expression(rhs), result_type, true); - if (pointer) - { - auto *var = maybe_get_backing_variable(rhs); - e.loaded_from = var ? var->self : 0; - } - - // If we're copying an access chain, need to inherit the read expressions. - auto *rhs_expr = maybe_get(rhs); - if (rhs_expr) - e.implied_read_expressions = rhs_expr->implied_read_expressions; + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); + op = join("vec4(", op0, ", ", op1, ")"); + break; } + default: + SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + } + + emit_op(result_type, id, op, should_forward(arg)); + inherit_expression_dependencies(id, arg); break; } - case OpVectorShuffle: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t vec0 = ops[2]; - uint32_t vec1 = ops[3]; - const auto *elems = &ops[4]; - length -= 4; - - auto &type0 = expression_type(vec0); - - // If we have the undefined swizzle index -1, we need to swizzle in undefined data, - // or in our case, T(0). - bool shuffle = false; - for (uint32_t i = 0; i < length; i++) - if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) - shuffle = true; - - // Cannot use swizzles with packed expressions, force shuffle path. 
- if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPacked)) - shuffle = true; + // Derivatives + case OpDPdx: + GLSL_UFOP(dFdx); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; - string expr; - bool should_fwd, trivial_forward; + case OpDPdy: + GLSL_UFOP(dFdy); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; - if (shuffle) + case OpDPdxFine: + GLSL_UFOP(dFdxFine); + if (options.es) { - should_fwd = should_forward(vec0) && should_forward(vec1); - trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1); - - // Constructor style and shuffling from two different vectors. - SmallVector args; - for (uint32_t i = 0; i < length; i++) - { - if (elems[i] == 0xffffffffu) - { - // Use a constant 0 here. - // We could use the first component or similar, but then we risk propagating - // a value we might not need, and bog down codegen. - SPIRConstant c; - c.constant_type = type0.parent_type; - assert(type0.parent_type != 0); - args.push_back(constant_expression(c)); - } - else if (elems[i] >= type0.vecsize) - args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize)); - else - args.push_back(to_extract_component_expression(vec0, elems[i])); - } - expr += join(type_to_glsl_constructor(get(result_type)), "(", merge(args), ")"); + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } - else - { - should_fwd = should_forward(vec0); - trivial_forward = !expression_is_forwarded(vec0); - - // We only source from first vector, so can use swizzle. 
- // If the vector is packed, unpack it before applying a swizzle (needed for MSL) - expr += to_enclosed_unpacked_expression(vec0); - expr += "."; - for (uint32_t i = 0; i < length; i++) - { - assert(elems[i] != 0xffffffffu); - expr += index_to_swizzle(elems[i]); - } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; - if (backend.swizzle_is_function && length > 1) - expr += "()"; + case OpDPdyFine: + GLSL_UFOP(dFdyFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } - - // A shuffle is trivial in that it doesn't actually *do* anything. - // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. - - emit_op(result_type, id, expr, should_fwd, trivial_forward); - inherit_expression_dependencies(id, vec0); - inherit_expression_dependencies(id, vec1); + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - } - // ALU - case OpIsNan: - GLSL_UFOP(isnan); + case OpDPdxCoarse: + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + GLSL_UFOP(dFdxCoarse); + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - case OpIsInf: - GLSL_UFOP(isinf); + case OpDPdyCoarse: + GLSL_UFOP(dFdyCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - case OpSNegate: - case OpFNegate: - GLSL_UOP(-); + case OpFwidth: + GLSL_UFOP(fwidth); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + 
register_control_dependent_expression(ops[1]); break; - case OpIAdd: - { - // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(+, type); + case OpFwidthCoarse: + GLSL_UFOP(fwidthCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - } - case OpFAdd: - GLSL_BOP(+); + case OpFwidthFine: + GLSL_UFOP(fwidthFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); break; - case OpISub: + // Bitfield + case OpBitFieldInsert: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(-, type); + emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int); break; } - case OpFSub: - GLSL_BOP(-); - break; - - case OpIMul: + case OpBitFieldSExtract: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(*, type); + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type, + SPIRType::Int, SPIRType::Int); break; } - case OpVectorTimesMatrix: - case OpMatrixTimesVector: + case OpBitFieldUExtract: { - // If the matrix needs transpose, just flip the multiply order. - auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 
2 : 3]); - if (e && e->need_transpose) - { - e->need_transpose = false; - emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); - e->need_transpose = true; - } - else - GLSL_BOP(*); + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type, + SPIRType::Int, SPIRType::Int); break; } - case OpFMul: - case OpMatrixTimesScalar: - case OpVectorTimesScalar: - case OpMatrixTimesMatrix: - GLSL_BOP(*); + case OpBitReverse: + // BitReverse does not have issues with sign since result type must match input type. + GLSL_UFOP(bitfieldReverse); break; - case OpOuterProduct: - GLSL_BFOP(outerProduct); + case OpBitCount: + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type); break; + } - case OpDot: - GLSL_BFOP(dot); - break; + // Atomics + case OpAtomicExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[5]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; - case OpTranspose: - GLSL_UFOP(transpose); + emit_atomic_func_op(result_type, id, ptr, val, op); break; + } - case OpSRem: + case OpAtomicCompareExchange: { uint32_t result_type = ops[0]; - uint32_t result_id = ops[1]; - uint32_t op0 = ops[2]; - uint32_t op1 = ops[3]; - - // Needs special handling. - bool forward = should_forward(op0) && should_forward(op1); - auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", - to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t val = ops[6]; + uint32_t comp = ops[7]; + const char *op = check_atomic_image(ptr) ? 
"imageAtomicCompSwap" : "atomicCompSwap"; - emit_op(result_type, result_id, expr, forward); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + emit_atomic_func_op(result_type, id, ptr, comp, val, op); break; } - case OpSDiv: - GLSL_BOP_CAST(/, int_type); - break; - - case OpUDiv: - GLSL_BOP_CAST(/, uint_type); + case OpAtomicLoad: + { + // In plain GLSL, we have no atomic loads, so emulate this by fetch adding by 0 and hope compiler figures it out. + // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. + auto &type = expression_type(ops[2]); + forced_temporaries.insert(ops[1]); + bool atomic_image = check_atomic_image(ops[2]); + bool unsigned_type = (type.basetype == SPIRType::UInt) || + (atomic_image && get(type.image.type).basetype == SPIRType::UInt); + const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; + const char *increment = unsigned_type ? "0u" : "0"; + emit_op(ops[0], ops[1], + join(op, "(", + to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); + flush_all_atomic_capable_variables(); break; + } - case OpIAddCarry: - case OpISubBorrow: + case OpAtomicStore: { - if (options.es && options.version < 310) - SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); - else if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); - - uint32_t result_type = ops[0]; - uint32_t result_id = ops[1]; - uint32_t op0 = ops[2]; - uint32_t op1 = ops[3]; - auto &type = get(result_type); - emit_uninitialized_temporary_expression(result_type, result_id); - const char *op = opcode == OpIAddCarry ? 
"uaddCarry" : "usubBorrow"; - - statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", - to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");"); + // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result. + // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. + uint32_t ptr = ops[0]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[3]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; + statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");"); + flush_all_atomic_capable_variables(); break; } - case OpUMulExtended: - case OpSMulExtended: + case OpAtomicIIncrement: + case OpAtomicIDecrement: { - if (options.es && options.version < 310) - SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); - else if (!options.es && options.version < 400) - SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000."); + forced_temporaries.insert(ops[1]); + auto &type = expression_type(ops[2]); + if (type.storage == StorageClassAtomicCounter) + { + // Legacy GLSL stuff, not sure if this is relevant to support. + if (opcode == OpAtomicIIncrement) + GLSL_UFOP(atomicCounterIncrement); + else + GLSL_UFOP(atomicCounterDecrement); + } + else + { + bool atomic_image = check_atomic_image(ops[2]); + bool unsigned_type = (type.basetype == SPIRType::UInt) || + (atomic_image && get(type.image.type).basetype == SPIRType::UInt); + const char *op = atomic_image ? 
"imageAtomicAdd" : "atomicAdd"; - uint32_t result_type = ops[0]; - uint32_t result_id = ops[1]; - uint32_t op0 = ops[2]; - uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); - auto &type = get(result_type); - emit_uninitialized_temporary_expression(result_type, result_id); - const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; + const char *increment = nullptr; + if (opcode == OpAtomicIIncrement && unsigned_type) + increment = "1u"; + else if (opcode == OpAtomicIIncrement) + increment = "1"; + else if (unsigned_type) + increment = "uint(-1)"; + else + increment = "-1"; - statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", - to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");"); + emit_op(ops[0], ops[1], + join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); + } + + flush_all_atomic_capable_variables(); break; } - case OpFDiv: - GLSL_BOP(/); + case OpAtomicIAdd: + case OpAtomicFAddEXT: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; + } - case OpShiftRightLogical: - GLSL_BOP_CAST(>>, uint_type); + case OpAtomicISub: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); + emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); + flush_all_atomic_capable_variables(); break; + } - case OpShiftRightArithmetic: - GLSL_BOP_CAST(>>, int_type); + case OpAtomicSMin: + case OpAtomicUMin: + { + const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicMin" : "atomicMin"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; + } - case OpShiftLeftLogical: + case OpAtomicSMax: + case OpAtomicUMax: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(<<, type); + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } - case OpBitwiseOr: + case OpAtomicAnd: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(|, type); + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } - case OpBitwiseXor: + case OpAtomicOr: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(^, type); + const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } - case OpBitwiseAnd: + case OpAtomicXor: { - auto type = get(ops[0]).basetype; - GLSL_BOP_CAST(&, type); + const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicXor" : "atomicXor"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); break; } - case OpNot: - GLSL_UOP(~); + // Geometry shaders + case OpEmitVertex: + statement("EmitVertex();"); break; - case OpUMod: - GLSL_BOP_CAST(%, uint_type); + case OpEndPrimitive: + statement("EndPrimitive();"); break; - case OpSMod: - GLSL_BOP_CAST(%, int_type); - break; + case OpEmitStreamVertex: + { + if (options.es) + SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); - case OpFMod: - GLSL_BFOP(mod); + auto stream_expr = to_expression(ops[0]); + if (expression_type(ops[0]).basetype != SPIRType::Int) + stream_expr = join("int(", stream_expr, ")"); + statement("EmitStreamVertex(", stream_expr, ");"); break; + } - case OpFRem: + case OpEndStreamPrimitive: { - if (is_legacy()) - SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is " - "needed for legacy."); - - uint32_t result_type = ops[0]; - uint32_t result_id = ops[1]; - uint32_t op0 = ops[2]; - uint32_t op1 = ops[3]; - - // Needs special handling. 
- bool forward = should_forward(op0) && should_forward(op1); - auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(", - to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + if (options.es) + SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); - emit_op(result_type, result_id, expr, forward); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + auto stream_expr = to_expression(ops[0]); + if (expression_type(ops[0]).basetype != SPIRType::Int) + stream_expr = join("int(", stream_expr, ")"); + statement("EndStreamPrimitive(", stream_expr, ");"); break; } - // Relational - case OpAny: - GLSL_UFOP(any); + // Textures + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageFetch: + case OpImageGather: + case OpImageDrefGather: + // Gets a bit hairy, so move this to a separate instruction. + emit_texture_op(instruction, false); break; - case OpAll: - GLSL_UFOP(all); + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseFetch: + case OpImageSparseGather: + case OpImageSparseDrefGather: + // Gets a bit hairy, so move this to a separate instruction. 
+ emit_texture_op(instruction, true); break; - case OpSelect: - emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); + case OpImageSparseTexelsResident: + if (options.es) + SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL."); + require_extension_internal("GL_ARB_sparse_texture2"); + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean); break; - case OpLogicalOr: + case OpImage: { - // No vector variant in GLSL for logical OR. - auto result_type = ops[0]; - auto id = ops[1]; - auto &type = get(result_type); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; - if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||"); - else - GLSL_BOP(||); + // Suppress usage tracking. + auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + + // When using the image, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); break; } - case OpLogicalAnd: + case OpImageQueryLod: { - // No vector variant in GLSL for logical AND. - auto result_type = ops[0]; - auto id = ops[1]; - auto &type = get(result_type); - - if (type.vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&"); + const char *op = nullptr; + if (!options.es && options.version < 400) + { + require_extension_internal("GL_ARB_texture_query_lod"); + // For some reason, the ARB spec is all-caps. 
+ op = "textureQueryLOD"; + } + else if (options.es) + SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); else - GLSL_BOP(&&); + op = "textureQueryLod"; + + auto sampler_expr = to_expression(ops[2]); + if (has_decoration(ops[2], DecorationNonUniform)) + { + if (maybe_get_backing_variable(ops[2])) + convert_non_uniform_expression(sampler_expr, ops[2]); + else if (*backend.nonuniform_qualifier != '\0') + sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")"); + } + + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"), + forward); + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + register_control_dependent_expression(ops[1]); break; } - case OpLogicalNot: + case OpImageQueryLevels: { - auto &type = get(ops[0]); - if (type.vecsize > 1) - GLSL_UFOP(not); - else - GLSL_UOP(!); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_texture_query_levels"); + if (options.es) + SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); + + auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")"); + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); break; } - case OpIEqual: + case OpImageQuerySamples: { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(equal, int_type); + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + string expr; + if (type.image.sampled == 2) + expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")"); else - GLSL_BOP_CAST(==, int_type); + expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, 
SPIRType::Int, expr); + emit_op(result_type, id, expr, true); break; } - case OpLogicalEqual: - case OpFOrdEqual: + case OpSampledImage: { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(equal); - else - GLSL_BOP(==); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_sampled_image_op(result_type, id, ops[2], ops[3]); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); break; } - case OpINotEqual: + case OpImageQuerySizeLod: { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(notEqual, int_type); - else - GLSL_BOP_CAST(!=, int_type); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + auto &type = expression_type(img); + auto &imgtype = get(type.self); + + std::string fname = "textureSize"; + if (is_legacy_desktop()) + { + fname = legacy_tex_op(fname, imgtype, img); + } + else if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100."); + + auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ", + bitcast_expression(SPIRType::Int, ops[3]), ")"); + + // ES needs to emulate 1D images as 2D. + if (type.image.dim == Dim1D && options.es) + expr = join(expr, ".x"); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); break; } - case OpLogicalNotEqual: - case OpFOrdNotEqual: + // Image load/store + case OpImageRead: + case OpImageSparseRead: { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(notEqual); + // We added Nonreadable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to read the image after all, remove the qualifier and recompile. 
+ auto *var = maybe_get_backing_variable(ops[2]); + if (var) + { + auto &flags = get_decoration_bitset(var->self); + if (flags.get(DecorationNonReadable)) + { + unset_decoration(var->self, DecorationNonReadable); + force_recompile(); + } + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + bool pure; + string imgexpr; + auto &type = expression_type(ops[2]); + + if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code + { + if (type.image.ms) + SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); + + auto itr = + find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); + + if (itr == end(pls_inputs)) + { + // For non-PLS inputs, we rely on subpass type remapping information to get it right + // since ImageRead always returns 4-component vectors and the backing type is opaque. + if (!var->remapped_components) + SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); + imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); + } + else + { + // PLS input could have different number of components than what the SPIR expects, swizzle to + // the appropriate vector size. + uint32_t components = pls_format_to_components(itr->format); + imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); + } + pure = true; + } + else if (type.image.dim == DimSubpassData) + { + if (var && subpass_input_is_framebuffer_fetch(var->self)) + { + imgexpr = to_expression(var->self); + } + else if (options.vulkan_semantics) + { + // With Vulkan semantics, use the proper Vulkan GLSL construct. 
+ if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")"); + } + else + imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", + to_expression(samples), ")"); + } + else + { + // Implement subpass loads via texture barrier style sampling. + imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); + } + } + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); + pure = true; + } else - GLSL_BOP(!=); - break; - } + { + bool sparse = opcode == OpImageSparseRead; + uint32_t sparse_code_id = 0; + uint32_t sparse_texel_id = 0; + if (sparse) + emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id); - case OpUGreaterThan: - case OpSGreaterThan: - { - auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(greaterThan, type); + // imageLoad only accepts int coords, not uint. + auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + + // ES needs to emulate 1D images as 2D. 
+ if (type.image.dim == Dim1D && options.es) + coord_expr = join("ivec2(", coord_expr, ", 0)"); + + // Plain image load/store. + if (sparse) + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", + coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");"); + } + else + { + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", + coord_expr, ", ", to_expression(sparse_texel_id), ");"); + } + imgexpr = join(type_to_glsl(get(result_type)), "(", to_expression(sparse_code_id), ", ", + to_expression(sparse_texel_id), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = + join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); + } + else + imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")"); + } + + if (!sparse) + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); + pure = false; + } + + if (var) + { + bool forward = forced_temporaries.find(id) == end(forced_temporaries); + auto &e = emit_op(result_type, id, imgexpr, forward); + + // We only need to track dependencies if we're reading from image load/store. 
+ if (!pure) + { + e.loaded_from = var->self; + if (forward) + var->dependees.push_back(id); + } + } else - GLSL_BOP_CAST(>, type); - break; - } + emit_op(result_type, id, imgexpr, false); - case OpFOrdGreaterThan: - { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(greaterThan); - else - GLSL_BOP(>); + inherit_expression_dependencies(id, ops[2]); + if (type.image.ms) + inherit_expression_dependencies(id, ops[5]); break; } - case OpUGreaterThanEqual: - case OpSGreaterThanEqual: + case OpImageTexelPointer: { - auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(greaterThanEqual, type); - else - GLSL_BOP_CAST(>=, type); - break; - } + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; - case OpFOrdGreaterThanEqual: - { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(greaterThanEqual); - else - GLSL_BOP(>=); - break; - } + auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); - case OpULessThan: - case OpSLessThan: - { - auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(lessThan, type); - else - GLSL_BOP_CAST(<, type); - break; - } + auto expr = join(to_expression(ops[2]), ", ", coord_expr); + auto &e = set(id, expr, result_type, true); - case OpFOrdLessThan: - { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(lessThan); - else - GLSL_BOP(<); + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); break; } - case OpULessThanEqual: - case OpSLessThanEqual: + case OpImageWrite: { - auto type = opcode == OpULessThanEqual ? 
SPIRType::UInt : SPIRType::Int; - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP_CAST(lessThanEqual, type); - else - GLSL_BOP_CAST(<=, type); - break; - } + // We added Nonwritable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to write to the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[0]); + if (var) + { + if (has_decoration(var->self, DecorationNonWritable)) + { + unset_decoration(var->self, DecorationNonWritable); + force_recompile(); + } + } - case OpFOrdLessThanEqual: - { - if (expression_type(ops[2]).vecsize > 1) - GLSL_BFOP(lessThanEqual); + auto &type = expression_type(ops[0]); + auto &value_type = expression_type(ops[2]); + auto store_type = value_type; + store_type.vecsize = 4; + + // imageStore only accepts int coords, not uint. + auto coord_expr = to_expression(ops[1]); + auto target_coord_type = expression_type(ops[1]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); + + // ES needs to emulate 1D images as 2D. 
+ if (type.image.dim == Dim1D && options.es) + coord_expr = join("ivec2(", coord_expr, ", 0)"); + + if (type.image.ms) + { + uint32_t operands = ops[3]; + if (operands != ImageOperandsSampleMask || length != 5) + SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); + uint32_t samples = ops[4]; + statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + } else - GLSL_BOP(<=); + statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); break; } - // Conversion - case OpSConvert: - case OpConvertSToF: - case OpUConvert: - case OpConvertUToF: + case OpImageQuerySize: { - auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type; + auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto &type = get(result_type); - auto &arg_type = expression_type(ops[2]); - auto func = type_to_glsl_constructor(type); + if (type.basetype == SPIRType::Image) + { + string expr; + if (type.image.sampled == 2) + { + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_shader_image_size"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize."); - // If we're sign-extending or zero-extending, we need to make sure we cast from the correct type. - // For truncation, it does not matter, so don't emit useless casts. - if (arg_type.width < type.width) - emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype); + // The size of an image is always constant. 
+ expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")"); + } + else + { + // This path is hit for samplerBuffers and multisampled images which do not have LOD. + std::string fname = "textureSize"; + if (is_legacy()) + { + auto &imgtype = get(type.self); + fname = legacy_tex_op(fname, imgtype, ops[2]); + } + expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")"); + } + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + } else - emit_unary_func_op(result_type, id, ops[2], func.c_str()); + SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); break; } - case OpConvertFToU: - case OpConvertFToS: + // Compute + case OpControlBarrier: + case OpMemoryBarrier: { - // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - auto &type = get(result_type); - auto expected_type = type; - auto &float_type = expression_type(ops[2]); - expected_type.basetype = - opcode == OpConvertFToS ? 
to_signed_basetype(type.width) : to_unsigned_basetype(type.width); + uint32_t execution_scope = 0; + uint32_t memory; + uint32_t semantics; - auto func = type_to_glsl_constructor(expected_type); - emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype); - break; - } + if (opcode == OpMemoryBarrier) + { + memory = evaluate_constant_u32(ops[0]); + semantics = evaluate_constant_u32(ops[1]); + } + else + { + execution_scope = evaluate_constant_u32(ops[0]); + memory = evaluate_constant_u32(ops[1]); + semantics = evaluate_constant_u32(ops[2]); + } - case OpFConvert: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) + { + // OpControlBarrier with ScopeSubgroup is subgroupBarrier() + if (opcode != OpControlBarrier) + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier); + } + else + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier); + } + } - auto func = type_to_glsl_constructor(get(result_type)); - emit_unary_func_op(result_type, id, ops[2], func.c_str()); - break; - } + if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) + { + // Control shaders only have barriers, and it implies memory barriers. + if (opcode == OpControlBarrier) + statement("barrier();"); + break; + } + + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. 
+ const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = evaluate_constant_u32(next_ops[1]); + uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + bool memory_scope_covered = false; + if (next_memory == memory) + memory_scope_covered = true; + else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) + { + // If we only care about workgroup memory, either Device or Workgroup scope is fine, + // scope does not have to match. + if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && + (memory == ScopeDevice || memory == ScopeWorkgroup)) + { + memory_scope_covered = true; + } + } + else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) + { + // The control barrier has device scope, but the memory barrier just has workgroup scope. + memory_scope_covered = true; + } + + // If we have the same memory scope, and all memory types are covered, we're good. + if (memory_scope_covered && (semantics & next_semantics) == semantics) + break; + } + } + + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. + if (semantics || opcode == OpControlBarrier) + { + assert(current_emitting_block); + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + } + + if (memory == ScopeWorkgroup) // Only need to consider memory within a group + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + { + // OpControlBarrier implies a memory barrier for shared memory as well. 
+ bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup; + if (!implies_shared_barrier) + statement("memoryBarrierShared();"); + } + else if (semantics != 0) + statement("groupMemoryBarrier();"); + } + else if (memory == ScopeSubgroup) + { + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("subgroupMemoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 3 barriers. + statement("subgroupMemoryBarrier();"); + } + else + { + // Pick out individual barriers. + if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("subgroupMemoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("subgroupMemoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("subgroupMemoryBarrierImage();"); + } + } + else + { + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; - case OpBitcast: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t arg = ops[2]; + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("memoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 4 barriers. + statement("memoryBarrier();"); + } + else + { + // Pick out individual barriers. 
+ if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("memoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("memoryBarrierImage();"); + } + } - auto op = bitcast_glsl_op(get(result_type), expression_type(arg)); - emit_unary_func_op(result_type, id, arg, op.c_str()); + if (opcode == OpControlBarrier) + { + if (execution_scope == ScopeSubgroup) + statement("subgroupBarrier();"); + else + statement("barrier();"); + } break; } - case OpQuantizeToF16: + case OpExtInst: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t arg = ops[2]; - - string op; - auto &type = get(result_type); + uint32_t extension_set = ops[2]; + auto ext = get(extension_set).ext; - switch (type.vecsize) - { - case 1: - op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); - break; - case 2: - op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); - break; - case 3: + if (ext == SPIRExtension::GLSL) { - auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); - auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); - op = join("vec3(", op0, ", ", op1, ")"); - break; + emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - case 4: + else if (ext == SPIRExtension::SPV_AMD_shader_ballot) { - auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); - auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); - op = join("vec4(", op0, ", ", op1, ")"); - break; - } - default: - SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - - emit_op(result_type, id, op, should_forward(arg)); - inherit_expression_dependencies(id, arg); - break; - } - - // Derivatives - case OpDPdx: - GLSL_UFOP(dFdx); - if (is_legacy_es()) - 
require_extension_internal("GL_OES_standard_derivatives"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdy: - GLSL_UFOP(dFdy); - if (is_legacy_es()) - require_extension_internal("GL_OES_standard_derivatives"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdxFine: - GLSL_UFOP(dFdxFine); - if (options.es) + else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdyFine: - GLSL_UFOP(dFdyFine); - if (options.es) + else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdxCoarse: - if (options.es) + else if (ext == SPIRExtension::SPV_AMD_gcn_shader) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } - GLSL_UFOP(dFdxCoarse); - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdyCoarse: - GLSL_UFOP(dFdyCoarse); - if (options.es) + else if (ext == SPIRExtension::SPV_debug_info || + ext == SPIRExtension::NonSemanticShaderDebugInfo || + ext == SPIRExtension::NonSemanticGeneric) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + break; // Ignore SPIR-V debug information extended instructions. 
} - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpFwidth: - GLSL_UFOP(fwidth); - if (is_legacy_es()) - require_extension_internal("GL_OES_standard_derivatives"); - register_control_dependent_expression(ops[1]); - break; - - case OpFwidthCoarse: - GLSL_UFOP(fwidthCoarse); - if (options.es) + else if (ext == SPIRExtension::NonSemanticDebugPrintf) { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + // Operation 1 is printf. + if (ops[3] == 1) + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n"); + require_extension_internal("GL_EXT_debug_printf"); + auto &format_string = get(ops[4]).str; + string expr = join("debugPrintfEXT(\"", format_string, "\""); + for (uint32_t i = 5; i < length; i++) + { + expr += ", "; + expr += to_expression(ops[i]); + } + statement(expr, ");"); + } } - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - case OpFwidthFine: - GLSL_UFOP(fwidthFine); - if (options.es) + else { - SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + statement("// unimplemented ext op ", instruction.op); + break; } - if (options.version < 450) - require_extension_internal("GL_ARB_derivative_control"); - register_control_dependent_expression(ops[1]); - break; - - // Bitfield - case OpBitFieldInsert: - // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. - GLSL_QFOP(bitfieldInsert); - break; - case OpBitFieldSExtract: - case OpBitFieldUExtract: - // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. - GLSL_TFOP(bitfieldExtract); break; + } - case OpBitReverse: - GLSL_UFOP(bitfieldReverse); - break; + // Legacy sub-group stuff ... 
+ case OpSubgroupBallotKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + string expr; + expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)"); + emit_op(result_type, id, expr, should_forward(ops[2])); - case OpBitCount: - GLSL_UFOP(bitCount); + require_extension_internal("GL_ARB_shader_ballot"); + inherit_expression_dependencies(id, ops[2]); + register_control_dependent_expression(ops[1]); break; + } - // Atomics - case OpAtomicExchange: + case OpSubgroupFirstInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t ptr = ops[2]; - // Ignore semantics for now, probably only relevant to CL. - uint32_t val = ops[5]; - const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; - forced_temporaries.insert(id); - emit_binary_func_op(result_type, id, ptr, val, op); - flush_all_atomic_capable_variables(); + emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); + + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicCompareExchange: + case OpSubgroupReadInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t ptr = ops[2]; - uint32_t val = ops[6]; - uint32_t comp = ops[7]; - const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; + emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); - forced_temporaries.insert(id); - emit_trinary_func_op(result_type, id, ptr, comp, val, op); - flush_all_atomic_capable_variables(); + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicLoad: - flush_all_atomic_capable_variables(); - // FIXME: Image? - // OpAtomicLoad seems to only be relevant for atomic counters. 
- forced_temporaries.insert(ops[1]); - GLSL_UFOP(atomicCounter); - break; - - case OpAtomicStore: - SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore."); - - case OpAtomicIIncrement: - case OpAtomicIDecrement: + case OpSubgroupAllKHR: { - forced_temporaries.insert(ops[1]); - auto &type = expression_type(ops[2]); - if (type.storage == StorageClassAtomicCounter) - { - // Legacy GLSL stuff, not sure if this is relevant to support. - if (opcode == OpAtomicIIncrement) - GLSL_UFOP(atomicCounterIncrement); - else - GLSL_UFOP(atomicCounterDecrement); - } - else - { - bool atomic_image = check_atomic_image(ops[2]); - bool unsigned_type = (type.basetype == SPIRType::UInt) || - (atomic_image && get(type.image.type).basetype == SPIRType::UInt); - const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; - - const char *increment = nullptr; - if (opcode == OpAtomicIIncrement && unsigned_type) - increment = "1u"; - else if (opcode == OpAtomicIIncrement) - increment = "1"; - else if (unsigned_type) - increment = "uint(-1)"; - else - increment = "-1"; - - emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false); - } + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); - flush_all_atomic_capable_variables(); + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicIAdd: + case OpSubgroupAnyKHR: { - const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicAdd" : "atomicAdd"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicISub: + case OpSubgroupAllEqualKHR: { - const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; - forced_temporaries.insert(ops[1]); - auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); - emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicSMin: - case OpAtomicUMin: + case OpGroupIAddNonUniformAMD: + case OpGroupFAddNonUniformAMD: { - const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicSMax: - case OpAtomicUMax: + case OpGroupFMinNonUniformAMD: + case OpGroupUMinNonUniformAMD: + case OpGroupSMinNonUniformAMD: { - const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicMax" : "atomicMax"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicAnd: + case OpGroupFMaxNonUniformAMD: + case OpGroupUMaxNonUniformAMD: + case OpGroupSMaxNonUniformAMD: { - const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); break; } - case OpAtomicOr: + case OpFragmentMaskFetchAMD: { - const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); + } + else + { + emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); break; } - case OpAtomicXor: + case OpFragmentFetchAMD: { - const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicXor" : "atomicXor"; - forced_temporaries.insert(ops[1]); - emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); - flush_all_atomic_capable_variables(); + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); + } + else + { + emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); break; } - // Geometry shaders - case OpEmitVertex: - statement("EmitVertex();"); + // Vulkan 1.1 sub-group stuff ... + case OpGroupNonUniformElect: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformIMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + emit_subgroup_op(instruction); break; - case OpEndPrimitive: - statement("EndPrimitive();"); - break; + case OpFUnordEqual: + case OpFUnordLessThan: + case 
OpFUnordGreaterThan: + case OpFUnordLessThanEqual: + case OpFUnordGreaterThanEqual: + { + // GLSL doesn't specify if floating point comparisons are ordered or unordered, + // but glslang always emits ordered floating point compares for GLSL. + // To get unordered compares, we can test the opposite thing and invert the result. + // This way, we force true when there is any NaN present. + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; - case OpEmitStreamVertex: - statement("EmitStreamVertex();"); - break; + string expr; + if (expression_type(op0).vecsize > 1) + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = "notEqual"; + break; - case OpEndStreamPrimitive: - statement("EndStreamPrimitive();"); - break; + case OpFUnordLessThan: + comp_op = "greaterThanEqual"; + break; - // Textures - case OpImageSampleExplicitLod: - case OpImageSampleProjExplicitLod: - case OpImageSampleDrefExplicitLod: - case OpImageSampleProjDrefExplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleDrefImplicitLod: - case OpImageSampleProjDrefImplicitLod: - case OpImageFetch: - case OpImageGather: - case OpImageDrefGather: - // Gets a bit hairy, so move this to a separate instruction. - emit_texture_op(instruction); - break; + case OpFUnordLessThanEqual: + comp_op = "greaterThan"; + break; - case OpImage: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + case OpFUnordGreaterThan: + comp_op = "lessThanEqual"; + break; - // Suppress usage tracking. - auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + case OpFUnordGreaterThanEqual: + comp_op = "lessThan"; + break; - // When using the image, we need to know which variable it is actually loaded from. - auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? 
var->self : 0; + default: + assert(0); + break; + } + + expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))"); + } + else + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = " != "; + break; + + case OpFUnordLessThan: + comp_op = " >= "; + break; + + case OpFUnordLessThanEqual: + comp_op = " > "; + break; + + case OpFUnordGreaterThan: + comp_op = " <= "; + break; + + case OpFUnordGreaterThanEqual: + comp_op = " < "; + break; + + default: + assert(0); + break; + } + + expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")"); + } + + emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(ops[1], op0); + inherit_expression_dependencies(ops[1], op1); break; } - case OpImageQueryLod: - { - if (!options.es && options.version < 400) - { - require_extension_internal("GL_ARB_texture_query_lod"); - // For some reason, the ARB spec is all-caps. - GLSL_BFOP(textureQueryLOD); + case OpReportIntersectionKHR: + // NV is same opcode. + forced_temporaries.insert(ops[1]); + if (ray_tracing_is_khr) + GLSL_BFOP(reportIntersectionEXT); + else + GLSL_BFOP(reportIntersectionNV); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpIgnoreIntersectionNV: + // KHR variant is a terminator. + statement("ignoreIntersectionNV();"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTerminateRayNV: + // KHR variant is a terminator. 
+ statement("terminateRayNV();"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTraceNV: + statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", + to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTraceRayKHR: + if (!has_decoration(ops[10], DecorationLocation)) + SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR."); + statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", + to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpExecuteCallableNV: + statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpExecuteCallableKHR: + if (!has_decoration(ops[1], DecorationLocation)) + SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR."); + statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + + // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects. 
+ case OpRayQueryInitializeKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryInitializeEXT(", + to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", + to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ");"); + break; + case OpRayQueryProceedKHR: + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false); + break; + case OpRayQueryTerminateKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");"); + break; + case OpRayQueryGenerateIntersectionKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + break; + case OpRayQueryConfirmIntersectionKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");"); + break; +#define GLSL_RAY_QUERY_GET_OP(op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \ + break +#define GLSL_RAY_QUERY_GET_OP2(op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \ + break + GLSL_RAY_QUERY_GET_OP(RayTMin); + GLSL_RAY_QUERY_GET_OP(RayFlags); + GLSL_RAY_QUERY_GET_OP(WorldRayOrigin); + GLSL_RAY_QUERY_GET_OP(WorldRayDirection); + GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque); + GLSL_RAY_QUERY_GET_OP2(IntersectionType); + GLSL_RAY_QUERY_GET_OP2(IntersectionT); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset); + 
GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics); + GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld); + GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject); +#undef GLSL_RAY_QUERY_GET_OP +#undef GLSL_RAY_QUERY_GET_OP2 + + case OpConvertUToAccelerationStructureKHR: + { + require_extension_internal("GL_EXT_ray_tracing"); + + bool elide_temporary = should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 && + !hoisted_temporaries.count(ops[1]); + + if (elide_temporary) + { + GLSL_UFOP(accelerationStructureEXT); } - else if (options.es) - SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); else - GLSL_BFOP(textureQueryLod); - register_control_dependent_expression(ops[1]); + { + // Force this path in subsequent iterations. + forced_temporaries.insert(ops[1]); + + // We cannot declare a temporary acceleration structure in GLSL. + // If we get to this point, we'll have to emit a temporary uvec2, + // and cast to RTAS on demand. + statement(declare_temporary(expression_type_id(ops[2]), ops[1]), to_unpacked_expression(ops[2]), ";"); + // Use raw SPIRExpression interface to block all usage tracking. 
+ set(ops[1], join("accelerationStructureEXT(", to_name(ops[1]), ")"), ops[0], true); + } break; } - case OpImageQueryLevels: + case OpConvertUToPtr: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + auto &type = get(ops[0]); + if (type.storage != StorageClassPhysicalStorageBufferEXT) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); - if (!options.es && options.version < 430) - require_extension_internal("GL_ARB_texture_query_levels"); - if (options.es) - SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); + auto &in_type = expression_type(ops[2]); + if (in_type.vecsize == 2) + require_extension_internal("GL_EXT_buffer_reference_uvec2"); - auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")"); - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::Int, expr); - emit_op(result_type, id, expr, true); + auto op = type_to_glsl(type); + emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); break; } - case OpImageQuerySamples: + case OpConvertPtrToU: { - auto &type = expression_type(ops[2]); - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + auto &type = get(ops[0]); + auto &ptr_type = expression_type(ops[2]); + if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); - string expr; - if (type.image.sampled == 2) - expr = join("imageSamples(", to_expression(ops[2]), ")"); - else - expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); + if (type.vecsize == 2) + require_extension_internal("GL_EXT_buffer_reference_uvec2"); - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::Int, expr); - emit_op(result_type, id, expr, true); + auto op = type_to_glsl(type); + emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); break; } - case OpSampledImage: - { - uint32_t result_type 
= ops[0]; - uint32_t id = ops[1]; - emit_sampled_image_op(result_type, id, ops[2], ops[3]); - inherit_expression_dependencies(id, ops[2]); - inherit_expression_dependencies(id, ops[3]); + case OpUndef: + // Undefined value has been declared. break; - } - case OpImageQuerySizeLod: + case OpLine: { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - - auto expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ", ", - bitcast_expression(SPIRType::Int, ops[3]), ")"); - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::Int, expr); - emit_op(result_type, id, expr, true); + emit_line_directive(ops[0], ops[1]); break; } - // Image load/store - case OpImageRead: - { - // We added Nonreadable speculatively to the OpImage variable due to glslangValidator - // not adding the proper qualifiers. - // If it turns out we need to read the image after all, remove the qualifier and recompile. - auto *var = maybe_get_backing_variable(ops[2]); - if (var) - { - auto &flags = ir.meta[var->self].decoration.decoration_flags; - if (flags.get(DecorationNonReadable)) - { - flags.clear(DecorationNonReadable); - force_recompile(); - } - } + case OpNoLine: + break; - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + case OpDemoteToHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + statement(backend.demote_literal, ";"); + break; - bool pure; - string imgexpr; - auto &type = expression_type(ops[2]); + case OpIsHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + // Helper lane state with demote is volatile by nature. + // Do not forward this. 
+ emit_op(ops[0], ops[1], "helperInvocationEXT()", false); + break; - if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code + case OpBeginInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) { - if (type.image.ms) - SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); - - auto itr = - find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); - - if (itr == end(pls_inputs)) - { - // For non-PLS inputs, we rely on subpass type remapping information to get it right - // since ImageRead always returns 4-component vectors and the backing type is opaque. - if (!var->remapped_components) - SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); - imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); - } - else - { - // PLS input could have different number of components than what the SPIR expects, swizzle to - // the appropriate vector size. - uint32_t components = pls_format_to_components(itr->format); - imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); - } - pure = true; + statement("SPIRV_Cross_beginInvocationInterlock();"); + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. } - else if (type.image.dim == DimSubpassData) + break; + + case OpEndInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) { - if (options.vulkan_semantics) - { - // With Vulkan semantics, use the proper Vulkan GLSL construct. 
- if (type.image.ms) - { - uint32_t operands = ops[4]; - if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW( - "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + statement("SPIRV_Cross_endInvocationInterlock();"); + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; - uint32_t samples = ops[5]; - imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")"); - } - else - imgexpr = join("subpassLoad(", to_expression(ops[2]), ")"); - } - else - { - if (type.image.ms) - { - uint32_t operands = ops[4]; - if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW( - "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + case OpSetMeshOutputsEXT: + statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; - uint32_t samples = ops[5]; - imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", - to_expression(samples), ")"); - } - else - { - // Implement subpass loads via texture barrier style sampling. - imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); - } - } - imgexpr = remap_swizzle(get(result_type), 4, imgexpr); - pure = true; - } - else - { - // imageLoad only accepts int coords, not uint. - auto coord_expr = to_expression(ops[3]); - auto target_coord_type = expression_type(ops[3]); - target_coord_type.basetype = SPIRType::Int; - coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + default: + statement("// unimplemented op ", instruction.op); + break; + } +} - // Plain image load/store. 
- if (type.image.ms) - { - uint32_t operands = ops[4]; - if (operands != ImageOperandsSampleMask || length != 6) - SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); +// Appends function arguments, mapped from global variables, beyond the specified arg index. +// This is used when a function call uses fewer arguments than the function defines. +// This situation may occur if the function signature has been dynamically modified to +// extract global variables referenced from within the function, and convert them to +// function arguments. This is necessary for shader languages that do not support global +// access to shader input content from within a function (eg. Metal). Each additional +// function args uses the name of the global variable. Function nesting will modify the +// functions and function calls all the way up the nesting chain. +void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist) +{ + auto &args = func.arguments; + uint32_t arg_cnt = uint32_t(args.size()); + for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) + { + auto &arg = args[arg_idx]; + assert(arg.alias_global_variable); - uint32_t samples = ops[5]; - imgexpr = - join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); - } - else - imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")"); + // If the underlying variable needs to be declared + // (ie. a local variable with deferred declaration), do so now. 
+ uint32_t var_id = get(arg.id).basevariable; + if (var_id) + flush_variable_declaration(var_id); - imgexpr = remap_swizzle(get(result_type), 4, imgexpr); - pure = false; - } + arglist.push_back(to_func_call_arg(arg, arg.id)); + } +} - if (var && var->forwardable) - { - bool forward = forced_temporaries.find(id) == end(forced_temporaries); - auto &e = emit_op(result_type, id, imgexpr, forward); +string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) +{ + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + { + return to_member_name(get(type.type_alias), index); + } - // We only need to track dependencies if we're reading from image load/store. - if (!pure) - { - e.loaded_from = var->self; - if (forward) - var->dependees.push_back(id); - } - } - else - emit_op(result_type, id, imgexpr, false); + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) + return memb[index].alias; + else + return join("_m", index); +} - inherit_expression_dependencies(id, ops[2]); - if (type.image.ms) - inherit_expression_dependencies(id, ops[5]); - break; +string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) +{ + return join(".", to_member_name(type, index)); +} + +string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector &indices) +{ + string ret; + auto *member_type = &type; + for (auto &index : indices) + { + ret += join(".", to_member_name(*member_type, index)); + member_type = &get(member_type->member_types[index]); } + return ret; +} - case OpImageTexelPointer: +void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) +{ + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - auto &e = set(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, 
true); + auto &name = memb[index].alias; + if (name.empty()) + return; + + ParsedIR::sanitize_identifier(name, true, true); + update_name_cache(type.member_name_cache, name); + } +} + +// Checks whether the ID is a row_major matrix that requires conversion before use +bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) +{ + // Natively supported row-major matrices do not need to be converted. + // Legacy targets do not support row major. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + auto *e = maybe_get(id); + if (e) + return e->need_transpose; + else + return has_decoration(id, DecorationRowMajor); +} + +// Checks whether the member is a row_major matrix that requires conversion before use +bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) +{ + // Natively supported row-major matrices do not need to be converted. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + // Non-matrix or column-major matrix types do not need to be converted. + if (!has_member_decoration(type.self, index, DecorationRowMajor)) + return false; + + // Only square row-major matrices can be converted at this time. + // Converting non-square matrices will require defining custom GLSL function that + // swaps matrix elements while retaining the original dimensional form of the matrix. + const auto mbr_type = get(type.member_types[index]); + if (mbr_type.columns != mbr_type.vecsize) + SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + + return true; +} + +// Checks if we need to remap physical type IDs when declaring the type in a buffer. +bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); +} - // When using the pointer, we need to know which variable it is actually loaded from. 
- auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; - break; - } +// Checks whether the member is in packed data type, that might need to be unpacked. +bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked); +} - case OpImageWrite: +// Wraps the expression string in a function call that converts the +// row_major matrix result of the expression to a column_major matrix. +// Base implementation uses the standard library transpose() function. +// Subclasses may override to use a different function. +string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */, + bool /*is_packed*/) +{ + strip_enclosed_expression(exp_str); + if (!is_matrix(exp_type)) { - // We added Nonwritable speculatively to the OpImage variable due to glslangValidator - // not adding the proper qualifiers. - // If it turns out we need to write to the image after all, remove the qualifier and recompile. - auto *var = maybe_get_backing_variable(ops[0]); - if (var) - { - auto &flags = ir.meta[var->self].decoration.decoration_flags; - if (flags.get(DecorationNonWritable)) - { - flags.clear(DecorationNonWritable); - force_recompile(); - } - } + auto column_index = exp_str.find_last_of('['); + if (column_index == string::npos) + return exp_str; - auto &type = expression_type(ops[0]); - auto &value_type = expression_type(ops[2]); - auto store_type = value_type; - store_type.vecsize = 4; + auto column_expr = exp_str.substr(column_index); + exp_str.resize(column_index); - // imageStore only accepts int coords, not uint. 
- auto coord_expr = to_expression(ops[1]); - auto target_coord_type = expression_type(ops[1]); - target_coord_type.basetype = SPIRType::Int; - coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); + auto transposed_expr = type_to_glsl_constructor(exp_type) + "("; - if (type.image.ms) + // Loading a column from a row-major matrix. Unroll the load. + for (uint32_t c = 0; c < exp_type.vecsize; c++) { - uint32_t operands = ops[3]; - if (operands != ImageOperandsSampleMask || length != 5) - SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); - uint32_t samples = ops[4]; - statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", - remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + transposed_expr += join(exp_str, '[', c, ']', column_expr); + if (c + 1 < exp_type.vecsize) + transposed_expr += ", "; } - else - statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", - remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); - if (var && variable_storage_is_aliased(*var)) - flush_all_aliased_variables(); - break; + transposed_expr += ")"; + return transposed_expr; } - - case OpImageQuerySize: + else if (options.version < 120) { - auto &type = expression_type(ops[2]); - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - - if (type.basetype == SPIRType::Image) + // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that + // these GLSL versions do not support non-square matrices. + if (exp_type.vecsize == 2 && exp_type.columns == 2) { - string expr; - if (type.image.sampled == 2) + if (!requires_transpose_2x2) { - // The size of an image is always constant. 
- expr = join("imageSize(", to_expression(ops[2]), ")"); + requires_transpose_2x2 = true; + force_recompile(); } - else + } + else if (exp_type.vecsize == 3 && exp_type.columns == 3) + { + if (!requires_transpose_3x3) { - // This path is hit for samplerBuffers and multisampled images which do not have LOD. - expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ")"); + requires_transpose_3x3 = true; + force_recompile(); + } + } + else if (exp_type.vecsize == 4 && exp_type.columns == 4) + { + if (!requires_transpose_4x4) + { + requires_transpose_4x4 = true; + force_recompile(); } - - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::Int, expr); - emit_op(result_type, id, expr, true); } else - SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); - break; + SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose."); + return join("spvTranspose(", exp_str, ")"); } + else + return join("transpose(", exp_str, ")"); +} - // Compute - case OpControlBarrier: - case OpMemoryBarrier: - { - uint32_t execution_scope = 0; - uint32_t memory; - uint32_t semantics; +string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) +{ + string type_name = type_to_glsl(type, id); + remap_variable_type_name(type, name, type_name); + return join(type_name, " ", name, type_to_array_glsl(type)); +} - if (opcode == OpMemoryBarrier) - { - memory = get(ops[0]).scalar(); - semantics = get(ops[1]).scalar(); - } - else - { - execution_scope = get(ops[0]).scalar(); - memory = get(ops[1]).scalar(); - semantics = get(ops[2]).scalar(); - } +bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const +{ + return var.storage == storage; +} - if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) - { - if (!options.vulkan_semantics) - SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); - 
require_extension_internal("GL_KHR_shader_subgroup_basic"); - } +// Emit a structure member. Subclasses may override to modify output, +// or to dynamically add a padding member if needed. +void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier, uint32_t) +{ + auto &membertype = get(member_type_id); - if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) - { - // Control shaders only have barriers, and it implies memory barriers. - if (opcode == OpControlBarrier) - statement("barrier();"); - break; - } + Bitset memberflags; + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; - // We only care about these flags, acquire/release and friends are not relevant to GLSL. - semantics = mask_relevant_memory_semantics(semantics); + string qualifiers; + bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); - if (opcode == OpMemoryBarrier) - { - // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier - // does what we need, so we avoid redundant barriers. - const Instruction *next = get_next_instruction_in_block(instruction); - if (next && next->op == OpControlBarrier) - { - auto *next_ops = stream(*next); - uint32_t next_memory = get(next_ops[1]).scalar(); - uint32_t next_semantics = get(next_ops[2]).scalar(); - next_semantics = mask_relevant_memory_semantics(next_semantics); + if (is_block) + qualifiers = to_interpolation_qualifiers(memberflags); - bool memory_scope_covered = false; - if (next_memory == memory) - memory_scope_covered = true; - else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) - { - // If we only care about workgroup memory, either Device or Workgroup scope is fine, - // scope does not have to match. 
- if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && - (memory == ScopeDevice || memory == ScopeWorkgroup)) - { - memory_scope_covered = true; - } - } - else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) - { - // The control barrier has device scope, but the memory barrier just has workgroup scope. - memory_scope_covered = true; - } + statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags), + variable_decl(membertype, to_member_name(type, index)), ";"); +} - // If we have the same memory scope, and all memory types are covered, we're good. - if (memory_scope_covered && (semantics & next_semantics) == semantics) - break; - } - } +void CompilerGLSL::emit_struct_padding_target(const SPIRType &) +{ +} - // We are synchronizing some memory or syncing execution, - // so we cannot forward any loads beyond the memory barrier. - if (semantics || opcode == OpControlBarrier) - { - assert(current_emitting_block); - flush_control_dependent_expressions(current_emitting_block->self); - flush_all_active_variables(); - } +string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) +{ + // GL_EXT_buffer_reference variables can be marked as restrict. + if (flags.get(DecorationRestrictPointerEXT)) + return "restrict "; - if (memory == ScopeWorkgroup) // Only need to consider memory within a group + string qual; + + if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier) + qual = "precise "; + + // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). 
+ bool type_supports_precision = + type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt || + type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || + type.basetype == SPIRType::Sampler; + + if (!type_supports_precision) + return qual; + + if (options.es) + { + auto &execution = get_entry_point(); + + if (flags.get(DecorationRelaxedPrecision)) { - if (semantics == MemorySemanticsWorkgroupMemoryMask) - statement("memoryBarrierShared();"); - else if (semantics != 0) - statement("groupMemoryBarrier();"); + bool implied_fmediump = type.basetype == SPIRType::Float && + options.fragment.default_float_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + options.fragment.default_int_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + qual += (implied_fmediump || implied_imediump) ? "" : "mediump "; } - else if (memory == ScopeSubgroup) + else { - const uint32_t all_barriers = - MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + bool implied_fhighp = + type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); - if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) - { - // These are not relevant for GLSL, but assume it means memoryBarrier(). - // memoryBarrier() does everything, so no need to test anything else. - statement("subgroupMemoryBarrier();"); - } - else if ((semantics & all_barriers) == all_barriers) - { - // Short-hand instead of emitting 3 barriers. - statement("subgroupMemoryBarrier();"); - } - else - { - // Pick out individual barriers. 
- if (semantics & MemorySemanticsWorkgroupMemoryMask) - statement("subgroupMemoryBarrierShared();"); - if (semantics & MemorySemanticsUniformMemoryMask) - statement("subgroupMemoryBarrierBuffer();"); - if (semantics & MemorySemanticsImageMemoryMask) - statement("subgroupMemoryBarrierImage();"); - } + bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + ((options.fragment.default_int_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + qual += (implied_fhighp || implied_ihighp) ? "" : "highp "; } - else + } + else if (backend.allow_precision_qualifiers) + { + // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. + // The default is highp however, so only emit mediump in the rare case that a shader has these. + if (flags.get(DecorationRelaxedPrecision)) + qual += "mediump "; + } + + return qual; +} + +string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +{ + auto &type = expression_type(id); + bool use_precision_qualifiers = backend.allow_precision_qualifiers; + if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) + { + // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. + auto &result_type = get(type.image.type); + if (result_type.width < 32) + return "mediump "; + } + return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); +} + +void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var) +{ + // Works around weird behavior in glslangValidator where + // a patch out block is translated to just block members getting the decoration. + // To make glslang not complain when we compile again, we have to transform this back to a case where + // the variable itself has Patch decoration, and not members. + // Same for perprimitiveEXT. 
+ auto &type = get(var.basetype); + if (has_decoration(type.self, DecorationBlock)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + Decoration promoted_decoration = {}; + bool do_promote_decoration = false; + for (uint32_t i = 0; i < member_count; i++) { - const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | - MemorySemanticsImageMemoryMask | MemorySemanticsAtomicCounterMemoryMask; - - if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) - { - // These are not relevant for GLSL, but assume it means memoryBarrier(). - // memoryBarrier() does everything, so no need to test anything else. - statement("memoryBarrier();"); - } - else if ((semantics & all_barriers) == all_barriers) + if (has_member_decoration(type.self, i, DecorationPatch)) { - // Short-hand instead of emitting 4 barriers. - statement("memoryBarrier();"); + promoted_decoration = DecorationPatch; + do_promote_decoration = true; + break; } - else + else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) { - // Pick out individual barriers. 
- if (semantics & MemorySemanticsWorkgroupMemoryMask) - statement("memoryBarrierShared();"); - if (semantics & MemorySemanticsUniformMemoryMask) - statement("memoryBarrierBuffer();"); - if (semantics & MemorySemanticsImageMemoryMask) - statement("memoryBarrierImage();"); - if (semantics & MemorySemanticsAtomicCounterMemoryMask) - statement("memoryBarrierAtomicCounter();"); + promoted_decoration = DecorationPerPrimitiveEXT; + do_promote_decoration = true; + break; } } - if (opcode == OpControlBarrier) + if (do_promote_decoration) { - if (execution_scope == ScopeSubgroup) - statement("subgroupBarrier();"); - else - statement("barrier();"); + set_decoration(var.self, promoted_decoration); + for (uint32_t i = 0; i < member_count; i++) + unset_member_decoration(type.self, i, promoted_decoration); } - break; } +} - case OpExtInst: +string CompilerGLSL::to_qualifiers_glsl(uint32_t id) +{ + auto &flags = get_decoration_bitset(id); + string res; + + auto *var = maybe_get(id); + + if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) + res += "shared "; + else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied) + res += "taskPayloadSharedEXT "; + + res += to_interpolation_qualifiers(flags); + if (var) + res += to_storage_qualifiers_glsl(*var); + + auto &type = expression_type(id); + if (type.image.dim != DimSubpassData && type.image.sampled == 2) { - uint32_t extension_set = ops[2]; + if (flags.get(DecorationCoherent)) + res += "coherent "; + if (flags.get(DecorationRestrict)) + res += "restrict "; - if (get(extension_set).ext == SPIRExtension::GLSL) - { - emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); - } - else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot) - { - emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); - } - else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) + if (flags.get(DecorationNonWritable)) 
+ res += "readonly "; + + bool formatted_load = type.image.format == ImageFormatUnknown; + if (flags.get(DecorationNonReadable)) { - emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + res += "writeonly "; + formatted_load = false; } - else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) + + if (formatted_load) { - emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + if (!options.es) + require_extension_internal("GL_EXT_shader_image_load_formatted"); + else + SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL."); } - else if (get(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader) + } + + res += to_precision_qualifiers_glsl(id); + + return res; +} + +string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... + auto &type = expression_type(arg.id); + const char *direction = ""; + + if (type.pointer) + { + if (arg.write_count && arg.read_count) + direction = "inout "; + else if (arg.write_count) + direction = "out "; + } + + return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); +} + +string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) +{ + return to_unpacked_expression(var.initializer); +} + +string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id) +{ +#ifndef NDEBUG + auto &type = get(type_id); + assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction || + type.storage == StorageClassGeneric); +#endif + uint32_t id = ir.increase_bound_by(1); + ir.make_constant_null(id, type_id, false); + return constant_expression(get(id)); +} + +bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const +{ + if (type.pointer) + return false; + + if (!type.array.empty() && options.flatten_multidimensional_arrays) + return 
false; + + for (auto &literal : type.array_size_literal) + if (!literal) + return false; + + for (auto &memb : type.member_types) + if (!type_can_zero_initialize(get(memb))) + return false; + + return true; +} + +string CompilerGLSL::variable_decl(const SPIRVariable &variable) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + auto &type = get_variable_data_type(variable); + + if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer) + SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); + + auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); + + if (variable.loop_variable && variable.static_expression) + { + uint32_t expr = variable.static_expression; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_unpacked_expression(variable.static_expression)); + else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + } + else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) + { + uint32_t expr = variable.initializer; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_initializer_expression(variable)); + else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + } + + return res; +} + +const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) +{ + auto &flags = get_decoration_bitset(variable.self); + if (flags.get(DecorationRelaxedPrecision)) + return "mediump "; + else + return "highp "; +} + +string CompilerGLSL::pls_decl(const PlsRemap &var) +{ + auto &variable = get(var.id); + + SPIRType type; + type.vecsize = pls_format_to_components(var.format); + type.basetype = pls_format_to_basetype(var.format); + + return 
join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", + to_name(variable.self)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const +{ + return to_array_size_literal(type, uint32_t(type.array.size() - 1)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const +{ + assert(type.array.size() == type.array_size_literal.size()); + + if (type.array_size_literal[index]) + { + return type.array[index]; + } + else + { + // Use the default spec constant value. + // This is the best we can do. + return evaluate_constant_u32(type.array[index]); + } +} + +string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) +{ + assert(type.array.size() == type.array_size_literal.size()); + + auto &size = type.array[index]; + if (!type.array_size_literal[index]) + return to_expression(size); + else if (size) + return convert_to_string(size); + else if (!backend.unsized_array_supported) + { + // For runtime-sized arrays, we can work around + // lack of standard support for this by simply having + // a single element array. + // + // Runtime length arrays must always be the last element + // in an interface block. + return "1"; + } + else + return ""; +} + +string CompilerGLSL::type_to_array_glsl(const SPIRType &type) +{ + if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + { + // We are using a wrapped pointer type, and we should not emit any array declarations here. 
+ return ""; + } + + if (type.array.empty()) + return ""; + + if (options.flatten_multidimensional_arrays) + { + string res; + res += "["; + for (auto i = uint32_t(type.array.size()); i; i--) { - emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + res += enclose_expression(to_array_size(type, i - 1)); + if (i > 1) + res += " * "; } - else if (get(extension_set).ext == SPIRExtension::SPV_debug_info) + res += "]"; + return res; + } + else + { + if (type.array.size() > 1) { - break; // Ignore SPIR-V debug information extended instructions. + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. " + "Try using --flatten-multidimensional-arrays or set " + "options.flatten_multidimensional_arrays to true."); } - else + + string res; + for (auto i = uint32_t(type.array.size()); i; i--) { - statement("// unimplemented ext op ", instruction.op); - break; + res += "["; + res += to_array_size(type, i - 1); + res += "]"; } + return res; + } +} +string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) +{ + auto &imagetype = get(type.image.type); + string res; + + switch (imagetype.basetype) + { + case SPIRType::Int: + case SPIRType::Short: + case SPIRType::SByte: + res = "i"; + break; + case SPIRType::UInt: + case SPIRType::UShort: + case SPIRType::UByte: + res = "u"; + break; + default: break; } - // Legacy sub-group stuff ... - case OpSubgroupBallotKHR: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - string expr; - expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)"); - emit_op(result_type, id, expr, should_forward(ops[2])); + // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. + // We cannot express a true half texture type in GLSL. 
Neither for short integer formats for that matter. - require_extension_internal("GL_ARB_shader_ballot"); - inherit_expression_dependencies(id, ops[2]); - register_control_dependent_expression(ops[1]); - break; + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) + return res + "subpassInput" + (type.image.ms ? "MS" : ""); + else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + subpass_input_is_framebuffer_fetch(id)) + { + SPIRType sampled_type = get(type.image.type); + sampled_type.vecsize = 4; + return type_to_glsl(sampled_type); } - case OpSubgroupFirstInvocationKHR: + // If we're emulating subpassInput with samplers, force sampler2D + // so we don't have to specify format. + if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); - - require_extension_internal("GL_ARB_shader_ballot"); - register_control_dependent_expression(ops[1]); - break; + // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. + if (type.image.dim == DimBuffer && type.image.sampled == 1) + res += "sampler"; + else + res += type.image.sampled == 2 ? "image" : "texture"; } + else + res += "sampler"; - case OpSubgroupReadInvocationKHR: + switch (type.image.dim) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); - - require_extension_internal("GL_ARB_shader_ballot"); - register_control_dependent_expression(ops[1]); + case Dim1D: + // ES doesn't support 1D. Fake it with 2D. + res += options.es ? 
"2D" : "1D"; break; - } + case Dim2D: + res += "2D"; + break; + case Dim3D: + res += "3D"; + break; + case DimCube: + res += "Cube"; + break; + case DimRect: + if (options.es) + SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); - case OpSubgroupAllKHR: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); + if (is_legacy_desktop()) + require_extension_internal("GL_ARB_texture_rectangle"); - require_extension_internal("GL_ARB_shader_group_vote"); - register_control_dependent_expression(ops[1]); + res += "2DRect"; break; - } - case OpSubgroupAnyKHR: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); + case DimBuffer: + if (options.es && options.version < 320) + require_extension_internal("GL_EXT_texture_buffer"); + else if (!options.es && options.version < 300) + require_extension_internal("GL_EXT_texture_buffer_object"); + res += "Buffer"; + break; - require_extension_internal("GL_ARB_shader_group_vote"); - register_control_dependent_expression(ops[1]); + case DimSubpassData: + res += "2D"; break; + default: + SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); } - case OpSubgroupAllEqualKHR: + if (type.image.ms) + res += "MS"; + if (type.image.arrayed) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); - - require_extension_internal("GL_ARB_shader_group_vote"); - register_control_dependent_expression(ops[1]); - break; + if (is_legacy_desktop()) + require_extension_internal("GL_EXT_texture_array"); + res += "Array"; } - case OpGroupIAddNonUniformAMD: - case OpGroupFAddNonUniformAMD: + // "Shadow" state in GLSL only exists for samplers and combined image samplers. 
+ if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && + is_depth_image(type, id)) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); + res += "Shadow"; + } - require_extension_internal("GL_AMD_shader_ballot"); - register_control_dependent_expression(ops[1]); - break; + return res; +} + +string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) +{ + if (backend.use_array_constructor && type.array.size() > 1) + { + if (options.flatten_multidimensional_arrays) + SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, " + "e.g. float[][]()."); + else if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310."); } - case OpGroupFMinNonUniformAMD: - case OpGroupUMinNonUniformAMD: - case OpGroupSMinNonUniformAMD: + auto e = type_to_glsl(type); + if (backend.use_array_constructor) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); + for (uint32_t i = 0; i < type.array.size(); i++) + e += "[]"; + } + return e; +} - require_extension_internal("GL_AMD_shader_ballot"); - register_control_dependent_expression(ops[1]); - break; +// The optional id parameter indicates the object whose type we are trying +// to find the description for. It is optional. Most type descriptions do not +// depend on a specific object's use of that type. +string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + { + // Need to create a magic type name which compacts the entire type information. 
+ string name = type_to_glsl(get_pointee_type(type)); + for (size_t i = 0; i < type.array.size(); i++) + { + if (type.array_size_literal[i]) + name += join(type.array[i], "_"); + else + name += join("id", type.array[i], "_"); + } + name += "Pointer"; + return name; } - case OpGroupFMaxNonUniformAMD: - case OpGroupUMaxNonUniformAMD: - case OpGroupSMaxNonUniformAMD: + switch (type.basetype) { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + if (backend.explicit_struct_type) + return join("struct ", to_name(type.self)); + else + return to_name(type.self); - require_extension_internal("GL_AMD_shader_ballot"); - register_control_dependent_expression(ops[1]); + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type, id); + + case SPIRType::Sampler: + // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing + // this distinction into the type system. + return comparison_ids.count(id) ? "samplerShadow" : "sampler"; + + case SPIRType::AccelerationStructure: + return ray_tracing_is_khr ? 
"accelerationStructureEXT" : "accelerationStructureNV"; + + case SPIRType::RayQuery: + return "rayQueryEXT"; + + case SPIRType::Void: + return "void"; + + default: break; } - case OpFragmentMaskFetchAMD: - { - auto &type = expression_type(ops[2]); - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; + if (type.basetype == SPIRType::UInt && is_legacy()) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); - if (type.image.dim == spv::DimSubpassData) + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin + { + switch (type.basetype) { - emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); + case SPIRType::Boolean: + return "bool"; + case SPIRType::SByte: + return backend.basic_int8_type; + case SPIRType::UByte: + return backend.basic_uint8_type; + case SPIRType::Short: + return backend.basic_int16_type; + case SPIRType::UShort: + return backend.basic_uint16_type; + case SPIRType::Int: + return backend.basic_int_type; + case SPIRType::UInt: + return backend.basic_uint_type; + case SPIRType::AtomicCounter: + return "atomic_uint"; + case SPIRType::Half: + return "float16_t"; + case SPIRType::Float: + return "float"; + case SPIRType::Double: + return "double"; + case SPIRType::Int64: + return "int64_t"; + case SPIRType::UInt64: + return "uint64_t"; + default: + return "???"; } - else + } + else if (type.vecsize > 1 && type.columns == 1) // Vector builtin + { + switch (type.basetype) { - emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); + case SPIRType::Boolean: + return join("bvec", type.vecsize); + case SPIRType::SByte: + return join("i8vec", type.vecsize); + case SPIRType::UByte: + return join("u8vec", type.vecsize); + case SPIRType::Short: + return join("i16vec", type.vecsize); + case SPIRType::UShort: + return join("u16vec", type.vecsize); + case SPIRType::Int: + return join("ivec", type.vecsize); + case SPIRType::UInt: + return join("uvec", type.vecsize); + case SPIRType::Half: + 
return join("f16vec", type.vecsize); + case SPIRType::Float: + return join("vec", type.vecsize); + case SPIRType::Double: + return join("dvec", type.vecsize); + case SPIRType::Int64: + return join("i64vec", type.vecsize); + case SPIRType::UInt64: + return join("u64vec", type.vecsize); + default: + return "???"; } - - require_extension_internal("GL_AMD_shader_fragment_mask"); - break; } - - case OpFragmentFetchAMD: + else if (type.vecsize == type.columns) // Simple Matrix builtin { - auto &type = expression_type(ops[2]); - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - - if (type.image.dim == spv::DimSubpassData) + switch (type.basetype) { - emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); + case SPIRType::Boolean: + return join("bmat", type.vecsize); + case SPIRType::Int: + return join("imat", type.vecsize); + case SPIRType::UInt: + return join("umat", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.vecsize); + case SPIRType::Float: + return join("mat", type.vecsize); + case SPIRType::Double: + return join("dmat", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; } - else + } + else + { + switch (type.basetype) { - emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); + case SPIRType::Boolean: + return join("bmat", type.columns, "x", type.vecsize); + case SPIRType::Int: + return join("imat", type.columns, "x", type.vecsize); + case SPIRType::UInt: + return join("umat", type.columns, "x", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.columns, "x", type.vecsize); + case SPIRType::Float: + return join("mat", type.columns, "x", type.vecsize); + case SPIRType::Double: + return join("dmat", type.columns, "x", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; } - - require_extension_internal("GL_AMD_shader_fragment_mask"); - break; } +} - // Vulkan 1.1 sub-group stuff ... 
- case OpGroupNonUniformElect: - case OpGroupNonUniformBroadcast: - case OpGroupNonUniformBroadcastFirst: - case OpGroupNonUniformBallot: - case OpGroupNonUniformInverseBallot: - case OpGroupNonUniformBallotBitExtract: - case OpGroupNonUniformBallotBitCount: - case OpGroupNonUniformBallotFindLSB: - case OpGroupNonUniformBallotFindMSB: - case OpGroupNonUniformShuffle: - case OpGroupNonUniformShuffleXor: - case OpGroupNonUniformShuffleUp: - case OpGroupNonUniformShuffleDown: - case OpGroupNonUniformAll: - case OpGroupNonUniformAny: - case OpGroupNonUniformAllEqual: - case OpGroupNonUniformFAdd: - case OpGroupNonUniformIAdd: - case OpGroupNonUniformFMul: - case OpGroupNonUniformIMul: - case OpGroupNonUniformFMin: - case OpGroupNonUniformFMax: - case OpGroupNonUniformSMin: - case OpGroupNonUniformSMax: - case OpGroupNonUniformUMin: - case OpGroupNonUniformUMax: - case OpGroupNonUniformBitwiseAnd: - case OpGroupNonUniformBitwiseOr: - case OpGroupNonUniformBitwiseXor: - case OpGroupNonUniformQuadSwap: - case OpGroupNonUniformQuadBroadcast: - emit_subgroup_op(instruction); - break; - - case OpFUnordEqual: - GLSL_BFOP(unsupported_FUnordEqual); - break; - - case OpFUnordNotEqual: - GLSL_BFOP(unsupported_FUnordNotEqual); - break; - - case OpFUnordLessThan: - GLSL_BFOP(unsupported_FUnordLessThan); - break; - - case OpFUnordGreaterThan: - GLSL_BFOP(unsupported_FUnordGreaterThan); - break; - - case OpFUnordLessThanEqual: - GLSL_BFOP(unsupported_FUnordLessThanEqual); - break; - - case OpFUnordGreaterThanEqual: - GLSL_BFOP(unsupported_FUnordGreaterThanEqual); - break; - - case OpReportIntersectionNV: - statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); - break; - case OpIgnoreIntersectionNV: - statement("ignoreIntersectionNV();"); - break; - case OpTerminateRayNV: - statement("terminateRayNV();"); - break; - case OpTraceNV: - statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", - 
to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", - to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", - to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); - break; - case OpExecuteCallableNV: - statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); - break; +void CompilerGLSL::add_variable(unordered_set &variables_primary, + const unordered_set &variables_secondary, string &name) +{ + if (name.empty()) + return; - case OpConvertUToPtr: + ParsedIR::sanitize_underscores(name); + if (ParsedIR::is_globally_reserved_identifier(name, true)) { - auto &type = get(ops[0]); - if (type.storage != StorageClassPhysicalStorageBufferEXT) - SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); - - auto op = type_to_glsl(type); - emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); - break; + name.clear(); + return; } - case OpConvertPtrToU: - { - auto &type = get(ops[0]); - auto &ptr_type = expression_type(ops[2]); - if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) - SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); + update_name_cache(variables_primary, variables_secondary, name); +} - auto op = type_to_glsl(type); - emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); - break; - } +void CompilerGLSL::add_local_variable_name(uint32_t id) +{ + add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias); +} - case OpUndef: - // Undefined value has been declared. 
- break; +void CompilerGLSL::add_resource_name(uint32_t id) +{ + add_variable(resource_names, block_names, ir.meta[id].decoration.alias); +} - case OpLine: - { - emit_line_directive(ops[0], ops[1]); - break; - } +void CompilerGLSL::add_header_line(const std::string &line) +{ + header_lines.push_back(line); +} - case OpNoLine: - break; +bool CompilerGLSL::has_extension(const std::string &ext) const +{ + auto itr = find(begin(forced_extensions), end(forced_extensions), ext); + return itr != end(forced_extensions); +} - default: - statement("// unimplemented op ", instruction.op); - break; - } +void CompilerGLSL::require_extension(const std::string &ext) +{ + if (!has_extension(ext)) + forced_extensions.push_back(ext); } -// Appends function arguments, mapped from global variables, beyond the specified arg index. -// This is used when a function call uses fewer arguments than the function defines. -// This situation may occur if the function signature has been dynamically modified to -// extract global variables referenced from within the function, and convert them to -// function arguments. This is necessary for shader languages that do not support global -// access to shader input content from within a function (eg. Metal). Each additional -// function args uses the name of the global variable. Function nesting will modify the -// functions and function calls all the way up the nesting chain. -void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist) +void CompilerGLSL::require_extension_internal(const string &ext) { - auto &args = func.arguments; - uint32_t arg_cnt = uint32_t(args.size()); - for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) + if (backend.supports_extensions && !has_extension(ext)) { - auto &arg = args[arg_idx]; - assert(arg.alias_global_variable); + forced_extensions.push_back(ext); + force_recompile(); + } +} - // If the underlying variable needs to be declared - // (ie. 
a local variable with deferred declaration), do so now. - uint32_t var_id = get(arg.id).basevariable; - if (var_id) - flush_variable_declaration(var_id); +void CompilerGLSL::flatten_buffer_block(VariableID id) +{ + auto &var = get(id); + auto &type = get(var.basetype); + auto name = to_name(type.self, false); + auto &flags = get_decoration_bitset(type.self); - arglist.push_back(to_func_call_arg(arg.id)); - } + if (!type.array.empty()) + SPIRV_CROSS_THROW(name + " is an array of UBOs."); + if (type.basetype != SPIRType::Struct) + SPIRV_CROSS_THROW(name + " is not a struct."); + if (!flags.get(DecorationBlock)) + SPIRV_CROSS_THROW(name + " is not a block."); + if (type.member_types.empty()) + SPIRV_CROSS_THROW(name + " is an empty struct."); + + flattened_buffer_blocks.insert(id); } -string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) +bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const { - auto &memb = ir.meta[type.self].members; - if (index < memb.size() && !memb[index].alias.empty()) - return memb[index].alias; - else - return join("_m", index); + return false; // GLSL itself does not need to translate array builtin types to non-array builtin types } -string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) +bool CompilerGLSL::check_atomic_image(uint32_t id) { - return join(".", to_member_name(type, index)); + auto &type = expression_type(id); + if (type.storage == StorageClassImage) + { + if (options.es && options.version < 320) + require_extension_internal("GL_OES_shader_image_atomic"); + + auto *var = maybe_get_backing_variable(id); + if (var) + { + if (has_decoration(var->self, DecorationNonWritable) || has_decoration(var->self, DecorationNonReadable)) + { + unset_decoration(var->self, DecorationNonWritable); + unset_decoration(var->self, DecorationNonReadable); + force_recompile(); + } + } + return true; + } + else + return false; } -void 
CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) +void CompilerGLSL::add_function_overload(const SPIRFunction &func) { - auto &memb = ir.meta[type.self].members; - if (index < memb.size() && !memb[index].alias.empty()) + Hasher hasher; + for (auto &arg : func.arguments) { - auto &name = memb[index].alias; - if (name.empty()) - return; + // Parameters can vary with pointer type or not, + // but that will not change the signature in GLSL/HLSL, + // so strip the pointer type before hashing. + uint32_t type_id = get_pointee_type_id(arg.type); + auto &type = get(type_id); - // Reserved for temporaries. - if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) + if (!combined_image_samplers.empty()) { - name.clear(); - return; + // If we have combined image samplers, we cannot really trust the image and sampler arguments + // we pass down to callees, because they may be shuffled around. + // Ignore these arguments, to make sure that functions need to differ in some other way + // to be considered different overloads. + if (type.basetype == SPIRType::SampledImage || + (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) + { + continue; + } } - update_name_cache(type.member_name_cache, name); + hasher.u32(type_id); + } + uint64_t types_hash = hasher.get(); + + auto function_name = to_name(func.self); + auto itr = function_overloads.find(function_name); + if (itr != end(function_overloads)) + { + // There exists a function with this name already. + auto &overloads = itr->second; + if (overloads.count(types_hash) != 0) + { + // Overload conflict, assign a new name. + add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); + } + else + { + // Can reuse the name. + overloads.insert(types_hash); + } + } + else + { + // First time we see this function name. 
+ add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); } } -// Checks whether the ID is a row_major matrix that requires conversion before use -bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) +void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) { - // Natively supported row-major matrices do not need to be converted. - // Legacy targets do not support row major. - if (backend.native_row_major_matrix && !is_legacy()) - return false; + if (func.self != ir.default_entry_point) + add_function_overload(func); - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_decoration(id, DecorationRowMajor)) - return false; + // Avoid shadow declarations. + local_variable_names = resource_names; - // Only square row-major matrices can be converted at this time. - // Converting non-square matrices will require defining custom GLSL function that - // swaps matrix elements while retaining the original dimensional form of the matrix. - const auto type = expression_type(id); - if (type.columns != type.vecsize) - SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + string decl; - return true; -} + auto &type = get(func.return_type); + decl += flags_to_qualifiers_glsl(type, return_flags); + decl += type_to_glsl(type); + decl += type_to_array_glsl(type); + decl += " "; -// Checks whether the member is a row_major matrix that requires conversion before use -bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) -{ - // Natively supported row-major matrices do not need to be converted. - if (backend.native_row_major_matrix && !is_legacy()) - return false; + if (func.self == ir.default_entry_point) + { + // If we need complex fallback in GLSL, we just wrap main() in a function + // and interlock the entire shader ... 
+ if (interlocked_is_complex) + decl += "spvMainInterlockedBody"; + else + decl += "main"; - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_member_decoration(type.self, index, DecorationRowMajor)) - return false; + processing_entry_point = true; + } + else + decl += to_name(func.self); - // Only square row-major matrices can be converted at this time. - // Converting non-square matrices will require defining custom GLSL function that - // swaps matrix elements while retaining the original dimensional form of the matrix. - const auto mbr_type = get(type.member_types[index]); - if (mbr_type.columns != mbr_type.vecsize) - SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + decl += "("; + SmallVector arglist; + for (auto &arg : func.arguments) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg.id)) + continue; - return true; -} + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); -// Checks whether the member is in packed data type, that might need to be unpacked. -// GLSL does not define packed data types, but certain subclasses do. -bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const -{ - return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked); -} + arglist.push_back(argument_decl(arg)); -// Wraps the expression string in a function call that converts the -// row_major matrix result of the expression to a column_major matrix. -// Base implementation uses the standard library transpose() function. -// Subclasses may override to use a different function. 
-string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/) -{ - strip_enclosed_expression(exp_str); - return join("transpose(", exp_str, ")"); -} + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } -string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) -{ - string type_name = type_to_glsl(type, id); - remap_variable_type_name(type, name, type_name); - return join(type_name, " ", name, type_to_array_glsl(type)); + for (auto &arg : func.shadow_arguments) + { + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + decl += merge(arglist); + decl += ")"; + statement(decl); } -// Emit a structure member. Subclasses may override to modify output, -// or to dynamically add a padding member if needed. -void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, - const string &qualifier, uint32_t) +void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) { - auto &membertype = get(member_type_id); + // Avoid potential cycles. + if (func.active) + return; + func.active = true; - Bitset memberflags; - auto &memb = ir.meta[type.self].members; - if (index < memb.size()) - memberflags = memb[index].decoration_flags; + // If we depend on a function, emit that function before we emit our own function. 
+ for (auto block : func.blocks) + { + auto &b = get(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); - string qualifiers; - bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || - ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + if (op == OpFunctionCall) + { + // Recursively emit functions which are called. + uint32_t id = ops[2]; + emit_function(get(id), ir.meta[ops[1]].decoration.decoration_flags); + } + } + } - if (is_block) - qualifiers = to_interpolation_qualifiers(memberflags); + if (func.entry_line.file_id != 0) + emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal); + emit_function_prototype(func, return_flags); + begin_scope(); - statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags), - variable_decl(membertype, to_member_name(type, index)), ";"); -} + if (func.self == ir.default_entry_point) + emit_entry_point_declarations(); -const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) -{ - // GL_EXT_buffer_reference variables can be marked as restrict. - if (flags.get(DecorationRestrictPointerEXT)) - return "restrict "; + current_function = &func; + auto &entry_block = get(func.entry_block); - // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). 
- if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt && - type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage && - type.basetype != SPIRType::Sampler) - return ""; + sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); + for (auto &array : func.constant_arrays_needed_on_stack) + { + auto &c = get(array); + auto &type = get(c.constant_type); + statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); + } - if (options.es) + for (auto &v : func.local_variables) { - auto &execution = get_entry_point(); + auto &var = get(v); + var.deferred_declaration = false; - if (flags.get(DecorationRelaxedPrecision)) + if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup)) { - bool implied_fmediump = type.basetype == SPIRType::Float && - options.fragment.default_float_precision == Options::Mediump && - execution.model == ExecutionModelFragment; + // Special variable type which cannot have initializer, + // need to be declared as standalone variables. + // Comes from MSL which can push global variables as local variables in main function. + add_local_variable_name(var.self); + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else if (var.storage == StorageClassPrivate) + { + // These variables will not have had their CFG usage analyzed, so move it to the entry block. + // Comes from MSL which can push global variables as local variables in main function. + // We could just declare them right now, but we would miss out on an important initialization case which is + // LUT declaration in MSL. + // If we don't declare the variable when it is assigned we're forced to go through a helper function + // which copies elements one by one. 
+ add_local_variable_name(var.self); - bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && - options.fragment.default_int_precision == Options::Mediump && - execution.model == ExecutionModelFragment; + if (var.initializer) + { + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else + { + auto &dominated = entry_block.dominated_variables; + if (find(begin(dominated), end(dominated), var.self) == end(dominated)) + entry_block.dominated_variables.push_back(var.self); + var.deferred_declaration = true; + } + } + else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) + { + // No need to declare this variable, it has a static expression. + var.deferred_declaration = false; + } + else if (expression_is_lvalue(v)) + { + add_local_variable_name(var.self); - return implied_fmediump || implied_imediump ? "" : "mediump "; + // Loop variables should never be declared early, they are explicitly emitted in a loop. + if (var.initializer && !var.loop_variable) + statement(variable_decl_function_local(var), ";"); + else + { + // Don't declare variable until first use to declutter the GLSL output quite a lot. + // If we don't touch the variable before first branch, + // declare it then since we need variable declaration to be in top scope. + var.deferred_declaration = true; + } } else { - bool implied_fhighp = - type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && - execution.model == ExecutionModelFragment) || - (execution.model != ExecutionModelFragment)); + // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. + // For these types (non-lvalue), we enforce forwarding through a shadowed variable. + // This means that when we OpStore to these variables, we just write in the expression ID directly. 
+ // This breaks any kind of branching, since the variable must be statically assigned. + // Branching on samplers and images would be pretty much impossible to fake in GLSL. + var.statically_assigned = true; + } - bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && - ((options.fragment.default_int_precision == Options::Highp && - execution.model == ExecutionModelFragment) || - (execution.model != ExecutionModelFragment)); + var.loop_variable_enable = false; - return implied_fhighp || implied_ihighp ? "" : "highp "; + // Loop variables are never declared outside their for-loop, so block any implicit declaration. + if (var.loop_variable) + { + var.deferred_declaration = false; + // Need to reset the static expression so we can fallback to initializer if need be. + var.static_expression = 0; } } - else if (backend.allow_precision_qualifiers) + + // Enforce declaration order for regression testing purposes. + for (auto &block_id : func.blocks) { - // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. - // The default is highp however, so only emit mediump in the rare case that a shader has these. - if (flags.get(DecorationRelaxedPrecision)) - return "mediump "; - else - return ""; + auto &block = get(block_id); + sort(begin(block.dominated_variables), end(block.dominated_variables)); + } + + for (auto &line : current_function->fixup_hooks_in) + line(); + + emit_block_chain(entry_block); + + end_scope(); + processing_entry_point = false; + statement(""); + + // Make sure deferred declaration state for local variables is cleared when we are done with function. + // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise. 
+ for (auto &v : func.local_variables) + { + auto &var = get(v); + var.deferred_declaration = false; } - else - return ""; } -const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +void CompilerGLSL::emit_fixup() { - auto &type = expression_type(id); - bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es; - if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) + if (is_vertex_like_shader()) { - // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. - auto &result_type = get(type.image.type); - if (result_type.width < 32) - return "mediump "; + if (options.vertex.fixup_clipspace) + { + const char *suffix = backend.float_literal_suffix ? "f" : ""; + statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;"); + } + + if (options.vertex.flip_vert_y) + statement("gl_Position.y = -gl_Position.y;"); } - return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); } -string CompilerGLSL::to_qualifiers_glsl(uint32_t id) +void CompilerGLSL::flush_phi(BlockID from, BlockID to) { - auto &flags = ir.meta[id].decoration.decoration_flags; - string res; + auto &child = get(to); + if (child.ignore_phi_from_block == from) + return; - auto *var = maybe_get(id); + unordered_set temporary_phi_variables; - if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) - res += "shared "; + for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr) + { + auto &phi = *itr; - res += to_interpolation_qualifiers(flags); - if (var) - res += to_storage_qualifiers_glsl(*var); + if (phi.parent == from) + { + auto &var = get(phi.function_variable); - auto &type = expression_type(id); - if (type.image.dim != DimSubpassData && type.image.sampled == 2) - { - if (flags.get(DecorationCoherent)) - res += "coherent "; - if (flags.get(DecorationRestrict)) - res += "restrict "; - if 
(flags.get(DecorationNonWritable)) - res += "readonly "; - if (flags.get(DecorationNonReadable)) - res += "writeonly "; - } + // A Phi variable might be a loop variable, so flush to static expression. + if (var.loop_variable && !var.loop_variable_enable) + var.static_expression = phi.local_variable; + else + { + flush_variable_declaration(phi.function_variable); - res += to_precision_qualifiers_glsl(id); + // Check if we are going to write to a Phi variable that another statement will read from + // as part of another Phi node in our target block. + // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. + // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. + bool need_saved_temporary = + find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { + return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from; + }) != end(child.phi_variables); - return res; -} + if (need_saved_temporary) + { + // Need to make sure we declare the phi variable with a copy at the right scope. + // We cannot safely declare a temporary here since we might be inside a continue block. + if (!var.allocate_temporary_copy) + { + var.allocate_temporary_copy = true; + force_recompile(); + } + statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";"); + temporary_phi_variables.insert(phi.function_variable); + } -string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) -{ - // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... - auto &type = expression_type(arg.id); - const char *direction = ""; + // This might be called in continue block, so make sure we + // use this to emit ESSL 1.0 compliant increments/decrements. 
+ auto lhs = to_expression(phi.function_variable); - if (type.pointer) - { - if (arg.write_count && arg.read_count) - direction = "inout "; - else if (arg.write_count) - direction = "out "; - } + string rhs; + if (temporary_phi_variables.count(phi.local_variable)) + rhs = join("_", phi.local_variable, "_copy"); + else + rhs = to_pointer_expression(phi.local_variable); - return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); -} + if (!optimize_read_modify_write(get(var.basetype), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } -string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) -{ - return to_expression(var.initializer); + register_write(phi.function_variable); + } + } } -string CompilerGLSL::variable_decl(const SPIRVariable &variable) +void CompilerGLSL::branch_to_continue(BlockID from, BlockID to) { - // Ignore the pointer type since GLSL doesn't have pointers. - auto &type = get_variable_data_type(variable); + auto &to_block = get(to); + if (from == to) + return; - if (type.pointer_depth > 1) - SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); + assert(is_continue(to)); + if (to_block.complex_continue) + { + // Just emit the whole block chain as is. + auto usage_counts = expression_usage_counts; - auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); + emit_block_chain(to_block); - if (variable.loop_variable && variable.static_expression) - { - uint32_t expr = variable.static_expression; - if (ir.ids[expr].get_type() != TypeUndef) - res += join(" = ", to_expression(variable.static_expression)); + // Expression usage counts are moot after returning from the continue block. 
+ expression_usage_counts = usage_counts; } - else if (variable.initializer) + else { - uint32_t expr = variable.initializer; - if (ir.ids[expr].get_type() != TypeUndef) - res += join(" = ", to_initializer_expression(variable)); - } - return res; -} + auto &from_block = get(from); + bool outside_control_flow = false; + uint32_t loop_dominator = 0; -const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) -{ - auto &flags = ir.meta[variable.self].decoration.decoration_flags; - if (flags.get(DecorationRelaxedPrecision)) - return "mediump "; - else - return "highp "; -} + // FIXME: Refactor this to not use the old loop_dominator tracking. + if (from_block.merge_block) + { + // If we are a loop header, we don't set the loop dominator, + // so just use "self" here. + loop_dominator = from; + } + else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + loop_dominator = from_block.loop_dominator; + } -string CompilerGLSL::pls_decl(const PlsRemap &var) -{ - auto &variable = get(var.id); + if (loop_dominator != 0) + { + auto &cfg = get_cfg_for_current_function(); - SPIRType type; - type.vecsize = pls_format_to_components(var.format); - type.basetype = pls_format_to_basetype(var.format); + // For non-complex continue blocks, we implicitly branch to the continue block + // by having the continue block be part of the loop header in for (; ; continue-block). + outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from); + } - return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", - to_name(variable.self)); + // Some simplification for for-loops. We always end up with a useless continue; + // statement since we branch to a loop block. + // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block, + // we can avoid writing out an explicit continue statement. 
+ // Similar optimization to return statements if we know we're outside flow control. + if (!outside_control_flow) + statement("continue;"); + } } -uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const +void CompilerGLSL::branch(BlockID from, BlockID to) { - return to_array_size_literal(type, uint32_t(type.array.size() - 1)); -} + flush_phi(from, to); + flush_control_dependent_expressions(from); -uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const -{ - assert(type.array.size() == type.array_size_literal.size()); + bool to_is_continue = is_continue(to); - if (type.array_size_literal[index]) + // This is only a continue if we branch to our loop dominator. + if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get(from).loop_dominator == to) { - return type.array[index]; + // This can happen if we had a complex continue block which was emitted. + // Once the continue block tries to branch to the loop header, just emit continue; + // and end the chain here. + statement("continue;"); + } + else if (from != to && is_break(to)) + { + // We cannot break to ourselves, so check explicitly for from != to. + // This case can trigger if a loop header is all three of these things: + // - Continue block + // - Loop header + // - Break merge target all at once ... + + // Very dirty workaround. + // Switch constructs are able to break, but they cannot break out of a loop at the same time, + // yet SPIR-V allows it. + // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, + // write to the ladder here, and defer the break. + // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. 
+ if (is_loop_break(to)) + { + for (size_t n = current_emitting_switch_stack.size(); n; n--) + { + auto *current_emitting_switch = current_emitting_switch_stack[n - 1]; + + if (current_emitting_switch && + current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) && + get(current_emitting_switch->loop_dominator).merge_block == to) + { + if (!current_emitting_switch->need_ladder_break) + { + force_recompile(); + current_emitting_switch->need_ladder_break = true; + } + + statement("_", current_emitting_switch->self, "_ladder_break = true;"); + } + else + break; + } + } + statement("break;"); } - else + else if (to_is_continue || from == to) { - // Use the default spec constant value. - // This is the best we can do. - uint32_t array_size_id = type.array[index]; + // For from == to case can happen for a do-while loop which branches into itself. + // We don't mark these cases as continue blocks, but the only possible way to branch into + // ourselves is through means of continue blocks. - // Explicitly check for this case. The error message you would get (bad cast) makes no sense otherwise. - if (ir.ids[array_size_id].get_type() == TypeConstantOp) - SPIRV_CROSS_THROW("An array size was found to be an OpSpecConstantOp. This is not supported since " - "SPIRV-Cross cannot deduce the actual size here."); + // If we are merging to a continue block, there is no need to emit the block chain for continue here. + // We can branch to the continue block after we merge execution. - uint32_t array_size = get(array_size_id).scalar(); - return array_size; + // Here we make use of structured control flow rules from spec: + // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block + // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG + // If we are branching to a merge block, we must be inside a construct which dominates the merge block. 
+ auto &block_meta = ir.block_meta[to]; + bool branching_to_merge = + (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT | + ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0; + if (!to_is_continue || !branching_to_merge) + branch_to_continue(from, to); } + else if (!is_conditional(to)) + emit_block_chain(get(to)); + + // It is important that we check for break before continue. + // A block might serve two purposes, a break block for the inner scope, and + // a continue block in the outer scope. + // Inner scope always takes precedence. } -string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) +void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block) { - assert(type.array.size() == type.array_size_literal.size()); + auto &from_block = get(from); + BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0); - // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. - // Opt for unsized as it's the more "correct" variant to use. - if (type.storage == StorageClassInput && (get_entry_point().model == ExecutionModelTessellationControl || - get_entry_point().model == ExecutionModelTessellationEvaluation)) - return ""; + // If we branch directly to our selection merge target, we don't need a code path. + bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block); + bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block); - auto &size = type.array[index]; - if (!type.array_size_literal[index]) - return to_expression(size); - else if (size) - return convert_to_string(size); - else if (!backend.unsized_array_supported) + if (!true_block_needs_code && !false_block_needs_code) + return; + + // We might have a loop merge here. Only consider selection flattening constructs. 
+ // Loop hints are handled explicitly elsewhere. + if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten) + emit_block_hints(from_block); + + if (true_block_needs_code) { - // For runtime-sized arrays, we can work around - // lack of standard support for this by simply having - // a single element array. - // - // Runtime length arrays must always be the last element - // in an interface block. - return "1"; + statement("if (", to_expression(cond), ")"); + begin_scope(); + branch(from, true_block); + end_scope(); + + if (false_block_needs_code) + { + statement("else"); + begin_scope(); + branch(from, false_block); + end_scope(); + } + } + else if (false_block_needs_code) + { + // Only need false path, use negative conditional. + statement("if (!", to_enclosed_expression(cond), ")"); + begin_scope(); + branch(from, false_block); + end_scope(); } - else - return ""; } -string CompilerGLSL::type_to_array_glsl(const SPIRType &type) +// FIXME: This currently cannot handle complex continue blocks +// as in do-while. +// This should be seen as a "trivial" continue block. +string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) { - if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) - { - // We are using a wrapped pointer type, and we should not emit any array declarations here. - return ""; - } + auto *block = &get(continue_block); - if (type.array.empty()) - return ""; + // While emitting the continue block, declare_temporary will check this + // if we have to emit temporaries. + current_continue_block = block; - if (options.flatten_multidimensional_arrays) + SmallVector statements; + + // Capture all statements into our list. + auto *old = redirect_statement; + redirect_statement = &statements; + + // Stamp out all blocks one after each other. 
+ while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) { - string res; - res += "["; - for (auto i = uint32_t(type.array.size()); i; i--) + // Write out all instructions we have in this block. + emit_block_instructions(*block); + + // For plain branchless for/while continue blocks. + if (block->next_block) { - res += enclose_expression(to_array_size(type, i - 1)); - if (i > 1) - res += " * "; + flush_phi(continue_block, block->next_block); + block = &get(block->next_block); } - res += "]"; - return res; - } - else - { - if (type.array.size() > 1) + // For do while blocks. The last block will be a select block. + else if (block->true_block && follow_true_block) { - if (!options.es && options.version < 430) - require_extension_internal("GL_ARB_arrays_of_arrays"); - else if (options.es && options.version < 310) - SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. " - "Try using --flatten-multidimensional-arrays or set " - "options.flatten_multidimensional_arrays to true."); + flush_phi(continue_block, block->true_block); + block = &get(block->true_block); } - - string res; - for (auto i = uint32_t(type.array.size()); i; i--) + else if (block->false_block && follow_false_block) { - res += "["; - res += to_array_size(type, i - 1); - res += "]"; + flush_phi(continue_block, block->false_block); + block = &get(block->false_block); + } + else + { + SPIRV_CROSS_THROW("Invalid continue block detected!"); } - return res; } -} -string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) -{ - auto &imagetype = get(type.image.type); - string res; + // Restore old pointer. + redirect_statement = old; - switch (imagetype.basetype) + // Somewhat ugly, strip off the last ';' since we use ',' instead. + // Ideally, we should select this behavior in statement(). 
+ for (auto &s : statements) { - case SPIRType::Int: - case SPIRType::Short: - case SPIRType::SByte: - res = "i"; - break; - case SPIRType::UInt: - case SPIRType::UShort: - case SPIRType::UByte: - res = "u"; - break; - default: - break; + if (!s.empty() && s.back() == ';') + s.erase(s.size() - 1, 1); } - // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. - // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter. - - if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) - return res + "subpassInput" + (type.image.ms ? "MS" : ""); + current_continue_block = nullptr; + return merge(statements); +} - // If we're emulating subpassInput with samplers, force sampler2D - // so we don't have to specify format. - if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) +void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) +{ + // While loops do not take initializers, so declare all of them outside. + for (auto &loop_var : block.loop_variables) { - // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. - if (type.image.dim == DimBuffer && type.image.sampled == 1) - res += "sampler"; - else - res += type.image.sampled == 2 ? 
"image" : "texture"; + auto &var = get(loop_var); + statement(variable_decl(var), ";"); } - else - res += "sampler"; - - switch (type.image.dim) - { - case Dim1D: - res += "1D"; - break; - case Dim2D: - res += "2D"; - break; - case Dim3D: - res += "3D"; - break; - case DimCube: - res += "Cube"; - break; - case DimRect: - if (options.es) - SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); +} - if (is_legacy_desktop()) - require_extension_internal("GL_ARB_texture_rectangle"); +string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) +{ + if (block.loop_variables.empty()) + return ""; - res += "2DRect"; - break; + bool same_types = for_loop_initializers_are_same_type(block); + // We can only declare for loop initializers if all variables are of same type. + // If we cannot do this, declare individual variables before the loop header. - case DimBuffer: - if (options.es && options.version < 320) - require_extension_internal("GL_OES_texture_buffer"); - else if (!options.es && options.version < 300) - require_extension_internal("GL_EXT_texture_buffer_object"); - res += "Buffer"; - break; + // We might have a loop variable candidate which was not assigned to for some reason. + uint32_t missing_initializers = 0; + for (auto &variable : block.loop_variables) + { + uint32_t expr = get(variable).static_expression; - case DimSubpassData: - res += "2D"; - break; - default: - SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); + // Sometimes loop variables are initialized with OpUndef, but we can just declare + // a plain variable without initializer in this case. 
+ if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) + missing_initializers++; } - if (type.image.ms) - res += "MS"; - if (type.image.arrayed) + if (block.loop_variables.size() == 1 && missing_initializers == 0) { - if (is_legacy_desktop()) - require_extension_internal("GL_EXT_texture_array"); - res += "Array"; + return variable_decl(get(block.loop_variables.front())); + } + else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) + { + for (auto &loop_var : block.loop_variables) + statement(variable_decl(get(loop_var)), ";"); + return ""; } + else + { + // We have a mix of loop variables, either ones with a clear initializer, or ones without. + // Separate the two streams. + string expr; + + for (auto &loop_var : block.loop_variables) + { + uint32_t static_expr = get(loop_var).static_expression; + if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) + { + statement(variable_decl(get(loop_var)), ";"); + } + else + { + auto &var = get(loop_var); + auto &type = get_variable_data_type(var); + if (expr.empty()) + { + // For loop initializers are of the form (var).static_expression; + if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) + continue; + + if (expected == 0) + { + expected = get(var).basetype; + expected_flags = get_decoration_bitset(var); + } + else if (expected != get(var).basetype) + return false; + + // Precision flags and things like that must also match. + if (expected_flags != get_decoration_bitset(var)) + return false; } - auto e = type_to_glsl(type); - for (uint32_t i = 0; i < type.array.size(); i++) - e += "[]"; - return e; + return true; } -// The optional id parameter indicates the object whose type we are trying -// to find the description for. It is optional. Most type descriptions do not -// depend on a specific object's use of that type. 
-string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) +bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method) { - if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + SPIRBlock::ContinueBlockType continue_type = continue_block_type(get(block.continue_block)); + + if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) { - // Need to create a magic type name which compacts the entire type information. - string name = type_to_glsl(get_pointee_type(type)); - for (size_t i = 0; i < type.array.size(); i++) + uint32_t current_count = statement_count; + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. + emit_block_instructions(block); + + bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); + + // This can work! We only did trivial things which could be forwarded in block body! + if (current_count == statement_count && condition_is_temporary) { - if (type.array_size_literal[i]) - name += join(type.array[i], "_"); - else - name += join("id", type.array[i], "_"); - } - name += "Pointer"; - return name; - } + switch (continue_type) + { + case SPIRBlock::ForLoop: + { + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. + flush_undeclared_variables(block); - switch (type.basetype) - { - case SPIRType::Struct: - // Need OpName lookup here to get a "sensible" name for a struct. - if (backend.explicit_struct_type) - return join("struct ", to_name(type.self)); - else - return to_name(type.self); + // Important that we do this in this order because + // emitting the continue block can invalidate the condition expression. 
+ auto initializer = emit_for_loop_initializers(block); + auto condition = to_expression(block.condition); - case SPIRType::Image: - case SPIRType::SampledImage: - return image_type_glsl(type, id); + // Condition might have to be inverted. + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + condition = join("!", enclose_expression(condition)); - case SPIRType::Sampler: - // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing - // this distinction into the type system. - return comparison_ids.count(id) ? "samplerShadow" : "sampler"; + emit_block_hints(block); + if (method != SPIRBlock::MergeToSelectContinueForLoop) + { + auto continue_block = emit_continue_block(block.continue_block, false, false); + statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); + } + else + statement("for (", initializer, "; ", condition, "; )"); + break; + } - case SPIRType::AccelerationStructureNV: - return "accelerationStructureNV"; + case SPIRBlock::WhileLoop: + { + // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header. + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emit_block_hints(block); - case SPIRType::Void: - return "void"; + auto condition = to_expression(block.condition); + // Condition might have to be inverted. + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + condition = join("!", enclose_expression(condition)); - default: - break; - } + statement("while (", condition, ")"); + break; + } - if (type.basetype == SPIRType::UInt && is_legacy()) - SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); + default: + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. 
+ return false; + } - if (type.vecsize == 1 && type.columns == 1) // Scalar builtin - { - switch (type.basetype) - { - case SPIRType::Boolean: - return "bool"; - case SPIRType::SByte: - return backend.basic_int8_type; - case SPIRType::UByte: - return backend.basic_uint8_type; - case SPIRType::Short: - return backend.basic_int16_type; - case SPIRType::UShort: - return backend.basic_uint16_type; - case SPIRType::Int: - return backend.basic_int_type; - case SPIRType::UInt: - return backend.basic_uint_type; - case SPIRType::AtomicCounter: - return "atomic_uint"; - case SPIRType::Half: - return "float16_t"; - case SPIRType::Float: - return "float"; - case SPIRType::Double: - return "double"; - case SPIRType::Int64: - return "int64_t"; - case SPIRType::UInt64: - return "uint64_t"; - default: - return "???"; - } - } - else if (type.vecsize > 1 && type.columns == 1) // Vector builtin - { - switch (type.basetype) - { - case SPIRType::Boolean: - return join("bvec", type.vecsize); - case SPIRType::SByte: - return join("i8vec", type.vecsize); - case SPIRType::UByte: - return join("u8vec", type.vecsize); - case SPIRType::Short: - return join("i16vec", type.vecsize); - case SPIRType::UShort: - return join("u16vec", type.vecsize); - case SPIRType::Int: - return join("ivec", type.vecsize); - case SPIRType::UInt: - return join("uvec", type.vecsize); - case SPIRType::Half: - return join("f16vec", type.vecsize); - case SPIRType::Float: - return join("vec", type.vecsize); - case SPIRType::Double: - return join("dvec", type.vecsize); - case SPIRType::Int64: - return join("i64vec", type.vecsize); - case SPIRType::UInt64: - return join("u64vec", type.vecsize); - default: - return "???"; - } - } - else if (type.vecsize == type.columns) // Simple Matrix builtin - { - switch (type.basetype) - { - case SPIRType::Boolean: - return join("bmat", type.vecsize); - case SPIRType::Int: - return join("imat", type.vecsize); - case SPIRType::UInt: - return join("umat", type.vecsize); - case 
SPIRType::Half: - return join("f16mat", type.vecsize); - case SPIRType::Float: - return join("mat", type.vecsize); - case SPIRType::Double: - return join("dmat", type.vecsize); - // Matrix types not supported for int64/uint64. - default: - return "???"; + begin_scope(); + return true; } - } - else - { - switch (type.basetype) + else { - case SPIRType::Boolean: - return join("bmat", type.columns, "x", type.vecsize); - case SPIRType::Int: - return join("imat", type.columns, "x", type.vecsize); - case SPIRType::UInt: - return join("umat", type.columns, "x", type.vecsize); - case SPIRType::Half: - return join("f16mat", type.columns, "x", type.vecsize); - case SPIRType::Float: - return join("mat", type.columns, "x", type.vecsize); - case SPIRType::Double: - return join("dmat", type.columns, "x", type.vecsize); - // Matrix types not supported for int64/uint64. - default: - return "???"; + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; } } -} + else if (method == SPIRBlock::MergeToDirectForLoop) + { + auto &child = get(block.next_block); -void CompilerGLSL::add_variable(unordered_set &variables_primary, - const unordered_set &variables_secondary, string &name) -{ - if (name.empty()) - return; + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. + flush_undeclared_variables(child); - // Reserved for temporaries. - if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) - { - name.clear(); - return; - } + uint32_t current_count = statement_count; - // Avoid double underscores. - name = sanitize_underscores(name); + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. 
+ emit_block_instructions(child); - update_name_cache(variables_primary, variables_secondary, name); -} + bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); -void CompilerGLSL::add_local_variable_name(uint32_t id) -{ - add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias); -} + if (current_count == statement_count && condition_is_temporary) + { + uint32_t target_block = child.true_block; -void CompilerGLSL::add_resource_name(uint32_t id) -{ - add_variable(resource_names, block_names, ir.meta[id].decoration.alias); -} + switch (continue_type) + { + case SPIRBlock::ForLoop: + { + // Important that we do this in this order because + // emitting the continue block can invalidate the condition expression. + auto initializer = emit_for_loop_initializers(block); + auto condition = to_expression(child.condition); -void CompilerGLSL::add_header_line(const std::string &line) -{ - header_lines.push_back(line); -} + // Condition might have to be inverted. + if (execution_is_noop(get(child.true_block), get(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } -bool CompilerGLSL::has_extension(const std::string &ext) const -{ - auto itr = find(begin(forced_extensions), end(forced_extensions), ext); - return itr != end(forced_extensions); + auto continue_block = emit_continue_block(block.continue_block, false, false); + emit_block_hints(block); + statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); + break; + } + + case SPIRBlock::WhileLoop: + { + emit_while_loop_initializers(block); + emit_block_hints(block); + + auto condition = to_expression(child.condition); + // Condition might have to be inverted. 
+ if (execution_is_noop(get(child.true_block), get(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } + + statement("while (", condition, ")"); + break; + } + + default: + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + + begin_scope(); + branch(child.self, target_block); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + } + else + return false; } -void CompilerGLSL::require_extension(const std::string &ext) +void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) { - if (!has_extension(ext)) - forced_extensions.push_back(ext); + for (auto &v : block.dominated_variables) + flush_variable_declaration(v); } -void CompilerGLSL::require_extension_internal(const string &ext) +void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &temporaries) { - if (backend.supports_extensions && !has_extension(ext)) + // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. + // Need to sort these to ensure that reference output is stable. + sort(begin(temporaries), end(temporaries), + [](const pair &a, const pair &b) { return a.second < b.second; }); + + for (auto &tmp : temporaries) { - forced_extensions.push_back(ext); - force_recompile(); - } -} + auto &type = get(tmp.first); -void CompilerGLSL::flatten_buffer_block(uint32_t id) -{ - auto &var = get(id); - auto &type = get(var.basetype); - auto name = to_name(type.self, false); - auto &flags = ir.meta[type.self].decoration.decoration_flags; + // There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries. + // This should be ignored unless we're doing actual variable pointers and backend supports it. 
+ // Access chains cannot normally be lowered to temporaries in GLSL and HLSL. + if (type.pointer && !backend.native_pointers) + continue; - if (!type.array.empty()) - SPIRV_CROSS_THROW(name + " is an array of UBOs."); - if (type.basetype != SPIRType::Struct) - SPIRV_CROSS_THROW(name + " is not a struct."); - if (!flags.get(DecorationBlock)) - SPIRV_CROSS_THROW(name + " is not a block."); - if (type.member_types.empty()) - SPIRV_CROSS_THROW(name + " is an empty struct."); + add_local_variable_name(tmp.second); + auto &flags = get_decoration_bitset(tmp.second); - flattened_buffer_blocks.insert(id); -} + // Not all targets support pointer literals, so don't bother with that case. + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(tmp.first)); -bool CompilerGLSL::check_atomic_image(uint32_t id) -{ - auto &type = expression_type(id); - if (type.storage == StorageClassImage) - { - if (options.es && options.version < 320) - require_extension_internal("GL_OES_shader_image_atomic"); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";"); - auto *var = maybe_get_backing_variable(id); - if (var) + hoisted_temporaries.insert(tmp.second); + forced_temporaries.insert(tmp.second); + + // The temporary might be read from before it's assigned, set up the expression now. + set(tmp.second, to_name(tmp.second), tmp.first, true); + + // If we have hoisted temporaries in multi-precision contexts, emit that here too ... + // We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here. 
+ auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(tmp.second); + if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end()) { - auto &flags = ir.meta[var->self].decoration.decoration_flags; - if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable)) - { - flags.clear(DecorationNonWritable); - flags.clear(DecorationNonReadable); - force_recompile(); - } + uint32_t mirror_id = mirrored_precision_itr->second; + auto &mirror_flags = get_decoration_bitset(mirror_id); + statement(flags_to_qualifiers_glsl(type, mirror_flags), + variable_decl(type, to_name(mirror_id)), + initializer, ";"); + // The temporary might be read from before it's assigned, set up the expression now. + set(mirror_id, to_name(mirror_id), tmp.first, true); + hoisted_temporaries.insert(mirror_id); } - return true; } - else - return false; } -void CompilerGLSL::add_function_overload(const SPIRFunction &func) +void CompilerGLSL::emit_block_chain(SPIRBlock &block) { - Hasher hasher; - for (auto &arg : func.arguments) + bool select_branch_to_true_block = false; + bool select_branch_to_false_block = false; + bool skip_direct_branch = false; + bool emitted_loop_header_variables = false; + bool force_complex_continue_block = false; + ValueSaver loop_level_saver(current_loop_level); + + if (block.merge == SPIRBlock::MergeLoop) + add_loop_level(); + + emit_hoisted_temporaries(block.declare_temporary); + + SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; + if (block.continue_block) { - // Parameters can vary with pointer type or not, - // but that will not change the signature in GLSL/HLSL, - // so strip the pointer type before hashing. - uint32_t type_id = get_pointee_type_id(arg.type); - auto &type = get(type_id); + continue_type = continue_block_type(get(block.continue_block)); + // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles. 
+ if (continue_type == SPIRBlock::ComplexLoop) + block.complex_continue = true; + } - if (!combined_image_samplers.empty()) - { - // If we have combined image samplers, we cannot really trust the image and sampler arguments - // we pass down to callees, because they may be shuffled around. - // Ignore these arguments, to make sure that functions need to differ in some other way - // to be considered different overloads. - if (type.basetype == SPIRType::SampledImage || - (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) - { - continue; - } - } + // If we have loop variables, stop masking out access to the variable now. + for (auto var_id : block.loop_variables) + { + auto &var = get(var_id); + var.loop_variable_enable = true; + // We're not going to declare the variable directly, so emit a copy here. + emit_variable_temporary_copies(var); + } - hasher.u32(type_id); + // Remember deferred declaration state. We will restore it before returning. + SmallVector rearm_dominated_variables(block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var_id = block.dominated_variables[i]; + auto &var = get(var_id); + rearm_dominated_variables[i] = var.deferred_declaration; } - uint64_t types_hash = hasher.get(); - auto function_name = to_name(func.self); - auto itr = function_overloads.find(function_name); - if (itr != end(function_overloads)) + // This is the method often used by spirv-opt to implement loops. + // The loop header goes straight into the continue block. + // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, + // it *MUST* be used in the continue block. This loop method will not work. + if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop)) { - // There exists a function with this name already. 
- auto &overloads = itr->second; - if (overloads.count(types_hash) != 0) + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop)) { - // Overload conflict, assign a new name. - add_resource_name(func.self); - function_overloads[to_name(func.self)].insert(types_hash); + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + force_complex_continue_block = true; } - else + } + // This is the older loop behavior in glslang which branches to loop body directly from the loop header. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) + { + // The body of while, is actually just the true (or false) block, so always branch there unconditionally. + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + } + } + // This is the newer loop behavior in glslang which branches from Loop header directly to + // a new block, which in turn has a OpBranchSelection without a selection merge. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) { - // Can reuse the name. - overloads.insert(types_hash); + skip_direct_branch = true; + emitted_loop_header_variables = true; } } - else + else if (continue_type == SPIRBlock::DoWhileLoop) { - // First time we see this function name. 
- add_resource_name(func.self); - function_overloads[to_name(func.self)].insert(types_hash); - } -} - -void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) -{ - if (func.self != ir.default_entry_point) - add_function_overload(func); + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. + emit_hoisted_temporaries(block.potential_declare_temporary); + statement("do"); + begin_scope(); - // Avoid shadow declarations. - local_variable_names = resource_names; + emit_block_instructions(block); + } + else if (block.merge == SPIRBlock::MergeLoop) + { + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; - string decl; + // We have a generic loop without any distinguishable pattern like for, while or do while. + get(block.continue_block).complex_continue = true; + continue_type = SPIRBlock::ComplexLoop; - auto &type = get(func.return_type); - decl += flags_to_qualifiers_glsl(type, return_flags); - decl += type_to_glsl(type); - decl += type_to_array_glsl(type); - decl += " "; + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. 
+ emit_hoisted_temporaries(block.potential_declare_temporary); + emit_block_hints(block); + statement("for (;;)"); + begin_scope(); - if (func.self == ir.default_entry_point) - { - decl += "main"; - processing_entry_point = true; + emit_block_instructions(block); } else - decl += to_name(func.self); - - decl += "("; - SmallVector arglist; - for (auto &arg : func.arguments) { - // Do not pass in separate images or samplers if we're remapping - // to combined image samplers. - if (skip_argument(arg.id)) - continue; - - // Might change the variable name if it already exists in this function. - // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation - // to use same name for variables. - // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. - add_local_variable_name(arg.id); - - arglist.push_back(argument_decl(arg)); - - // Hold a pointer to the parameter so we can invalidate the readonly field if needed. - auto *var = maybe_get(arg.id); - if (var) - var->parameter = &arg; + emit_block_instructions(block); } - for (auto &arg : func.shadow_arguments) + // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem + // as writes to said loop variables might have been masked out, we need a recompile. + if (!emitted_loop_header_variables && !block.loop_variables.empty()) { - // Might change the variable name if it already exists in this function. - // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation - // to use same name for variables. - // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. - add_local_variable_name(arg.id); - - arglist.push_back(argument_decl(arg)); - - // Hold a pointer to the parameter so we can invalidate the readonly field if needed. 
- auto *var = maybe_get(arg.id); - if (var) - var->parameter = &arg; + force_recompile_guarantee_forward_progress(); + for (auto var : block.loop_variables) + get(var).loop_variable = false; + block.loop_variables.clear(); } - decl += merge(arglist); - decl += ")"; - statement(decl); -} - -void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) -{ - // Avoid potential cycles. - if (func.active) - return; - func.active = true; + flush_undeclared_variables(block); + bool emit_next_block = true; - // If we depend on a function, emit that function before we emit our own function. - for (auto block : func.blocks) + // Handle end of block. + switch (block.terminator) { - auto &b = get(block); - for (auto &i : b.ops) + case SPIRBlock::Direct: + // True when emitting complex continue block. + if (block.loop_dominator == block.next_block) { - auto ops = stream(i); - auto op = static_cast(i.op); + branch(block.self, block.next_block); + emit_next_block = false; + } + // True if MergeToDirectForLoop succeeded. + else if (skip_direct_branch) + emit_next_block = false; + else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + break; - if (op == OpFunctionCall) + case SPIRBlock::Select: + // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. + if (select_branch_to_true_block) + { + if (force_complex_continue_block) { - // Recursively emit functions which are called. - uint32_t id = ops[2]; - emit_function(get(id), ir.meta[ops[1]].decoration.decoration_flags); + assert(block.true_block == block.continue_block); + + // We're going to emit a continue block directly here, so make sure it's marked as complex. 
+ auto &complex_continue = get(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.true_block); + complex_continue = old_complex; } + else + branch(block.self, block.true_block); } - } - - if (func.entry_line.file_id != 0) - emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal); - emit_function_prototype(func, return_flags); - begin_scope(); - - if (func.self == ir.default_entry_point) - emit_entry_point_declarations(); + else if (select_branch_to_false_block) + { + if (force_complex_continue_block) + { + assert(block.false_block == block.continue_block); - current_function = &func; - auto &entry_block = get(func.entry_block); + // We're going to emit a continue block directly here, so make sure it's marked as complex. + auto &complex_continue = get(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.false_block); + complex_continue = old_complex; + } + else + branch(block.self, block.false_block); + } + else + branch(block.self, block.condition, block.true_block, block.false_block); + break; - sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); - for (auto &array : func.constant_arrays_needed_on_stack) + case SPIRBlock::MultiSelect: { - auto &c = get(array); - auto &type = get(c.constant_type); - statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); - } + auto &type = expression_type(block.condition); + bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || + type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64; - for (auto &v : func.local_variables) - { - auto &var = get(v); - var.deferred_declaration = false; + if (block.merge == SPIRBlock::MergeNone) + SPIRV_CROSS_THROW("Switch statement is not structured"); - if (var.storage == 
StorageClassWorkgroup) - { - // Special variable type which cannot have initializer, - // need to be declared as standalone variables. - // Comes from MSL which can push global variables as local variables in main function. - add_local_variable_name(var.self); - statement(variable_decl(var), ";"); - var.deferred_declaration = false; - } - else if (var.storage == StorageClassPrivate) - { - // These variables will not have had their CFG usage analyzed, so move it to the entry block. - // Comes from MSL which can push global variables as local variables in main function. - // We could just declare them right now, but we would miss out on an important initialization case which is - // LUT declaration in MSL. - // If we don't declare the variable when it is assigned we're forced to go through a helper function - // which copies elements one by one. - add_local_variable_name(var.self); - auto &dominated = entry_block.dominated_variables; - if (find(begin(dominated), end(dominated), var.self) == end(dominated)) - entry_block.dominated_variables.push_back(var.self); - var.deferred_declaration = true; - } - else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) + if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)) { - // No need to declare this variable, it has a static expression. - var.deferred_declaration = false; + // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. 
+ SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors."); } - else if (expression_is_lvalue(v)) - { - add_local_variable_name(var.self); - if (var.initializer) - statement(variable_decl_function_local(var), ";"); - else + const char *label_suffix = ""; + if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) + label_suffix = "u"; + else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch) + label_suffix = "l"; + else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch) + label_suffix = "ul"; + else if (type.basetype == SPIRType::UShort) + label_suffix = backend.uint16_t_literal_suffix; + else if (type.basetype == SPIRType::Short) + label_suffix = backend.int16_t_literal_suffix; + + current_emitting_switch_stack.push_back(&block); + + if (block.need_ladder_break) + statement("bool _", block.self, "_ladder_break = false;"); + + // Find all unique case constructs. + unordered_map> case_constructs; + SmallVector block_declaration_order; + SmallVector literals_to_merge; + + // If a switch case branches to the default block for some reason, we can just remove that literal from consideration + // and let the default: block handle it. + // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. + // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. + auto &cases = get_case_list(block); + for (auto &c : cases) + { + if (c.block != block.next_block && c.block != block.default_block) { - // Don't declare variable until first use to declutter the GLSL output quite a lot. - // If we don't touch the variable before first branch, - // declare it then since we need variable declaration to be in top scope. 
- var.deferred_declaration = true; + if (!case_constructs.count(c.block)) + block_declaration_order.push_back(c.block); + case_constructs[c.block].push_back(c.value); + } + else if (c.block == block.next_block && block.default_block != block.next_block) + { + // We might have to flush phi inside specific case labels. + // If we can piggyback on default:, do so instead. + literals_to_merge.push_back(c.value); } } - else - { - // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. - // For these types (non-lvalue), we enforce forwarding through a shadowed variable. - // This means that when we OpStore to these variables, we just write in the expression ID directly. - // This breaks any kind of branching, since the variable must be statically assigned. - // Branching on samplers and images would be pretty much impossible to fake in GLSL. - var.statically_assigned = true; - } - - var.loop_variable_enable = false; - - // Loop variables are never declared outside their for-loop, so block any implicit declaration. - if (var.loop_variable) - var.deferred_declaration = false; - } - - // Enforce declaration order for regression testing purposes. - for (auto &block_id : func.blocks) - { - auto &block = get(block_id); - sort(begin(block.dominated_variables), end(block.dominated_variables)); - } - for (auto &line : current_function->fixup_hooks_in) - line(); + // Empty literal array -> default. + if (block.default_block != block.next_block) + { + auto &default_block = get(block.default_block); - emit_block_chain(entry_block); + // We need to slide in the default block somewhere in this chain + // if there are fall-through scenarios since the default is declared separately in OpSwitch. + // Only consider trivial fall-through cases here. 
+ size_t num_blocks = block_declaration_order.size(); + bool injected_block = false; - end_scope(); - processing_entry_point = false; - statement(""); + for (size_t i = 0; i < num_blocks; i++) + { + auto &case_block = get(block_declaration_order[i]); + if (execution_is_direct_branch(case_block, default_block)) + { + // Fallthrough to default block, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block); + injected_block = true; + break; + } + else if (execution_is_direct_branch(default_block, case_block)) + { + // Default case is falling through to another case label, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block); + injected_block = true; + break; + } + } - // Make sure deferred declaration state for local variables is cleared when we are done with function. - // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise. - for (auto &v : func.local_variables) - { - auto &var = get(v); - var.deferred_declaration = false; - } -} + // Order does not matter. + if (!injected_block) + block_declaration_order.push_back(block.default_block); + else if (is_legacy_es()) + SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0."); -void CompilerGLSL::emit_fixup() -{ - auto &execution = get_entry_point(); - if (execution.model == ExecutionModelVertex) - { - if (options.vertex.fixup_clipspace) - { - const char *suffix = backend.float_literal_suffix ? 
"f" : ""; - statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;"); + case_constructs[block.default_block] = {}; } - if (options.vertex.flip_vert_y) - statement("gl_Position.y = -gl_Position.y;"); - } -} + size_t num_blocks = block_declaration_order.size(); -bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to) -{ - auto &child = get(to); - for (auto &phi : child.phi_variables) - if (phi.parent == from) - return true; - return false; -} + const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string + { + if (is_unsigned_case) + return convert_to_string(literal); -void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) -{ - auto &child = get(to); - if (child.ignore_phi_from_block == from) - return; + // For smaller cases, the literals are compiled as 32 bit wide + // literals so we don't need to care for all sizes specifically. + if (width <= 32) + { + return convert_to_string(int64_t(int32_t(literal))); + } - unordered_set temporary_phi_variables; + return convert_to_string(int64_t(literal)); + }; - for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr) - { - auto &phi = *itr; + const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector &labels, + const char *suffix) -> string { + string ret; + size_t count = labels.size(); + for (size_t i = 0; i < count; i++) + { + if (i) + ret += " || "; + ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix, + count > 1 ? ")" : ""); + } + return ret; + }; - if (phi.parent == from) + // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture, + // we need to flush phi nodes outside the switch block in a branch, + // and skip any Phi handling inside the case label to make fall-through work as expected. + // This kind of code-gen is super awkward and it's a last resort. 
Normally we would want to handle this + // inside the case label if at all possible. + for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++) { - auto &var = get(phi.function_variable); - - // A Phi variable might be a loop variable, so flush to static expression. - if (var.loop_variable && !var.loop_variable_enable) - var.static_expression = phi.local_variable; - else + if (flush_phi_required(block.self, block_declaration_order[i]) && + flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i])) { - flush_variable_declaration(phi.function_variable); + uint32_t target_block = block_declaration_order[i]; - // Check if we are going to write to a Phi variable that another statement will read from - // as part of another Phi node in our target block. - // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. - // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. - bool need_saved_temporary = - find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { - return future_phi.local_variable == phi.function_variable && future_phi.parent == from; - }) != end(child.phi_variables); + // Make sure we flush Phi, it might have been marked to be ignored earlier. + get(target_block).ignore_phi_from_block = 0; - if (need_saved_temporary) + auto &literals = case_constructs[target_block]; + + if (literals.empty()) { - // Need to make sure we declare the phi variable with a copy at the right scope. - // We cannot safely declare a temporary here since we might be inside a continue block. - if (!var.allocate_temporary_copy) + // Oh boy, gotta make a complete negative test instead! o.o + // Find all possible literals that would *not* make us enter the default block. + // If none of those literals match, we flush Phi ... 
+ SmallVector conditions; + for (size_t j = 0; j < num_blocks; j++) { - var.allocate_temporary_copy = true; - force_recompile(); + auto &negative_literals = case_constructs[block_declaration_order[j]]; + for (auto &case_label : negative_literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " != ", to_case_label(case_label, type.width, unsigned_case))); } - statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";"); - temporary_phi_variables.insert(phi.function_variable); - } - - // This might be called in continue block, so make sure we - // use this to emit ESSL 1.0 compliant increments/decrements. - auto lhs = to_expression(phi.function_variable); - string rhs; - if (temporary_phi_variables.count(phi.local_variable)) - rhs = join("_", phi.local_variable, "_copy"); + statement("if (", merge(conditions, " && "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } else - rhs = to_pointer_expression(phi.local_variable); + { + SmallVector conditions; + conditions.reserve(literals.size()); + for (auto &case_label : literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " == ", to_case_label(case_label, type.width, unsigned_case))); + statement("if (", merge(conditions, " || "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } - if (!optimize_read_modify_write(get(var.basetype), lhs, rhs)) - statement(lhs, " = ", rhs, ";"); + // Mark the block so that we don't flush Phi from header to case label. + get(target_block).ignore_phi_from_block = block.self; } - - register_write(phi.function_variable); } - } -} - -void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) -{ - auto &to_block = get(to); - if (from == to) - return; - - assert(is_continue(to)); - if (to_block.complex_continue) - { - // Just emit the whole block chain as is. 
- auto usage_counts = expression_usage_counts; - auto invalid = invalid_expressions; - emit_block_chain(to_block); + // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate + // non-structured exits with the help of a switch block. + // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic. + bool block_like_switch = cases.empty(); - // Expression usage counts and invalid expressions - // are moot after returning from the continue block. - // Since we emit the same block multiple times, - // we don't want to invalidate ourselves. - expression_usage_counts = usage_counts; - invalid_expressions = invalid; - } - else - { - auto &from_block = get(from); - bool outside_control_flow = false; - uint32_t loop_dominator = 0; + // If this is true, the switch is completely meaningless, and we should just avoid it. + bool collapsed_switch = block_like_switch && block.default_block == block.next_block; - // FIXME: Refactor this to not use the old loop_dominator tracking. - if (from_block.merge_block) - { - // If we are a loop header, we don't set the loop dominator, - // so just use "self" here. - loop_dominator = from; - } - else if (from_block.loop_dominator != SPIRBlock::NoDominator) + if (!collapsed_switch) { - loop_dominator = from_block.loop_dominator; + if (block_like_switch || is_legacy_es()) + { + // ESSL 1.0 is not guaranteed to support do/while. 
+ if (is_legacy_es()) + { + uint32_t counter = statement_count; + statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter, + "++)"); + } + else + statement("do"); + } + else + { + emit_block_hints(block); + statement("switch (", to_unpacked_expression(block.condition), ")"); + } + begin_scope(); } - if (loop_dominator != 0) + for (size_t i = 0; i < num_blocks; i++) { - auto &dominator = get(loop_dominator); - - // For non-complex continue blocks, we implicitly branch to the continue block - // by having the continue block be part of the loop header in for (; ; continue-block). - outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block); - } - - // Some simplification for for-loops. We always end up with a useless continue; - // statement since we branch to a loop block. - // Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block, - // we can avoid writing out an explicit continue statement. - // Similar optimization to return statements if we know we're outside flow control. - if (!outside_control_flow) - statement("continue;"); - } -} - -void CompilerGLSL::branch(uint32_t from, uint32_t to) -{ - flush_phi(from, to); - flush_control_dependent_expressions(from); - flush_all_active_variables(); + uint32_t target_block = block_declaration_order[i]; + auto &literals = case_constructs[target_block]; - // This is only a continue if we branch to our loop dominator. - if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get(from).loop_dominator == to) - { - // This can happen if we had a complex continue block which was emitted. - // Once the continue block tries to branch to the loop header, just emit continue; - // and end the chain here. - statement("continue;"); - } - else if (is_break(to)) - { - // Very dirty workaround. - // Switch constructs are able to break, but they cannot break out of a loop at the same time. 
- // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, - // write to the ladder here, and defer the break. - // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. - if (current_emitting_switch && is_loop_break(to) && current_emitting_switch->loop_dominator != ~0u && - get(current_emitting_switch->loop_dominator).merge_block == to) - { - if (!current_emitting_switch->need_ladder_break) + if (literals.empty()) + { + // Default case. + if (!block_like_switch) + { + if (is_legacy_es()) + statement("else"); + else + statement("default:"); + } + } + else { - force_recompile(); - current_emitting_switch->need_ladder_break = true; + if (is_legacy_es()) + { + statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix), + ")"); + } + else + { + for (auto &case_literal : literals) + { + // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. + statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":"); + } + } } - statement("_", current_emitting_switch->self, "_ladder_break = true;"); - } - statement("break;"); - } - else if (is_continue(to) || (from == to)) - { - // For from == to case can happen for a do-while loop which branches into itself. - // We don't mark these cases as continue blocks, but the only possible way to branch into - // ourselves is through means of continue blocks. - branch_to_continue(from, to); - } - else if (!is_conditional(to)) - emit_block_chain(get(to)); + auto &case_block = get(target_block); + if (backend.support_case_fallthrough && i + 1 < num_blocks && + execution_is_direct_branch(case_block, get(block_declaration_order[i + 1]))) + { + // We will fall through here, so just terminate the block chain early. + // We still need to deal with Phi potentially. 
+ // No need for a stack-like thing here since we only do fall-through when there is a + // single trivial branch to fall-through target.. + current_emitting_switch_fallthrough = true; + } + else + current_emitting_switch_fallthrough = false; - // It is important that we check for break before continue. - // A block might serve two purposes, a break block for the inner scope, and - // a continue block in the outer scope. - // Inner scope always takes precedence. -} + if (!block_like_switch) + begin_scope(); + branch(block.self, target_block); + if (!block_like_switch) + end_scope(); -void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block) -{ - // If we branch directly to a selection merge target, we don't really need a code path. - bool true_sub = !is_conditional(true_block); - bool false_sub = !is_conditional(false_block); + current_emitting_switch_fallthrough = false; + } - if (true_sub) - { - emit_block_hints(get(from)); - statement("if (", to_expression(cond), ")"); - begin_scope(); - branch(from, true_block); - end_scope(); + // Might still have to flush phi variables if we branch from loop header directly to merge target. + // This is supposed to emit all cases where we branch from header to merge block directly. + // There are two main scenarios where cannot rely on default fallthrough. + // - There is an explicit default: label already. + // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block. + // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there. 
+ bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block); + bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty(); + if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty())) + { + for (auto &case_literal : literals_to_merge) + statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":"); + + if (block.default_block == block.next_block) + { + if (is_legacy_es()) + statement("else"); + else + statement("default:"); + } - if (false_sub || is_continue(false_block) || is_break(false_block)) - { - statement("else"); - begin_scope(); - branch(from, false_block); - end_scope(); - } - else if (flush_phi_required(from, false_block)) - { - statement("else"); begin_scope(); - flush_phi(from, false_block); + flush_phi(block.self, block.next_block); + statement("break;"); end_scope(); } - } - else if (false_sub && !true_sub) - { - // Only need false path, use negative conditional. - emit_block_hints(get(from)); - statement("if (!", to_enclosed_expression(cond), ")"); - begin_scope(); - branch(from, false_block); - end_scope(); - if (is_continue(true_block) || is_break(true_block)) + if (!collapsed_switch) { - statement("else"); - begin_scope(); - branch(from, true_block); - end_scope(); + if (block_like_switch && !is_legacy_es()) + end_scope_decl("while(false)"); + else + end_scope(); } - else if (flush_phi_required(from, true_block)) + else + flush_phi(block.self, block.next_block); + + if (block.need_ladder_break) { - statement("else"); + statement("if (_", block.self, "_ladder_break)"); begin_scope(); - flush_phi(from, true_block); + statement("break;"); end_scope(); } - } -} -// FIXME: This currently cannot handle complex continue blocks -// as in do-while. -// This should be seen as a "trivial" continue block. 
-string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) -{ - auto *block = &get(continue_block); - - // While emitting the continue block, declare_temporary will check this - // if we have to emit temporaries. - current_continue_block = block; + current_emitting_switch_stack.pop_back(); + break; + } - SmallVector statements; + case SPIRBlock::Return: + { + for (auto &line : current_function->fixup_hooks_out) + line(); - // Capture all statements into our list. - auto *old = redirect_statement; - redirect_statement = &statements; + if (processing_entry_point) + emit_fixup(); - // Stamp out all blocks one after each other. - while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) - { - // Write out all instructions we have in this block. - emit_block_instructions(*block); + auto &cfg = get_cfg_for_current_function(); - // For plain branchless for/while continue blocks. - if (block->next_block) - { - flush_phi(continue_block, block->next_block); - block = &get(block->next_block); - } - // For do while blocks. The last block will be a select block. - else if (block->true_block && follow_true_block) - { - flush_phi(continue_block, block->true_block); - block = &get(block->true_block); - } - else if (block->false_block && follow_false_block) + if (block.return_value) { - flush_phi(continue_block, block->false_block); - block = &get(block->false_block); + auto &type = expression_type(block.return_value); + if (!type.array.empty() && !backend.can_return_array) + { + // If we cannot return arrays, we will have a special out argument we can write to instead. + // The backend is responsible for setting this up, and redirection the return values as appropriate. 
+ if (ir.ids[block.return_value].get_type() != TypeUndef) + { + emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction, + get_expression_effective_storage_class(block.return_value)); + } + + if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + statement("return;"); + } + } + else + { + // OpReturnValue can return Undef, so don't emit anything for this case. + if (ir.ids[block.return_value].get_type() != TypeUndef) + statement("return ", to_unpacked_expression(block.return_value), ";"); + } } - else + else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { - SPIRV_CROSS_THROW("Invalid continue block detected!"); + // If this block is the very final block and not called from control flow, + // we do not need an explicit return which looks out of place. Just end the function here. + // In the very weird case of for(;;) { return; } executing return is unconditional, + // but we actually need a return here ... + statement("return;"); } + break; } - // Restore old pointer. - redirect_statement = old; + // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement. + case SPIRBlock::Kill: + statement(backend.discard_literal, ";"); + if (block.return_value) + statement("return ", to_unpacked_expression(block.return_value), ";"); + break; - // Somewhat ugly, strip off the last ';' since we use ',' instead. - // Ideally, we should select this behavior in statement(). - for (auto &s : statements) + case SPIRBlock::Unreachable: { - if (!s.empty() && s.back() == ';') - s.erase(s.size() - 1, 1); - } + // Avoid emitting false fallthrough, which can happen for + // if (cond) break; else discard; inside a case label. + // Discard is not always implementable as a terminator. 
- current_continue_block = nullptr; - return merge(statements); -} + auto &cfg = get_cfg_for_current_function(); + bool inner_dominator_is_switch = false; + ID id = block.self; -void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) -{ - // While loops do not take initializers, so declare all of them outside. - for (auto &loop_var : block.loop_variables) - { - auto &var = get(loop_var); - statement(variable_decl(var), ";"); - } -} + while (id) + { + auto &iter_block = get(id); + if (iter_block.terminator == SPIRBlock::MultiSelect || + iter_block.merge == SPIRBlock::MergeLoop) + { + ID next_block = iter_block.merge == SPIRBlock::MergeLoop ? + iter_block.merge_block : iter_block.next_block; + bool outside_construct = next_block && cfg.find_common_dominator(next_block, block.self) == next_block; + if (!outside_construct) + { + inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect; + break; + } + } -string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) -{ - if (block.loop_variables.empty()) - return ""; + if (cfg.get_preceding_edges(id).empty()) + break; - bool same_types = for_loop_initializers_are_same_type(block); - // We can only declare for loop initializers if all variables are of same type. - // If we cannot do this, declare individual variables before the loop header. + id = cfg.get_immediate_dominator(id); + } - // We might have a loop variable candidate which was not assigned to for some reason. - uint32_t missing_initializers = 0; - for (auto &variable : block.loop_variables) - { - uint32_t expr = get(variable).static_expression; + if (inner_dominator_is_switch) + statement("break; // unreachable workaround"); - // Sometimes loop variables are initialized with OpUndef, but we can just declare - // a plain variable without initializer in this case. 
- if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) - missing_initializers++; + emit_next_block = false; + break; } - if (block.loop_variables.size() == 1 && missing_initializers == 0) - { - return variable_decl(get(block.loop_variables.front())); - } - else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) - { - for (auto &loop_var : block.loop_variables) - statement(variable_decl(get(loop_var)), ";"); - return ""; + case SPIRBlock::IgnoreIntersection: + statement("ignoreIntersectionEXT;"); + break; + + case SPIRBlock::TerminateRay: + statement("terminateRayEXT;"); + break; + + case SPIRBlock::EmitMeshTasks: + statement("EmitMeshTasksEXT(", + to_unpacked_expression(block.mesh.groups[0]), ", ", + to_unpacked_expression(block.mesh.groups[1]), ", ", + to_unpacked_expression(block.mesh.groups[2]), ");"); + break; + + default: + SPIRV_CROSS_THROW("Unimplemented block terminator."); } - else - { - // We have a mix of loop variables, either ones with a clear initializer, or ones without. - // Separate the two streams. - string expr; - for (auto &loop_var : block.loop_variables) + if (block.next_block && emit_next_block) + { + // If we hit this case, we're dealing with an unconditional branch, which means we will output + // that block after this. If we had selection merge, we already flushed phi variables. 
+ if (block.merge != SPIRBlock::MergeSelection) { - uint32_t static_expr = get(loop_var).static_expression; - if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) - { - statement(variable_decl(get(loop_var)), ";"); - } - else - { - auto &var = get(loop_var); - auto &type = get_variable_data_type(var); - if (expr.empty()) - { - // For loop initializers are of the form (block.next_block).invalidate_expressions = block.invalidate_expressions; + } - expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression)); + // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. + if (!current_emitting_switch_fallthrough) + { + // For merge selects we might have ignored the fact that a merge target + // could have been a break; or continue; + // We will need to deal with it here. + if (is_loop_break(block.next_block)) + { + // Cannot check for just break, because switch statements will also use break. + assert(block.merge == SPIRBlock::MergeSelection); + statement("break;"); + } + else if (is_continue(block.next_block)) + { + assert(block.merge == SPIRBlock::MergeSelection); + branch_to_continue(block.self, block.next_block); } + else if (BlockID(block.self) != block.next_block) + emit_block_chain(get(block.next_block)); } - return expr; } -} - -bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block) -{ - if (block.loop_variables.size() <= 1) - return true; - uint32_t expected = 0; - Bitset expected_flags; - for (auto &var : block.loop_variables) + if (block.merge == SPIRBlock::MergeLoop) { - // Don't care about uninitialized variables as they will not be part of the initializers. 
- uint32_t expr = get(var).static_expression; - if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) - continue; - - if (expected == 0) + if (continue_type == SPIRBlock::DoWhileLoop) { - expected = get(var).basetype; - expected_flags = get_decoration_bitset(var); - } - else if (expected != get(var).basetype) - return false; + // Make sure that we run the continue block to get the expressions set, but this + // should become an empty string. + // We have no fallbacks if we cannot forward everything to temporaries ... + const auto &continue_block = get(block.continue_block); + bool positive_test = execution_is_noop(get(continue_block.true_block), + get(continue_block.loop_dominator)); - // Precision flags and things like that must also match. - if (expected_flags != get_decoration_bitset(var)) - return false; - } + uint32_t current_count = statement_count; + auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test); + if (statement_count != current_count) + { + // The DoWhile block has side effects, force ComplexLoop pattern next pass. + get(block.continue_block).complex_continue = true; + force_recompile(); + } - return true; -} + // Might have to invert the do-while test here. + auto condition = to_expression(continue_block.condition); + if (!positive_test) + condition = join("!", enclose_expression(condition)); -bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method) -{ - SPIRBlock::ContinueBlockType continue_type = continue_block_type(get(block.continue_block)); + end_scope_decl(join("while (", condition, ")")); + } + else + end_scope(); - if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) - { - uint32_t current_count = statement_count; - // If we're trying to create a true for loop, - // we need to make sure that all opcodes before branch statement do not actually emit any code. 
- // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. - emit_block_instructions(block); + loop_level_saver.release(); - bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); + // We cannot break out of two loops at once, so don't check for break; here. + // Using block.self as the "from" block isn't quite right, but it has the same scope + // and dominance structure, so it's fine. + if (is_continue(block.merge_block)) + branch_to_continue(block.self, block.merge_block); + else + emit_block_chain(get(block.merge_block)); + } - // This can work! We only did trivial things which could be forwarded in block body! - if (current_count == statement_count && condition_is_temporary) - { - switch (continue_type) - { - case SPIRBlock::ForLoop: - { - // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. - flush_undeclared_variables(block); + // Forget about control dependent expressions now. + block.invalidate_expressions.clear(); - // Important that we do this in this order because - // emitting the continue block can invalidate the condition expression. - auto initializer = emit_for_loop_initializers(block); - auto condition = to_expression(block.condition); + // After we return, we must be out of scope, so if we somehow have to re-emit this function, + // re-declare variables if necessary. + assert(rearm_dominated_variables.size() == block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var = block.dominated_variables[i]; + get(var).deferred_declaration = rearm_dominated_variables[i]; + } - // Condition might have to be inverted. 
- if (execution_is_noop(get(block.true_block), get(block.merge_block))) - condition = join("!", enclose_expression(condition)); + // Just like for deferred declaration, we need to forget about loop variable enable + // if our block chain is reinstantiated later. + for (auto &var_id : block.loop_variables) + get(var_id).loop_variable_enable = false; +} - emit_block_hints(block); - if (method != SPIRBlock::MergeToSelectContinueForLoop) - { - auto continue_block = emit_continue_block(block.continue_block, false, false); - statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); - } - else - statement("for (", initializer, "; ", condition, "; )"); - break; - } +void CompilerGLSL::begin_scope() +{ + statement("{"); + indent++; +} - case SPIRBlock::WhileLoop: - { - // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header. - flush_undeclared_variables(block); - emit_while_loop_initializers(block); - emit_block_hints(block); +void CompilerGLSL::end_scope() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}"); +} - auto condition = to_expression(block.condition); - // Condition might have to be inverted. - if (execution_is_noop(get(block.true_block), get(block.merge_block))) - condition = join("!", enclose_expression(condition)); +void CompilerGLSL::end_scope(const string &trailer) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}", trailer); +} - statement("while (", condition, ")"); - break; - } +void CompilerGLSL::end_scope_decl() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("};"); +} - default: - block.disable_block_optimization = true; - force_recompile(); - begin_scope(); // We'll see an end_scope() later. 
- return false; - } +void CompilerGLSL::end_scope_decl(const string &decl) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("} ", decl, ";"); +} - begin_scope(); - return true; - } - else +void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) +{ + // If our variable is remapped, and we rely on type-remapping information as + // well, then we cannot pass the variable as a function parameter. + // Fixing this is non-trivial without stamping out variants of the same function, + // so for now warn about this and suggest workarounds instead. + for (uint32_t i = 0; i < length; i++) + { + auto *var = maybe_get(args[i]); + if (!var || !var->remapped_variable) + continue; + + auto &type = get(var->basetype); + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) { - block.disable_block_optimization = true; - force_recompile(); - begin_scope(); // We'll see an end_scope() later. - return false; + SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " + "This will not work correctly because type-remapping information is lost. " + "To workaround, please consider not passing the subpass input as a function parameter, " + "or use in/out variables instead which do not need type remapping information."); } } - else if (method == SPIRBlock::MergeToDirectForLoop) - { - auto &child = get(block.next_block); +} - // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. - flush_undeclared_variables(child); +const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) +{ + // FIXME: This is kind of hacky. There should be a cleaner way. 
+ auto offset = uint32_t(&instr - current_emitting_block->ops.data()); + if ((offset + 1) < current_emitting_block->ops.size()) + return ¤t_emitting_block->ops[offset + 1]; + else + return nullptr; +} - uint32_t current_count = statement_count; +uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) +{ + return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); +} - // If we're trying to create a true for loop, - // we need to make sure that all opcodes before branch statement do not actually emit any code. - // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. - emit_block_instructions(child); +void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass) +{ + statement(lhs, " = ", to_expression(rhs_id), ";"); +} - bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); +bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id) +{ + if (!backend.force_gl_in_out_block) + return false; + // This path is only relevant for GL backends. - if (current_count == statement_count && condition_is_temporary) - { - uint32_t target_block = child.true_block; + auto *var = maybe_get(target_id); + if (!var || var->storage != StorageClassOutput) + return false; - switch (continue_type) - { - case SPIRBlock::ForLoop: - { - // Important that we do this in this order because - // emitting the continue block can invalidate the condition expression. 
- auto initializer = emit_for_loop_initializers(block); - auto condition = to_expression(child.condition); + if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask) + return false; + + auto &type = expression_type(source_id); + string array_expr; + if (type.array_size_literal.back()) + { + array_expr = convert_to_string(type.array.back()); + if (type.array.back() == 0) + SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); + } + else + array_expr = to_expression(type.array.back()); - // Condition might have to be inverted. - if (execution_is_noop(get(child.true_block), get(block.merge_block))) - { - condition = join("!", enclose_expression(condition)); - target_block = child.false_block; - } + SPIRType target_type; + target_type.basetype = SPIRType::Int; - auto continue_block = emit_continue_block(block.continue_block, false, false); - emit_block_hints(block); - statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); - break; - } + statement("for (int i = 0; i < int(", array_expr, "); i++)"); + begin_scope(); + statement(to_expression(target_id), "[i] = ", + bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")), + ";"); + end_scope(); - case SPIRBlock::WhileLoop: - { - emit_while_loop_initializers(block); - emit_block_hints(block); + return true; +} - auto condition = to_expression(child.condition); - // Condition might have to be inverted. - if (execution_is_noop(get(child.true_block), get(block.merge_block))) - { - condition = join("!", enclose_expression(condition)); - target_block = child.false_block; - } +void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) +{ + if (!backend.force_gl_in_out_block) + return; + // This path is only relevant for GL backends. 
- statement("while (", condition, ")"); - break; - } + auto *var = maybe_get(source_id); + if (!var) + return; - default: - block.disable_block_optimization = true; - force_recompile(); - begin_scope(); // We'll see an end_scope() later. - return false; - } + if (var->storage != StorageClassInput && var->storage != StorageClassOutput) + return; - begin_scope(); - branch(child.self, target_block); - return true; + auto &type = get_variable_data_type(*var); + if (type.array.empty()) + return; + + auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + bool is_builtin = is_builtin_variable(*var) && + (builtin == BuiltInPointSize || + builtin == BuiltInPosition || + builtin == BuiltInSampleMask); + bool is_tess = is_tessellation_shader(); + bool is_patch = has_decoration(var->self, DecorationPatch); + bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask; + + // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. + // We must unroll the array load. + // For builtins, we couldn't catch this case normally, + // because this is resolved in the OpAccessChain in most cases. + // If we load the entire array, we have no choice but to unroll here. + if (!is_patch && (is_builtin || is_tess)) + { + auto new_expr = join("_", target_id, "_unrolled"); + statement(variable_decl(type, new_expr, target_id), ";"); + string array_expr; + if (type.array_size_literal.back()) + { + array_expr = convert_to_string(type.array.back()); + if (type.array.back() == 0) + SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); } else + array_expr = to_expression(type.array.back()); + + // The array size might be a specialization constant, so use a for-loop instead. 
+ statement("for (int i = 0; i < int(", array_expr, "); i++)"); + begin_scope(); + if (is_builtin && !is_sample_mask) + statement(new_expr, "[i] = gl_in[i].", expr, ";"); + else if (is_sample_mask) { - block.disable_block_optimization = true; - force_recompile(); - begin_scope(); // We'll see an end_scope() later. - return false; + SPIRType target_type; + target_type.basetype = SPIRType::Int; + statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";"); } + else + statement(new_expr, "[i] = ", expr, "[i];"); + end_scope(); + + expr = std::move(new_expr); } - else - return false; } -void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) +void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) { - for (auto &v : block.dominated_variables) - flush_variable_declaration(v); -} + // We will handle array cases elsewhere. + if (!expr_type.array.empty()) + return; -void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &temporaries) -{ - // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. - // Need to sort these to ensure that reference output is stable. - sort(begin(temporaries), end(temporaries), - [](const pair &a, const pair &b) { return a.second < b.second; }); + auto *var = maybe_get_backing_variable(source_id); + if (var) + source_id = var->self; - for (auto &tmp : temporaries) + // Only interested in standalone builtin variables. + if (!has_decoration(source_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + + // TODO: Fill in for more builtins. 
+ switch (builtin) { - add_local_variable_name(tmp.second); - auto &flags = ir.meta[tmp.second].decoration.decoration_flags; - auto &type = get(tmp.first); - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";"); + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInInstanceId: + case BuiltInInstanceIndex: + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInSampleId: + case BuiltInBaseVertex: + case BuiltInBaseInstance: + case BuiltInDrawIndex: + case BuiltInFragStencilRefEXT: + case BuiltInInstanceCustomIndexNV: + case BuiltInSampleMask: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInShadingRateKHR: + expected_type = SPIRType::Int; + break; - hoisted_temporaries.insert(tmp.second); - forced_temporaries.insert(tmp.second); + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + case BuiltInLocalInvocationIndex: + case BuiltInWorkgroupSize: + case BuiltInNumWorkgroups: + case BuiltInIncomingRayFlagsNV: + case BuiltInLaunchIdNV: + case BuiltInLaunchSizeNV: + case BuiltInPrimitiveTriangleIndicesEXT: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInPrimitivePointIndicesEXT: + expected_type = SPIRType::UInt; + break; - // The temporary might be read from before it's assigned, set up the expression now. 
- set(tmp.second, to_name(tmp.second), tmp.first, true); + default: + break; } + + if (expected_type != expr_type.basetype) + expr = bitcast_expression(expr_type, expected_type, expr); } -void CompilerGLSL::emit_block_chain(SPIRBlock &block) +void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) { - bool select_branch_to_true_block = false; - bool select_branch_to_false_block = false; - bool skip_direct_branch = false; - bool emitted_loop_header_variables = false; - bool force_complex_continue_block = false; + auto *var = maybe_get_backing_variable(target_id); + if (var) + target_id = var->self; - emit_hoisted_temporaries(block.declare_temporary); + // Only interested in standalone builtin variables. + if (!has_decoration(target_id, DecorationBuiltIn)) + return; - SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; - if (block.continue_block) - continue_type = continue_block_type(get(block.continue_block)); + auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; - // If we have loop variables, stop masking out access to the variable now. - for (auto var_id : block.loop_variables) + // TODO: Fill in for more builtins. + switch (builtin) { - auto &var = get(var_id); - var.loop_variable_enable = true; - // We're not going to declare the variable directly, so emit a copy here. - emit_variable_temporary_copies(var); - } + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInFragStencilRefEXT: + case BuiltInSampleMask: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInShadingRateKHR: + expected_type = SPIRType::Int; + break; - // Remember deferred declaration state. We will restore it before returning. 
- SmallVector rearm_dominated_variables(block.dominated_variables.size()); - for (size_t i = 0; i < block.dominated_variables.size(); i++) - { - uint32_t var_id = block.dominated_variables[i]; - auto &var = get(var_id); - rearm_dominated_variables[i] = var.deferred_declaration; + default: + break; } - // This is the method often used by spirv-opt to implement loops. - // The loop header goes straight into the continue block. - // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, - // it *MUST* be used in the continue block. This loop method will not work. - if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop)) + if (expected_type != expr_type.basetype) { - flush_undeclared_variables(block); - if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop)) - { - if (execution_is_noop(get(block.true_block), get(block.merge_block))) - select_branch_to_false_block = true; - else - select_branch_to_true_block = true; - - emitted_loop_header_variables = true; - force_complex_continue_block = true; - } + auto type = expr_type; + type.basetype = expected_type; + expr = bitcast_expression(type, expr_type.basetype, expr); } - // This is the older loop behavior in glslang which branches to loop body directly from the loop header. - else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) - { - flush_undeclared_variables(block); - if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) - { - // The body of while, is actually just the true (or false) block, so always branch there unconditionally. 
- if (execution_is_noop(get(block.true_block), get(block.merge_block))) - select_branch_to_false_block = true; - else - select_branch_to_true_block = true; +} - emitted_loop_header_variables = true; - } - } - // This is the newer loop behavior in glslang which branches from Loop header directly to - // a new block, which in turn has a OpBranchSelection without a selection merge. - else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) - { - flush_undeclared_variables(block); - if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) - { - skip_direct_branch = true; - emitted_loop_header_variables = true; - } - } - else if (continue_type == SPIRBlock::DoWhileLoop) - { - flush_undeclared_variables(block); - emit_while_loop_initializers(block); - emitted_loop_header_variables = true; - // We have some temporaries where the loop header is the dominator. - // We risk a case where we have code like: - // for (;;) { create-temporary; break; } consume-temporary; - // so force-declare temporaries here. - emit_hoisted_temporaries(block.potential_declare_temporary); - statement("do"); - begin_scope(); +void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id) +{ + if (*backend.nonuniform_qualifier == '\0') + return; - emit_block_instructions(block); - } - else if (block.merge == SPIRBlock::MergeLoop) - { - flush_undeclared_variables(block); - emit_while_loop_initializers(block); - emitted_loop_header_variables = true; + auto *var = maybe_get_backing_variable(ptr_id); + if (!var) + return; - // We have a generic loop without any distinguishable pattern like for, while or do while. - get(block.continue_block).complex_continue = true; - continue_type = SPIRBlock::ComplexLoop; + if (var->storage != StorageClassUniformConstant && + var->storage != StorageClassStorageBuffer && + var->storage != StorageClassUniform) + return; - // We have some temporaries where the loop header is the dominator. 
- // We risk a case where we have code like: - // for (;;) { create-temporary; break; } consume-temporary; - // so force-declare temporaries here. - emit_hoisted_temporaries(block.potential_declare_temporary); - statement("for (;;)"); - begin_scope(); + auto &backing_type = get(var->basetype); + if (backing_type.array.empty()) + return; - emit_block_instructions(block); - } - else - { - emit_block_instructions(block); - } + // If we get here, we know we're accessing an arrayed resource which + // might require nonuniform qualifier. - // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem - // as writes to said loop variables might have been masked out, we need a recompile. - if (!emitted_loop_header_variables && !block.loop_variables.empty()) - { - force_recompile(); - for (auto var : block.loop_variables) - get(var).loop_variable = false; - block.loop_variables.clear(); - } + auto start_array_index = expr.find_first_of('['); - flush_undeclared_variables(block); - bool emit_next_block = true; + if (start_array_index == string::npos) + return; - // Handle end of block. - switch (block.terminator) + // We've opened a bracket, track expressions until we can close the bracket. + // This must be our resource index. + size_t end_array_index = string::npos; + unsigned bracket_count = 1; + for (size_t index = start_array_index + 1; index < expr.size(); index++) { - case SPIRBlock::Direct: - // True when emitting complex continue block. - if (block.loop_dominator == block.next_block) - { - branch(block.self, block.next_block); - emit_next_block = false; - } - // True if MergeToDirectForLoop succeeded. 
- else if (skip_direct_branch) - emit_next_block = false; - else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) - { - branch(block.self, block.next_block); - emit_next_block = false; - } - break; - - case SPIRBlock::Select: - // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. - if (select_branch_to_true_block) + if (expr[index] == ']') { - if (force_complex_continue_block) + if (--bracket_count == 0) { - assert(block.true_block == block.continue_block); - - // We're going to emit a continue block directly here, so make sure it's marked as complex. - auto &complex_continue = get(block.continue_block).complex_continue; - bool old_complex = complex_continue; - complex_continue = true; - branch(block.self, block.true_block); - complex_continue = old_complex; + end_array_index = index; + break; } - else - branch(block.self, block.true_block); } - else if (select_branch_to_false_block) - { - if (force_complex_continue_block) - { - assert(block.false_block == block.continue_block); + else if (expr[index] == '[') + bracket_count++; + } - // We're going to emit a continue block directly here, so make sure it's marked as complex. - auto &complex_continue = get(block.continue_block).complex_continue; - bool old_complex = complex_continue; - complex_continue = true; - branch(block.self, block.false_block); - complex_continue = old_complex; - } - else - branch(block.self, block.false_block); - } - else - branch(block.self, block.condition, block.true_block, block.false_block); - break; + assert(bracket_count == 0); - case SPIRBlock::MultiSelect: - { - auto &type = expression_type(block.condition); - bool unsigned_case = - type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte; + // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's + // nothing we can do here to express that. 
+ if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) + return; - if (block.merge == SPIRBlock::MergeNone) - SPIRV_CROSS_THROW("Switch statement is not structured"); + start_array_index++; - if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) - { - // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. - SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors."); - } + expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", + expr.substr(start_array_index, end_array_index - start_array_index), ")", + expr.substr(end_array_index, string::npos)); +} - const char *label_suffix = ""; - if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) - label_suffix = "u"; - else if (type.basetype == SPIRType::UShort) - label_suffix = backend.uint16_t_literal_suffix; - else if (type.basetype == SPIRType::Short) - label_suffix = backend.int16_t_literal_suffix; +void CompilerGLSL::emit_block_hints(const SPIRBlock &block) +{ + if ((options.es && options.version < 310) || (!options.es && options.version < 140)) + return; - SPIRBlock *old_emitting_switch = current_emitting_switch; - current_emitting_switch = █ + switch (block.hint) + { + case SPIRBlock::HintFlatten: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_FLATTEN"); + break; + case SPIRBlock::HintDontFlatten: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_BRANCH"); + break; + case SPIRBlock::HintUnroll: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_UNROLL"); + break; + case SPIRBlock::HintDontUnroll: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_LOOP"); + break; + default: + break; + } +} - if (block.need_ladder_break) - statement("bool _", block.self, "_ladder_break = false;"); +void 
CompilerGLSL::preserve_alias_on_reset(uint32_t id) +{ + preserved_aliases[id] = get_name(id); +} - // Find all unique case constructs. - unordered_map> case_constructs; - SmallVector block_declaration_order; - SmallVector literals_to_merge; +void CompilerGLSL::reset_name_caches() +{ + for (auto &preserved : preserved_aliases) + set_name(preserved.first, preserved.second); - // If a switch case branches to the default block for some reason, we can just remove that literal from consideration - // and let the default: block handle it. - // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. - // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. - for (auto &c : block.cases) - { - if (c.block != block.next_block && c.block != block.default_block) - { - if (!case_constructs.count(c.block)) - block_declaration_order.push_back(c.block); - case_constructs[c.block].push_back(c.value); - } - else if (c.block == block.next_block && block.default_block != block.next_block) - { - // We might have to flush phi inside specific case labels. - // If we can piggyback on default:, do so instead. - literals_to_merge.push_back(c.value); - } - } + preserved_aliases.clear(); + resource_names.clear(); + block_input_names.clear(); + block_output_names.clear(); + block_ubo_names.clear(); + block_ssbo_names.clear(); + block_names.clear(); + function_overloads.clear(); +} - // Empty literal array -> default. - if (block.default_block != block.next_block) - { - auto &default_block = get(block.default_block); +void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set &visited, const SPIRType &type) +{ + if (visited.count(type.self)) + return; + visited.insert(type.self); - // We need to slide in the default block somewhere in this chain - // if there are fall-through scenarios since the default is declared separately in OpSwitch. 
- // Only consider trivial fall-through cases here. - size_t num_blocks = block_declaration_order.size(); - bool injected_block = false; + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + auto &mbr_type = get(type.member_types[i]); - for (size_t i = 0; i < num_blocks; i++) + if (mbr_type.basetype == SPIRType::Struct) + { + // If there are multiple aliases, the output might be somewhat unpredictable, + // but the only real alternative in that case is to do nothing, which isn't any better. + // This check should be fine in practice. + if (get_name(mbr_type.self).empty() && !get_member_name(type.self, i).empty()) { - auto &case_block = get(block_declaration_order[i]); - if (execution_is_direct_branch(case_block, default_block)) - { - // Fallthrough to default block, we must inject the default block here. - block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block); - injected_block = true; - break; - } - else if (execution_is_direct_branch(default_block, case_block)) - { - // Default case is falling through to another case label, we must inject the default block here. - block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block); - injected_block = true; - break; - } + auto anon_name = join("anon_", get_member_name(type.self, i)); + ParsedIR::sanitize_underscores(anon_name); + set_name(mbr_type.self, anon_name); } - // Order does not matter. - if (!injected_block) - block_declaration_order.push_back(block.default_block); - - case_constructs[block.default_block] = {}; + fixup_anonymous_struct_names(visited, mbr_type); } + } +} - size_t num_blocks = block_declaration_order.size(); +void CompilerGLSL::fixup_anonymous_struct_names() +{ + // HLSL codegen can often end up emitting anonymous structs inside blocks, which + // breaks GL linking since all names must match ... + // Try to emit sensible code, so attempt to find such structs and emit anon_$member. 
- const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string { - return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal)); - }; + // Breaks exponential explosion with weird type trees. + std::unordered_set visited; - // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture, - // we need to flush phi nodes outside the switch block in a branch, - // and skip any Phi handling inside the case label to make fall-through work as expected. - // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this - // inside the case label if at all possible. - for (size_t i = 1; i < num_blocks; i++) + ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + if (type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || + has_decoration(type.self, DecorationBufferBlock))) { - if (flush_phi_required(block.self, block_declaration_order[i]) && - flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i])) - { - uint32_t target_block = block_declaration_order[i]; - - // Make sure we flush Phi, it might have been marked to be ignored earlier. - get(target_block).ignore_phi_from_block = 0; + fixup_anonymous_struct_names(visited, type); + } + }); +} - auto &literals = case_constructs[target_block]; +void CompilerGLSL::fixup_type_alias() +{ + // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. + ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (!type.type_alias) + return; - if (literals.empty()) - { - // Oh boy, gotta make a complete negative test instead! o.o - // Find all possible literals that would *not* make us enter the default block. - // If none of those literals match, we flush Phi ... 
- SmallVector conditions; - for (size_t j = 0; j < num_blocks; j++) - { - auto &negative_literals = case_constructs[block_declaration_order[j]]; - for (auto &case_label : negative_literals) - conditions.push_back(join(to_enclosed_expression(block.condition), - " != ", to_case_label(case_label, unsigned_case))); - } + if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) + { + // Top-level block types should never alias anything else. + type.type_alias = 0; + } + else if (type_is_block_like(type) && type.self == ID(self)) + { + // A block-like type is any type which contains Offset decoration, but not top-level blocks, + // i.e. blocks which are placed inside buffers. + // Become the master. + ir.for_each_typed_id([&](uint32_t other_id, SPIRType &other_type) { + if (other_id == self) + return; - statement("if (", merge(conditions, " && "), ")"); - begin_scope(); - flush_phi(block.self, target_block); - end_scope(); - } - else - { - SmallVector conditions; - conditions.reserve(literals.size()); - for (auto &case_label : literals) - conditions.push_back(join(to_enclosed_expression(block.condition), - " == ", to_case_label(case_label, unsigned_case))); - statement("if (", merge(conditions, " || "), ")"); - begin_scope(); - flush_phi(block.self, target_block); - end_scope(); - } + if (other_type.type_alias == type.type_alias) + other_type.type_alias = self; + }); - // Mark the block so that we don't flush Phi from header to case label. - get(target_block).ignore_phi_from_block = block.self; - } + this->get(type.type_alias).type_alias = self; + type.type_alias = 0; } + }); +} - emit_block_hints(block); - statement("switch (", to_expression(block.condition), ")"); - begin_scope(); +void CompilerGLSL::reorder_type_alias() +{ + // Reorder declaration of types so that the master of the type alias is always emitted first. 
+ // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which + // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. + auto loop_lock = ir.create_loop_hard_lock(); - for (size_t i = 0; i < num_blocks; i++) + auto &type_ids = ir.ids_for_type[TypeType]; + for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) + { + auto &type = get(*alias_itr); + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) { - uint32_t target_block = block_declaration_order[i]; - auto &literals = case_constructs[target_block]; + // We will skip declaring this type, so make sure the type_alias type comes before. + auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias)); + assert(master_itr != end(type_ids)); - if (literals.empty()) - { - // Default case. - statement("default:"); - } - else + if (alias_itr < master_itr) { - for (auto &case_literal : literals) - { - // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. - statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); - } - } + // Must also swap the type order for the constant-type joined array. + auto &joined_types = ir.ids_for_constant_undef_or_type; + auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); + auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); + assert(alt_alias_itr != end(joined_types)); + assert(alt_master_itr != end(joined_types)); - auto &case_block = get(target_block); - if (backend.support_case_fallthrough && i + 1 < num_blocks && - execution_is_direct_branch(case_block, get(block_declaration_order[i + 1]))) - { - // We will fall through here, so just terminate the block chain early. - // We still need to deal with Phi potentially. 
- // No need for a stack-like thing here since we only do fall-through when there is a - // single trivial branch to fall-through target.. - current_emitting_switch_fallthrough = true; + swap(*alias_itr, *master_itr); + swap(*alt_alias_itr, *alt_master_itr); } - else - current_emitting_switch_fallthrough = false; - - begin_scope(); - branch(block.self, target_block); - end_scope(); - - current_emitting_switch_fallthrough = false; } + } +} - // Might still have to flush phi variables if we branch from loop header directly to merge target. - if (flush_phi_required(block.self, block.next_block)) - { - if (block.default_block == block.next_block || !literals_to_merge.empty()) - { - for (auto &case_literal : literals_to_merge) - statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); +void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) +{ + // If we are redirecting statements, ignore the line directive. + // Common case here is continue blocks. + if (redirect_statement) + return; - if (block.default_block == block.next_block) - statement("default:"); + if (options.emit_line_directives) + { + require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); + statement_no_indent("#line ", line_literal, " \"", get(file_id).str, "\""); + } +} - begin_scope(); - flush_phi(block.self, block.next_block); - statement("break;"); - end_scope(); - } - } +void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, + SmallVector chain) +{ + // Fully unroll all member/array indices one by one. - end_scope(); + auto &lhs_type = get(lhs_type_id); + auto &rhs_type = get(rhs_type_id); - if (block.need_ladder_break) + if (!lhs_type.array.empty()) + { + // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types, + // and this is a rather obscure opcode anyways, keep it simple unless we are forced to. 
+ uint32_t array_size = to_array_size_literal(lhs_type); + chain.push_back(0); + + for (uint32_t i = 0; i < array_size; i++) { - statement("if (_", block.self, "_ladder_break)"); - begin_scope(); - statement("break;"); - end_scope(); + chain.back() = i; + emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain); + } + } + else if (lhs_type.basetype == SPIRType::Struct) + { + chain.push_back(0); + uint32_t member_count = uint32_t(lhs_type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + chain.back() = i; + emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain); } - - current_emitting_switch = old_emitting_switch; - break; } + else + { + // Need to handle unpack/packing fixups since this can differ wildly between the logical types, + // particularly in MSL. + // To deal with this, we emit access chains and go through emit_store_statement + // to deal with all the special cases we can encounter. - case SPIRBlock::Return: - for (auto &line : current_function->fixup_hooks_out) - line(); + AccessChainMeta lhs_meta, rhs_meta; + auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta); + auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta); - if (processing_entry_point) - emit_fixup(); + uint32_t id = ir.increase_bound_by(2); + lhs_id = id; + rhs_id = id + 1; - if (block.return_value) { - auto &type = expression_type(block.return_value); - if (!type.array.empty() && !backend.can_return_array) - { - // If we cannot return arrays, we will have a special out argument we can write to instead. - // The backend is responsible for setting this up, and redirection the return values as appropriate. 
- if (ir.ids[block.return_value].get_type() != TypeUndef) - emit_array_copy("SPIRV_Cross_return_value", block.return_value); + auto &lhs_expr = set(lhs_id, std::move(lhs), lhs_type_id, true); + lhs_expr.need_transpose = lhs_meta.need_transpose; - if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || - block.loop_dominator != SPIRBlock::NoDominator) - { - statement("return;"); - } - } - else - { - // OpReturnValue can return Undef, so don't emit anything for this case. - if (ir.ids[block.return_value].get_type() != TypeUndef) - statement("return ", to_expression(block.return_value), ";"); - } + if (lhs_meta.storage_is_packed) + set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked); + if (lhs_meta.storage_physical_type != 0) + set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type); + + forwarded_temporaries.insert(lhs_id); + suppressed_usage_tracking.insert(lhs_id); } - // If this block is the very final block and not called from control flow, - // we do not need an explicit return which looks out of place. Just end the function here. - // In the very weird case of for(;;) { return; } executing return is unconditional, - // but we actually need a return here ... 
- else if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || - block.loop_dominator != SPIRBlock::NoDominator) + { - statement("return;"); - } - break; + auto &rhs_expr = set(rhs_id, std::move(rhs), rhs_type_id, true); + rhs_expr.need_transpose = rhs_meta.need_transpose; - case SPIRBlock::Kill: - statement(backend.discard_literal, ";"); - break; + if (rhs_meta.storage_is_packed) + set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked); + if (rhs_meta.storage_physical_type != 0) + set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type); - case SPIRBlock::Unreachable: - emit_next_block = false; - break; + forwarded_temporaries.insert(rhs_id); + suppressed_usage_tracking.insert(rhs_id); + } - default: - SPIRV_CROSS_THROW("Unimplemented block terminator."); + emit_store_statement(lhs_id, rhs_id); } +} - if (block.next_block && emit_next_block) - { - // If we hit this case, we're dealing with an unconditional branch, which means we will output - // that block after this. If we had selection merge, we already flushed phi variables. - if (block.merge != SPIRBlock::MergeSelection) - flush_phi(block.self, block.next_block); +bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const +{ + if (!has_decoration(id, DecorationInputAttachmentIndex)) + return false; - // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. 
- if (!current_emitting_switch_fallthrough) + uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex); + for (auto &remap : subpass_to_framebuffer_fetch_attachment) + if (remap.first == input_attachment_index) + return true; + + return false; +} + +const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const +{ + const SPIRVariable *ret = nullptr; + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (has_decoration(var.self, DecorationInputAttachmentIndex) && + get_decoration(var.self, DecorationInputAttachmentIndex) == index) { - // For merge selects we might have ignored the fact that a merge target - // could have been a break; or continue; - // We will need to deal with it here. - if (is_loop_break(block.next_block)) - { - // Cannot check for just break, because switch statements will also use break. - assert(block.merge == SPIRBlock::MergeSelection); - statement("break;"); - } - else if (is_continue(block.next_block)) - { - assert(block.merge == SPIRBlock::MergeSelection); - branch_to_continue(block.self, block.next_block); - } - else if (block.self != block.next_block) - emit_block_chain(get(block.next_block)); + ret = &var; } - } + }); + return ret; +} - if (block.merge == SPIRBlock::MergeLoop) - { - if (continue_type == SPIRBlock::DoWhileLoop) - { - // Make sure that we run the continue block to get the expressions set, but this - // should become an empty string. - // We have no fallbacks if we cannot forward everything to temporaries ... 
- const auto &continue_block = get(block.continue_block); - bool positive_test = execution_is_noop(get(continue_block.true_block), - get(continue_block.loop_dominator)); +const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const +{ + const SPIRVariable *ret = nullptr; + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location) + ret = &var; + }); + return ret; +} - uint32_t current_count = statement_count; - auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test); - if (statement_count != current_count) +void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs() +{ + for (auto &remap : subpass_to_framebuffer_fetch_attachment) + { + auto *subpass_var = find_subpass_input_by_attachment_index(remap.first); + auto *output_var = find_color_output_by_location(remap.second); + if (!subpass_var) + continue; + if (!output_var) + SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able " + "to read from it."); + if (is_array(get(output_var->basetype))) + SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs."); + + auto &func = get(get_entry_point().self); + func.fixup_hooks_in.push_back([=]() { + if (is_legacy()) { - // The DoWhile block has side effects, force ComplexLoop pattern next pass. - get(block.continue_block).complex_continue = true; - force_recompile(); + statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[", + get_decoration(output_var->self, DecorationLocation), "];"); } + else + { + uint32_t num_rt_components = this->get(output_var->basetype).vecsize; + statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ", + to_expression(output_var->self), ";"); + } + }); + } +} - // Might have to invert the do-while test here. 
- auto condition = to_expression(continue_block.condition); - if (!positive_test) - condition = join("!", enclose_expression(condition)); +bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const +{ + return is_depth_image(get(get(id).basetype), id); +} - end_scope_decl(join("while (", condition, ")")); - } - else - end_scope(); +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) +{ + static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot", + "GL_KHR_shader_subgroup_basic", + "GL_KHR_shader_subgroup_vote", + "GL_NV_gpu_shader_5", + "GL_NV_shader_thread_group", + "GL_NV_shader_thread_shuffle", + "GL_ARB_shader_ballot", + "GL_ARB_shader_group_vote", + "GL_AMD_gcn_shader" }; + return retval[c]; +} - // We cannot break out of two loops at once, so don't check for break; here. - // Using block.self as the "from" block isn't quite right, but it has the same scope - // and dominance structure, so it's fine. - if (is_continue(block.merge_block)) - branch_to_continue(block.self, block.merge_block); - else - emit_block_chain(get(block.merge_block)); +SmallVector CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c) +{ + switch (c) + { + case ARB_shader_ballot: + return { "GL_ARB_shader_int64" }; + case AMD_gcn_shader: + return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" }; + default: + return {}; } +} - // Forget about control dependent expressions now. - block.invalidate_expressions.clear(); - - // After we return, we must be out of scope, so if we somehow have to re-emit this function, - // re-declare variables if necessary. 
- assert(rearm_dominated_variables.size() == block.dominated_variables.size()); - for (size_t i = 0; i < block.dominated_variables.size(); i++) +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c) +{ + switch (c) { - uint32_t var = block.dominated_variables[i]; - get(var).deferred_declaration = rearm_dominated_variables[i]; + case ARB_shader_ballot: + return "defined(GL_ARB_shader_int64)"; + case AMD_gcn_shader: + return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))"; + default: + return ""; } +} - // Just like for deferred declaration, we need to forget about loop variable enable - // if our block chain is reinstantiated later. - for (auto &var_id : block.loop_variables) - get(var_id).loop_variable_enable = false; +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependencies(Feature feature) +{ + switch (feature) + { + case SubgroupAllEqualT: + return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool }; + case SubgroupElect: + return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID }; + case SubgroupInverseBallot_InclBitCount_ExclBitCout: + return { SubgroupMask }; + case SubgroupBallotBitCount: + return { SubgroupBallot }; + default: + return {}; + } } -void CompilerGLSL::begin_scope() +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependency_mask(Feature feature) { - statement("{"); - indent++; + return build_mask(get_feature_dependencies(feature)); } -void CompilerGLSL::end_scope() +bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature) { - if (!indent) - SPIRV_CROSS_THROW("Popping empty indent stack."); - indent--; - statement("}"); + static const bool retval[FeatureCount] = { false, false, false, false, false, false, + true, // SubgroupBalloFindLSB_MSB + false, false, false, 
false, + true, // SubgroupMemBarrier - replaced with workgroup memory barriers + false, false, true, false }; + + return retval[feature]; } -void CompilerGLSL::end_scope_decl() +CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper:: + get_KHR_extension_for_feature(Feature feature) { - if (!indent) - SPIRV_CROSS_THROW("Popping empty indent stack."); - indent--; - statement("};"); + static const Candidate extensions[FeatureCount] = { + KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, + KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot + }; + + return extensions[feature]; } -void CompilerGLSL::end_scope_decl(const string &decl) +void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature) { - if (!indent) - SPIRV_CROSS_THROW("Popping empty indent stack."); - indent--; - statement("} ", decl, ";"); + feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature); +} + +bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const +{ + return (feature_mask & (1u << feature)) != 0; } -void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) +CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const { - // If our variable is remapped, and we rely on type-remapping information as - // well, then we cannot pass the variable as a function parameter. - // Fixing this is non-trivial without stamping out variants of the same function, - // so for now warn about this and suggest workarounds instead. 
- for (uint32_t i = 0; i < length; i++) - { - auto *var = maybe_get(args[i]); - if (!var || !var->remapped_variable) - continue; + Result res; - auto &type = get(var->basetype); - if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + for (uint32_t i = 0u; i < FeatureCount; ++i) + { + if (feature_mask & (1u << i)) { - SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " - "This will not work correctly because type-remapping information is lost. " - "To workaround, please consider not passing the subpass input as a function parameter, " - "or use in/out variables instead which do not need type remapping information."); + auto feature = static_cast(i); + std::unordered_set unique_candidates; + + auto candidates = get_candidates_for_feature(feature); + unique_candidates.insert(candidates.begin(), candidates.end()); + + auto deps = get_feature_dependencies(feature); + for (Feature d : deps) + { + candidates = get_candidates_for_feature(d); + if (!candidates.empty()) + unique_candidates.insert(candidates.begin(), candidates.end()); + } + + for (uint32_t c : unique_candidates) + ++res.weights[static_cast(c)]; } } + + return res; } -const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature ft, const Result &r) { - // FIXME: This is kind of hacky. There should be a cleaner way. 
- auto offset = uint32_t(&instr - current_emitting_block->ops.data()); - if ((offset + 1) < current_emitting_block->ops.size()) - return ¤t_emitting_block->ops[offset + 1]; - else - return nullptr; + auto c = get_candidates_for_feature(ft); + auto cmp = [&r](Candidate a, Candidate b) { + if (r.weights[a] == r.weights[b]) + return a < b; // Prefer candidates with lower enum value + return r.weights[a] > r.weights[b]; + }; + std::sort(c.begin(), c.end(), cmp); + return c; } -uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature feature) { - return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | - MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | - MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); + switch (feature) + { + case SubgroupMask: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupSize: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot }; + case SubgroupInvocationID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case NumSubgroups: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case SubgroupBroadcast_First: + return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot }; + case SubgroupBallotFindLSB_MSB: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group }; + case SubgroupAll_Any_AllEqualBool: + return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader }; + case SubgroupAllEqualT: + return {}; // depends on other features only + case SubgroupElect: + return {}; // depends on other features only + case SubgroupBallot: + 
return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupBarrier: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader }; + case SubgroupMemBarrier: + return { KHR_shader_subgroup_basic }; + case SubgroupInverseBallot_InclBitCount_ExclBitCout: + return {}; + case SubgroupBallotBitExtract: + return { NV_shader_thread_group }; + case SubgroupBallotBitCount: + return {}; + default: + return {}; + } } -void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id) +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask( + const SmallVector &features) { - statement(lhs, " = ", to_expression(rhs_id), ";"); + FeatureMask mask = 0; + for (Feature f : features) + mask |= FeatureMask(1) << f; + return mask; } -void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) +CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result() { - if (!backend.force_gl_in_out_block) - return; - // This path is only relevant for GL backends. + for (auto &weight : weights) + weight = 0; + + // Make sure KHR_shader_subgroup extensions are always prefered. + const uint32_t big_num = FeatureCount; + weights[KHR_shader_subgroup_ballot] = big_num; + weights[KHR_shader_subgroup_basic] = big_num; + weights[KHR_shader_subgroup_vote] = big_num; +} - auto *var = maybe_get(source_id); +void CompilerGLSL::request_workaround_wrapper_overload(TypeID id) +{ + // Must be ordered to maintain deterministic output, so vector is appropriate. 
+ if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) == + end(workaround_ubo_load_overload_types)) + { + force_recompile(); + workaround_ubo_load_overload_types.push_back(id); + } +} + +void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr) +{ + // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic. + // To load these types correctly, we must first wrap them in a dummy function which only purpose is to + // ensure row_major decoration is actually respected. + auto *var = maybe_get_backing_variable(ptr); if (!var) return; - if (var->storage != StorageClassInput) + auto &backing_type = get(var->basetype); + bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform && + has_decoration(backing_type.self, DecorationBlock); + if (!is_ubo) return; - auto &type = get_variable_data_type(*var); - if (type.array.empty()) - return; + auto *type = &get(loaded_type); + bool rewrite = false; + bool relaxed = options.es; - auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); - bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition); - bool is_tess = is_tessellation_shader(); + if (is_matrix(*type)) + { + // To avoid adding a lot of unnecessary meta tracking to forward the row_major state, + // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state. + // If there is any row-major action going on, we apply the workaround. + // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution. + // If an access chain occurred, the workaround is not required, so loading vectors or scalars don't need workaround. + type = &backing_type; + } + else + { + // If we're loading a composite, we don't have overloads like these. 
+ relaxed = false; + } - // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. - // We must unroll the array load. - // For builtins, we couldn't catch this case normally, - // because this is resolved in the OpAccessChain in most cases. - // If we load the entire array, we have no choice but to unroll here. - if (is_builtin || is_tess) + if (type->basetype == SPIRType::Struct) { - auto new_expr = join("_", target_id, "_unrolled"); - statement(variable_decl(type, new_expr, target_id), ";"); - string array_expr; - if (type.array_size_literal.front()) + // If we're loading a struct where any member is a row-major matrix, apply the workaround. + for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++) { - array_expr = convert_to_string(type.array.front()); - if (type.array.front() == 0) - SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); - } - else - array_expr = to_expression(type.array.front()); + auto decorations = combined_decoration_for_member(*type, i); + if (decorations.get(DecorationRowMajor)) + rewrite = true; - // The array size might be a specialization constant, so use a for-loop instead. - statement("for (int i = 0; i < int(", array_expr, "); i++)"); - begin_scope(); - if (is_builtin) - statement(new_expr, "[i] = gl_in[i].", expr, ";"); - else - statement(new_expr, "[i] = ", expr, "[i];"); - end_scope(); + // Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump. + if (!decorations.get(DecorationRelaxedPrecision)) + relaxed = false; + } + } - expr = move(new_expr); + if (rewrite) + { + request_workaround_wrapper_overload(loaded_type); + expr = join("spvWorkaroundRowMajor", (relaxed ? 
"MP" : ""), "(", expr, ")"); } } -void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) +void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component) { - auto *var = maybe_get_backing_variable(source_id); - if (var) - source_id = var->self; - - // Only interested in standalone builtin variables. - if (!has_decoration(source_id, DecorationBuiltIn)) - return; + masked_output_locations.insert({ location, component }); +} - auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); - auto expected_type = expr_type.basetype; +void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin) +{ + masked_output_builtins.insert(builtin); +} - // TODO: Fill in for more builtins. - switch (builtin) - { - case BuiltInLayer: - case BuiltInPrimitiveId: - case BuiltInViewportIndex: - case BuiltInInstanceId: - case BuiltInInstanceIndex: - case BuiltInVertexId: - case BuiltInVertexIndex: - case BuiltInSampleId: - case BuiltInBaseVertex: - case BuiltInBaseInstance: - case BuiltInDrawIndex: - case BuiltInFragStencilRefEXT: - expected_type = SPIRType::Int; - break; +bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const +{ + auto &type = get(var.basetype); + bool is_block = has_decoration(type.self, DecorationBlock); + // Blocks by themselves are never masked. Must be masked per-member. 
+ if (is_block) + return false; - case BuiltInGlobalInvocationId: - case BuiltInLocalInvocationId: - case BuiltInWorkgroupId: - case BuiltInLocalInvocationIndex: - case BuiltInWorkgroupSize: - case BuiltInNumWorkgroups: - expected_type = SPIRType::UInt; - break; + bool is_builtin = has_decoration(var.self, DecorationBuiltIn); - default: - break; + if (is_builtin) + { + return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn))); } + else + { + if (!has_decoration(var.self, DecorationLocation)) + return false; - if (expected_type != expr_type.basetype) - expr = bitcast_expression(expr_type, expected_type, expr); + return is_stage_output_location_masked( + get_decoration(var.self, DecorationLocation), + get_decoration(var.self, DecorationComponent)); + } } -void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) +bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const { - // Only interested in standalone builtin variables. - if (!has_decoration(target_id, DecorationBuiltIn)) - return; - - auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); - auto expected_type = expr_type.basetype; + auto &type = get(var.basetype); + bool is_block = has_decoration(type.self, DecorationBlock); + if (!is_block) + return false; - // TODO: Fill in for more builtins. 
- switch (builtin) + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(type, index, &builtin)) { - case BuiltInLayer: - case BuiltInPrimitiveId: - case BuiltInViewportIndex: - case BuiltInFragStencilRefEXT: - expected_type = SPIRType::Int; - break; - - default: - break; + return is_stage_output_builtin_masked(builtin); } - - if (expected_type != expr_type.basetype) + else { - auto type = expr_type; - type.basetype = expected_type; - expr = bitcast_expression(type, expr_type.basetype, expr); + uint32_t location = get_declared_member_location(var, index, strip_array); + uint32_t component = get_member_decoration(type.self, index, DecorationComponent); + return is_stage_output_location_masked(location, component); } } -void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr) +bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const { - if (*backend.nonuniform_qualifier == '\0') - return; + if (has_decoration(var.self, DecorationPerPrimitiveEXT)) + return true; - // Handle SPV_EXT_descriptor_indexing. - if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage || - type.basetype == SPIRType::Image) - { - // The image/sampler ID must be declared as non-uniform. - // However, it is not legal GLSL to have - // nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier - // to the array indexing, like - // samplers[nonuniformEXT(index)]. - // While the access chain will generally be nonuniformEXT, it's not necessarily so, - // so we might have to fixup the OpLoad-ed expression late. - - auto start_array_index = expr.find_first_of('['); - auto end_array_index = expr.find_last_of(']'); - // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's - // nothing we can do here to express that. 
- if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) - return; + auto &type = get(var.basetype); + if (!has_decoration(type.self, DecorationBlock)) + return false; - start_array_index++; + for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++) + if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) + return false; - expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", - expr.substr(start_array_index, end_array_index - start_array_index), ")", - expr.substr(end_array_index, string::npos)); - } + return true; } -void CompilerGLSL::emit_block_hints(const SPIRBlock &) +bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const { + return masked_output_locations.count({ location, component }) != 0; } -void CompilerGLSL::preserve_alias_on_reset(uint32_t id) +bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const { - preserved_aliases[id] = get_name(id); + return masked_output_builtins.count(builtin) != 0; } -void CompilerGLSL::reset_name_caches() +uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const { - for (auto &preserved : preserved_aliases) - set_name(preserved.first, preserved.second); - - preserved_aliases.clear(); - resource_names.clear(); - block_input_names.clear(); - block_output_names.clear(); - block_ubo_names.clear(); - block_ssbo_names.clear(); - block_names.clear(); - function_overloads.clear(); + auto &block_type = get(var.basetype); + if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation)) + return get_member_decoration(block_type.self, mbr_idx, DecorationLocation); + else + return get_accumulated_member_location(var, mbr_idx, strip_array); } -void CompilerGLSL::fixup_type_alias() +uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool 
strip_array) const { - // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. - // FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type - // alias if the slave type is a block type. - ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { - if (type.type_alias && type_is_block_like(type)) - { - // Become the master. - ir.for_each_typed_id([&](uint32_t other_id, SPIRType &other_type) { - if (other_id == type.self) - return; - - if (other_type.type_alias == type.type_alias) - other_type.type_alias = type.self; - }); - - this->get(type.type_alias).type_alias = self; - type.type_alias = 0; - } - }); - - ir.for_each_typed_id([&](uint32_t, SPIRType &type) { - if (type.type_alias && type_is_block_like(type)) - { - // This is not allowed, drop the type_alias. - type.type_alias = 0; - } - }); -} + auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + uint32_t location = get_decoration(var.self, DecorationLocation); -void CompilerGLSL::reorder_type_alias() -{ - // Reorder declaration of types so that the master of the type alias is always emitted first. - // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which - // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. - auto &type_ids = ir.ids_for_type[TypeType]; - for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) + for (uint32_t i = 0; i < mbr_idx; i++) { - auto &type = get(*alias_itr); - if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) - { - // We will skip declaring this type, so make sure the type_alias type comes before. 
- auto master_itr = find(begin(type_ids), end(type_ids), type.type_alias); - assert(master_itr != end(type_ids)); + auto &mbr_type = get(type.member_types[i]); - if (alias_itr < master_itr) - { - // Must also swap the type order for the constant-type joined array. - auto &joined_types = ir.ids_for_constant_or_type; - auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); - auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); - assert(alt_alias_itr != end(joined_types)); - assert(alt_master_itr != end(joined_types)); + // Start counting from any place we have a new location decoration. + if (has_member_decoration(type.self, mbr_idx, DecorationLocation)) + location = get_member_decoration(type.self, mbr_idx, DecorationLocation); - swap(*alias_itr, *master_itr); - swap(*alt_alias_itr, *alt_master_itr); - } - } + uint32_t location_count = type_to_location_count(mbr_type); + location += location_count; } + + return location; } -void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) +StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr) { - // If we are redirecting statements, ignore the line directive. - // Common case here is continue blocks. - if (redirect_statement) - return; - - if (options.emit_line_directives) - { - require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); - statement_no_indent("#line ", line_literal, " \"", get(file_id).str, "\""); + auto *var = maybe_get_backing_variable(ptr); + + // If the expression has been lowered to a temporary, we need to use the Generic storage class. + // We're looking for the effective storage class of a given expression. + // An access chain or forwarded OpLoads from such access chains + // will generally have the storage class of the underlying variable, but if the load was not forwarded + // we have lost any address space qualifiers. 
+ bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get(ptr).access_chain && + (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0); + + if (var && !forced_temporary) + { + if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) + return StorageClassWorkgroup; + if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer)) + return StorageClassStorageBuffer; + + // Normalize SSBOs to StorageBuffer here. + if (var->storage == StorageClassUniform && + has_decoration(get(var->basetype).self, DecorationBufferBlock)) + return StorageClassStorageBuffer; + else + return var->storage; } + else + return expression_type(ptr).storage; } -void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id) +uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const { - // SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen, - // we need to know NonUniformEXT a little earlier, when the resource is actually loaded. - // Back-propagate the qualifier based on the expression dependency chain. 
- - if (!has_decoration(id, DecorationNonUniformEXT)) - { - set_decoration(id, DecorationNonUniformEXT); - force_recompile(); - } - - auto *e = maybe_get(id); - auto *combined = maybe_get(id); - auto *chain = maybe_get(id); - if (e) - { - for (auto &expr : e->expression_dependencies) - propagate_nonuniform_qualifier(expr); - for (auto &expr : e->implied_read_expressions) - propagate_nonuniform_qualifier(expr); - } - else if (combined) + uint32_t count; + if (type.basetype == SPIRType::Struct) { - propagate_nonuniform_qualifier(combined->image); - propagate_nonuniform_qualifier(combined->sampler); + uint32_t mbr_count = uint32_t(type.member_types.size()); + count = 0; + for (uint32_t i = 0; i < mbr_count; i++) + count += type_to_location_count(get(type.member_types[i])); } - else if (chain) + else { - for (auto &expr : chain->implied_read_expressions) - propagate_nonuniform_qualifier(expr); + count = type.columns > 1 ? type.columns : 1; } + + uint32_t dim_count = uint32_t(type.array.size()); + for (uint32_t i = 0; i < dim_count; i++) + count *= to_array_size_literal(type, i); + + return count; } diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index e1eb39bf7a2..4dcde5540f4 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2015-2019 Arm Limited + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #ifndef SPIRV_CROSS_GLSL_HPP #define SPIRV_CROSS_GLSL_HPP @@ -56,7 +63,10 @@ enum AccessChainFlagBits ACCESS_CHAIN_INDEX_IS_LITERAL_BIT = 1 << 0, ACCESS_CHAIN_CHAIN_ONLY_BIT = 1 << 1, ACCESS_CHAIN_PTR_CHAIN_BIT = 1 << 2, - ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3 + ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3, + ACCESS_CHAIN_LITERAL_MSB_FORCE_ID = 1 << 4, + ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT = 1 << 5, + ACCESS_CHAIN_FORCE_COMPOSITE_BIT = 1 << 6 }; typedef uint32_t AccessChainFlags; @@ -73,6 +83,11 @@ class CompilerGLSL : public Compiler // Debug option to always emit temporary variables for all expressions. bool force_temporary = false; + // Debug option, can be increased in an attempt to workaround SPIRV-Cross bugs temporarily. + // If this limit has to be increased, it points to an implementation bug. + // In certain scenarios, the maximum number of debug iterations may increase beyond this limit + // as long as we can prove we're making certain kinds of forward progress. + uint32_t force_recompile_max_debug_iterations = 3; // If true, Vulkan GLSL features are used instead of GL-compatible features. // Mostly useful for debugging SPIR-V files. @@ -107,6 +122,38 @@ class CompilerGLSL : public Compiler // May not correspond exactly to original source, but should be a good approximation. bool emit_line_directives = false; + // In cases where readonly/writeonly decoration are not used at all, + // we try to deduce which qualifier(s) we should actually used, since actually emitting + // read-write decoration is very rare, and older glslang/HLSL compilers tend to just emit readwrite as a matter of fact. + // The default (true) is to enable automatic deduction for these cases, but if you trust the decorations set + // by the SPIR-V, it's recommended to set this to false. + bool enable_storage_image_qualifier_deduction = true; + + // On some targets (WebGPU), uninitialized variables are banned. 
+ // If this is enabled, all variables (temporaries, Private, Function) + // which would otherwise be uninitialized will now be initialized to 0 instead. + bool force_zero_initialized_variables = false; + + // In GLSL, force use of I/O block flattening, similar to + // what happens on legacy GLSL targets for blocks and structs. + bool force_flattened_io_blocks = false; + + // For opcodes where we have to perform explicit additional nan checks, very ugly code is generated. + // If we opt-in, ignore these requirements. + // In opcodes like NClamp/NMin/NMax and FP compare, ignore NaN behavior. + // Use FClamp/FMin/FMax semantics for clamps and lets implementation choose ordered or unordered + // compares. + bool relax_nan_checks = false; + + // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic. + // To load these types correctly, we must generate a wrapper. them in a dummy function which only purpose is to + // ensure row_major decoration is actually respected. + // This workaround may cause significant performance degeneration on some Android devices. + bool enable_row_major_load_workaround = true; + + // If non-zero, controls layout(num_views = N) in; in GL_OVR_multiview2. + uint32_t ovr_multiview_view_count = 0; + enum Precision { DontCare, @@ -115,14 +162,16 @@ class CompilerGLSL : public Compiler Highp }; - struct + struct VertexOptions { - // GLSL: In vertex shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). - // MSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. - // HLSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + // "Vertex-like shader" here is any shader stage that can write BuiltInPosition. + + // GLSL: In vertex-like shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). + // MSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. 
+ // HLSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. bool fixup_clipspace = false; - // Inverts gl_Position.y or equivalent. + // In vertex-like shaders, inverts gl_Position.y or equivalent. bool flip_vert_y = false; // GLSL only, for HLSL version of this option, see CompilerHLSL. @@ -132,7 +181,7 @@ class CompilerGLSL : public Compiler bool support_nonzero_base_instance = true; } vertex; - struct + struct FragmentOptions { // Add precision mediump float in ES targets when emitting GLES source. // Add precision highp int in ES targets when emitting GLES source. @@ -148,6 +197,11 @@ class CompilerGLSL : public Compiler remap_pls_variables(); } + // Redirect a subpassInput reading from input_attachment_index to instead load its value from + // the color attachment at location = color_location. Requires ESSL. + // If coherent, uses GL_EXT_shader_framebuffer_fetch, if not, uses noncoherent variant. + void remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent); + explicit CompilerGLSL(std::vector spirv_) : Compiler(std::move(spirv_)) { @@ -209,10 +263,104 @@ class CompilerGLSL : public Compiler // For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but // mixing int and float is not. // The name of the uniform array will be the same as the interface block name. - void flatten_buffer_block(uint32_t id); + void flatten_buffer_block(VariableID id); + + // After compilation, query if a variable ID was used as a depth resource. + // This is meaningful for MSL since descriptor types depend on this knowledge. + // Cases which return true: + // - Images which are declared with depth = 1 image type. + // - Samplers which are statically used at least once with Dref opcodes. + // - Images which are statically used at least once with Dref opcodes. 
+ bool variable_is_depth_or_compare(VariableID id) const; + + // If a shader output is active in this stage, but inactive in a subsequent stage, + // this can be signalled here. This can be used to work around certain cross-stage matching problems + // which plagues MSL and HLSL in certain scenarios. + // An output which matches one of these will not be emitted in stage output interfaces, but rather treated as a private + // variable. + // This option is only meaningful for MSL and HLSL, since GLSL matches by location directly. + // Masking builtins only takes effect if the builtin in question is part of the stage output interface. + void mask_stage_output_by_location(uint32_t location, uint32_t component); + void mask_stage_output_by_builtin(spv::BuiltIn builtin); protected: - void reset(); + struct ShaderSubgroupSupportHelper + { + // lower enum value = greater priority + enum Candidate + { + KHR_shader_subgroup_ballot, + KHR_shader_subgroup_basic, + KHR_shader_subgroup_vote, + NV_gpu_shader_5, + NV_shader_thread_group, + NV_shader_thread_shuffle, + ARB_shader_ballot, + ARB_shader_group_vote, + AMD_gcn_shader, + + CandidateCount + }; + + static const char *get_extension_name(Candidate c); + static SmallVector get_extra_required_extension_names(Candidate c); + static const char *get_extra_required_extension_predicate(Candidate c); + + enum Feature + { + SubgroupMask = 0, + SubgroupSize = 1, + SubgroupInvocationID = 2, + SubgroupID = 3, + NumSubgroups = 4, + SubgroupBroadcast_First = 5, + SubgroupBallotFindLSB_MSB = 6, + SubgroupAll_Any_AllEqualBool = 7, + SubgroupAllEqualT = 8, + SubgroupElect = 9, + SubgroupBarrier = 10, + SubgroupMemBarrier = 11, + SubgroupBallot = 12, + SubgroupInverseBallot_InclBitCount_ExclBitCout = 13, + SubgroupBallotBitExtract = 14, + SubgroupBallotBitCount = 15, + + FeatureCount + }; + + using FeatureMask = uint32_t; + static_assert(sizeof(FeatureMask) * 8u >= FeatureCount, "Mask type needs more bits."); + + using CandidateVector = 
SmallVector; + using FeatureVector = SmallVector; + + static FeatureVector get_feature_dependencies(Feature feature); + static FeatureMask get_feature_dependency_mask(Feature feature); + static bool can_feature_be_implemented_without_extensions(Feature feature); + static Candidate get_KHR_extension_for_feature(Feature feature); + + struct Result + { + Result(); + uint32_t weights[CandidateCount]; + }; + + void request_feature(Feature feature); + bool is_feature_requested(Feature feature) const; + Result resolve() const; + + static CandidateVector get_candidates_for_feature(Feature ft, const Result &r); + + private: + static CandidateVector get_candidates_for_feature(Feature ft); + static FeatureMask build_mask(const SmallVector &features); + FeatureMask feature_mask = 0; + }; + + // TODO remove this function when all subgroup ops are supported (or make it always return true) + static bool is_supported_subgroup_op_in_opengl(spv::Op op); + + void reset(uint32_t iteration_count); void emit_function(SPIRFunction &func, const Bitset &return_flags); bool has_extension(const std::string &ext) const; @@ -222,11 +370,22 @@ class CompilerGLSL : public Compiler virtual void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags); SPIRBlock *current_emitting_block = nullptr; - SPIRBlock *current_emitting_switch = nullptr; + SmallVector current_emitting_switch_stack; bool current_emitting_switch_fallthrough = false; virtual void emit_instruction(const Instruction &instr); + struct TemporaryCopy + { + uint32_t dst_id; + uint32_t src_id; + }; + TemporaryCopy handle_instruction_precision(const Instruction &instr); void emit_block_instructions(SPIRBlock &block); + + // For relax_nan_checks. 
+ GLSLstd450 get_remapped_glsl_op(GLSLstd450 std450_op) const; + spv::Op get_remapped_spirv_op(spv::Op op) const; + virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count); virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op, @@ -242,32 +401,72 @@ class CompilerGLSL : public Compiler void build_workgroup_size(SmallVector &arguments, const SpecializationConstant &x, const SpecializationConstant &y, const SpecializationConstant &z); + void request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature); + virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id); - virtual void emit_texture_op(const Instruction &i); + virtual void emit_texture_op(const Instruction &i, bool sparse); + virtual std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions); virtual void emit_subgroup_op(const Instruction &i); virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0); virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage); virtual void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0); + virtual void emit_struct_padding_target(const SPIRType &type); virtual std::string image_type_glsl(const SPIRType &type, uint32_t id = 0); - std::string constant_expression(const SPIRConstant &c); - std::string constant_op_expression(const SPIRConstantOp &cop); + std::string constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope = false); + virtual std::string constant_op_expression(const SPIRConstantOp &cop); virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector); virtual void emit_fixup(); virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t 
id = 0); - virtual std::string to_func_call_arg(uint32_t id); - virtual std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, - bool has_dref, uint32_t lod, uint32_t minlod); - virtual std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, - bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, - uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, - uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, - uint32_t minlod, bool *p_forward); + virtual bool variable_decl_is_remapped_storage(const SPIRVariable &var, spv::StorageClass storage) const; + virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id); + + struct TextureFunctionBaseArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. + TextureFunctionBaseArguments() = default; + VariableID img = 0; + const SPIRType *imgtype = nullptr; + bool is_fetch = false, is_gather = false, is_proj = false; + }; + + struct TextureFunctionNameArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. + TextureFunctionNameArguments() = default; + TextureFunctionBaseArguments base; + bool has_array_offsets = false, has_offset = false, has_grad = false; + bool has_dref = false, is_sparse_feedback = false, has_min_lod = false; + uint32_t lod = 0; + }; + virtual std::string to_function_name(const TextureFunctionNameArguments &args); + + struct TextureFunctionArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. 
+ TextureFunctionArguments() = default; + TextureFunctionBaseArguments base; + uint32_t coord = 0, coord_components = 0, dref = 0; + uint32_t grad_x = 0, grad_y = 0, lod = 0, offset = 0; + uint32_t bias = 0, component = 0, sample = 0, sparse_texel = 0, min_lod = 0; + bool nonuniform_expression = false; + }; + virtual std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward); + + void emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, + uint32_t &texel_id); + uint32_t get_sparse_feedback_texel_id(uint32_t id) const; virtual void emit_buffer_block(const SPIRVariable &type); virtual void emit_push_constant_block(const SPIRVariable &var); virtual void emit_uniform(const SPIRVariable &var); - virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id); + virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool packed_type, bool row_major); + + virtual bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const; + + void emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, + SmallVector chain); StringStream<> buffer; @@ -325,19 +524,22 @@ class CompilerGLSL : public Compiler // on a single line separated by comma. 
SmallVector *redirect_statement = nullptr; const SPIRBlock *current_continue_block = nullptr; + bool block_temporary_hoisting = false; void begin_scope(); void end_scope(); + void end_scope(const std::string &trailer); void end_scope_decl(); void end_scope_decl(const std::string &decl); Options options; - std::string type_to_array_glsl(const SPIRType &type); + virtual std::string type_to_array_glsl( + const SPIRType &type); // Allow Metal to use the array template to make arrays a value type std::string to_array_size(const SPIRType &type, uint32_t index); uint32_t to_array_size_literal(const SPIRType &type, uint32_t index) const; uint32_t to_array_size_literal(const SPIRType &type) const; - std::string variable_decl(const SPIRVariable &variable); + virtual std::string variable_decl(const SPIRVariable &variable); // Threadgroup arrays can't have a wrapper type std::string variable_decl_function_local(SPIRVariable &variable); void add_local_variable_name(uint32_t id); @@ -347,8 +549,10 @@ class CompilerGLSL : public Compiler virtual bool is_non_native_row_major_matrix(uint32_t id); virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index); - bool member_is_packed_type(const SPIRType &type, uint32_t index) const; - virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed); + bool member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const; + bool member_is_packed_physical_type(const SPIRType &type, uint32_t index) const; + virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, + uint32_t physical_type_id, bool is_packed); std::unordered_set local_variable_names; std::unordered_set resource_names; @@ -369,6 +573,7 @@ class CompilerGLSL : public Compiler struct BackendVariations { std::string discard_literal = "discard"; + std::string demote_literal = "demote"; std::string null_pointer_literal = ""; bool float_literal_suffix = false; 
bool double_literal_suffix = true; @@ -383,6 +588,7 @@ class CompilerGLSL : public Compiler const char *int16_t_literal_suffix = "s"; const char *uint16_t_literal_suffix = "us"; const char *nonuniform_qualifier = "nonuniformEXT"; + const char *boolean_mix_function = "mix"; bool swizzle_is_function = false; bool shared_is_implied = false; bool unsized_array_supported = true; @@ -393,51 +599,68 @@ class CompilerGLSL : public Compiler bool can_declare_arrays_inline = true; bool native_row_major_matrix = true; bool use_constructor_splatting = true; - bool boolean_mix_support = true; bool allow_precision_qualifiers = false; bool can_swizzle_scalar = false; bool force_gl_in_out_block = false; + bool force_merged_mesh_block = false; bool can_return_array = true; bool allow_truncated_access_chain = false; bool supports_extensions = false; bool supports_empty_struct = false; bool array_is_value_type = true; + bool array_is_value_type_in_buffer_blocks = true; bool comparison_image_samples_scalar = false; bool native_pointers = false; bool support_small_type_sampling_result = false; bool support_case_fallthrough = true; + bool use_array_constructor = false; + bool needs_row_major_load_workaround = false; + bool support_pointer_to_pointer = false; + bool support_precise_qualifier = false; + bool support_64bit_switch = false; + bool workgroup_size_is_hidden = false; + bool requires_relaxed_precision_analysis = false; + bool implicit_c_integer_promotion_rules = false; } backend; void emit_struct(SPIRType &type); void emit_resources(); + void emit_extension_workarounds(spv::ExecutionModel model); void emit_buffer_block_native(const SPIRVariable &var); - void emit_buffer_reference_block(SPIRType &type, bool forward_declaration); + void emit_buffer_reference_block(uint32_t type_id, bool forward_declaration); void emit_buffer_block_legacy(const SPIRVariable &var); void emit_buffer_block_flattened(const SPIRVariable &type); + void 
fixup_implicit_builtin_block_names(spv::ExecutionModel model); void emit_declared_builtin_block(spv::StorageClass storage, spv::ExecutionModel model); + bool should_force_emit_builtin_block(spv::StorageClass storage); void emit_push_constant_block_vulkan(const SPIRVariable &var); void emit_push_constant_block_glsl(const SPIRVariable &var); void emit_interface_block(const SPIRVariable &type); void emit_flattened_io_block(const SPIRVariable &var, const char *qual); + void emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices); + void emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices); void emit_block_chain(SPIRBlock &block); - void emit_hoisted_temporaries(SmallVector> &temporaries); + void emit_hoisted_temporaries(SmallVector> &temporaries); std::string constant_value_macro_name(uint32_t id); + int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const; void emit_constant(const SPIRConstant &constant); void emit_specialization_constant_op(const SPIRConstantOp &constant); std::string emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block); bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method); - void branch(uint32_t from, uint32_t to); - void branch_to_continue(uint32_t from, uint32_t to); - void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block); - void flush_phi(uint32_t from, uint32_t to); - bool flush_phi_required(uint32_t from, uint32_t to); + void branch(BlockID from, BlockID to); + void branch_to_continue(BlockID from, BlockID to); + void branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block); + void flush_phi(BlockID from, BlockID to); void flush_variable_declaration(uint32_t id); void flush_undeclared_variables(SPIRBlock &block); void emit_variable_temporary_copies(const 
SPIRVariable &var); bool should_dereference(uint32_t id); - bool should_forward(uint32_t id); + bool should_forward(uint32_t id) const; + bool should_suppress_usage_tracking(uint32_t id) const; void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp); void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op); bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp); @@ -446,41 +669,67 @@ class CompilerGLSL : public Compiler void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op); void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op); void emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type); void emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + void emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type); void emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op, SPIRType::BaseType input_type); + void emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, SPIRType::BaseType expected_result_type, + SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, + SPIRType::BaseType 
input_type2); + void emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + uint32_t op3, const char *op, SPIRType::BaseType offset_count_type); void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op); void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); - void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + bool negate, SPIRType::BaseType expected_type); void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, - SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + SPIRType::BaseType input_type, bool skip_cast_if_equal_type, bool implicit_integer_promotion); SPIRType binary_op_bitcast_helper(std::string &cast_op0, std::string &cast_op1, SPIRType::BaseType &input_type, uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type); + virtual bool emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0); + std::string to_ternary_expression(const SPIRType &result_type, uint32_t select, uint32_t true_value, uint32_t false_value); void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); - bool expression_is_forwarded(uint32_t id); + void emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); + bool expression_is_forwarded(uint32_t id) const; + bool expression_suppresses_usage_tracking(uint32_t id) const; + bool expression_read_implies_multiple_reads(uint32_t id) const; SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, bool 
suppress_usage_tracking = false); + void access_chain_internal_append_index(std::string &expr, uint32_t base, const SPIRType *type, + AccessChainFlags flags, bool &access_chain_is_arrayed, uint32_t index); + std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta); + spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); + virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base); + + virtual void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type); + virtual void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, + spv::StorageClass storage, bool &is_packed); + std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, AccessChainMeta *meta = nullptr, bool ptr_chain = false); std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, - bool need_transpose); + uint32_t array_stride, bool need_transpose); std::string flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset); std::string flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, @@ -493,6 +742,7 @@ class CompilerGLSL : public Compiler uint32_t count, uint32_t offset, uint32_t word_stride, bool *need_transpose = nullptr, uint32_t *matrix_stride = nullptr, + uint32_t *array_stride = nullptr, bool ptr_chain = false); const char *index_to_swizzle(uint32_t index); @@ -501,39 +751,50 @@ class CompilerGLSL : public Compiler void emit_uninitialized_temporary(uint32_t type, uint32_t id); SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id); void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist); + std::string 
to_non_uniform_aware_expression(uint32_t id); std::string to_expression(uint32_t id, bool register_expression_read = true); - std::string to_composite_constructor_expression(uint32_t id); + std::string to_composite_constructor_expression(uint32_t id, bool block_like_type); std::string to_rerolled_array_expression(const std::string &expr, const SPIRType &type); std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true); std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true); + std::string to_unpacked_row_major_matrix_expression(uint32_t id); std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true); std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true); std::string to_pointer_expression(uint32_t id, bool register_expression_read = true); std::string to_enclosed_pointer_expression(uint32_t id, bool register_expression_read = true); std::string to_extract_component_expression(uint32_t id, uint32_t index); + std::string to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c, + const uint32_t *chain, uint32_t length); std::string enclose_expression(const std::string &expr); std::string dereference_expression(const SPIRType &expression_type, const std::string &expr); std::string address_of_expression(const std::string &expr); void strip_enclosed_expression(std::string &expr); std::string to_member_name(const SPIRType &type, uint32_t index); - virtual std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain); + virtual std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved); + std::string to_multi_member_reference(const SPIRType &type, const SmallVector &indices); std::string type_to_glsl_constructor(const SPIRType &type); std::string argument_decl(const SPIRFunction::Parameter &arg); virtual std::string 
to_qualifiers_glsl(uint32_t id); - const char *to_precision_qualifiers_glsl(uint32_t id); + void fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var); + void emit_output_variable_initializer(const SPIRVariable &var); + std::string to_precision_qualifiers_glsl(uint32_t id); virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var); - const char *flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags); + std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags); const char *format_to_glsl(spv::ImageFormat format); virtual std::string layout_for_member(const SPIRType &type, uint32_t index); virtual std::string to_interpolation_qualifiers(const Bitset &flags); std::string layout_for_variable(const SPIRVariable &variable); - std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id); + std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id); virtual bool skip_argument(uint32_t id) const; - virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id); + virtual void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id, + spv::StorageClass lhs_storage, spv::StorageClass rhs_storage); virtual void emit_block_hints(const SPIRBlock &block); virtual std::string to_initializer_expression(const SPIRVariable &var); + virtual std::string to_zero_initialized_expression(uint32_t type_id); + bool type_can_zero_initialize(const SPIRType &type) const; - bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0, + bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, + uint32_t *failed_index = nullptr, uint32_t start_offset = 0, uint32_t end_offset = ~(0u)); std::string buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout); @@ -541,6 +802,7 @@ class CompilerGLSL : public Compiler uint32_t type_to_packed_alignment(const SPIRType &type, 
const Bitset &flags, BufferPackingStandard packing); uint32_t type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); uint32_t type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); + uint32_t type_to_location_count(const SPIRType &type) const; std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg); virtual std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type); @@ -557,23 +819,35 @@ class CompilerGLSL : public Compiler bool check_atomic_image(uint32_t id); virtual void replace_illegal_names(); + void replace_illegal_names(const std::unordered_set &keywords); virtual void emit_entry_point_declarations(); void replace_fragment_output(SPIRVariable &var); void replace_fragment_outputs(); - bool check_explicit_lod_allowed(uint32_t lod); - std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t id); + std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t id); + + void forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length); + void analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length); + Options::Precision analyze_expression_precision(const uint32_t *args, uint32_t length) const; uint32_t indent = 0; std::unordered_set emitted_functions; + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + std::unordered_set flushed_phi_variables; + std::unordered_set flattened_buffer_blocks; - std::unordered_set flattened_structs; + std::unordered_map flattened_structs; + + ShaderSubgroupSupportHelper shader_subgroup_supporter; - std::string load_flattened_struct(SPIRVariable &var); - std::string to_flattened_struct_member(const SPIRVariable &var, uint32_t index); - void store_flattened_struct(SPIRVariable &var, uint32_t value); + std::string load_flattened_struct(const 
std::string &basename, const SPIRType &type); + std::string to_flattened_struct_member(const std::string &basename, const SPIRType &type, uint32_t index); + void store_flattened_struct(uint32_t lhs_id, uint32_t value); + void store_flattened_struct(const std::string &basename, uint32_t rhs, const SPIRType &type, + const SmallVector &indices); + std::string to_flattened_access_chain_expression(uint32_t id); // Usage tracking. If a temporary is used more than once, use the temporary instead to // avoid AST explosion when SPIRV is generated with pure SSA and doesn't write stuff to variables. @@ -588,6 +862,10 @@ class CompilerGLSL : public Compiler // Currently used by NMin/Max/Clamp implementations. std::unordered_map extra_sub_expressions; + SmallVector workaround_ubo_load_overload_types; + void request_workaround_wrapper_overload(TypeID id); + void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr); + uint32_t statement_count = 0; inline bool is_legacy() const @@ -605,6 +883,13 @@ class CompilerGLSL : public Compiler return !options.es && options.version < 130; } + bool requires_transpose_2x2 = false; + bool requires_transpose_3x3 = false; + bool requires_transpose_4x4 = false; + bool ray_tracing_is_khr = false; + bool barycentric_is_nv = false; + void ray_tracing_khr_fixup_locations(); + bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure); void register_call_out_argument(uint32_t id); void register_impure_function_call(); @@ -618,6 +903,16 @@ class CompilerGLSL : public Compiler void emit_pls(); void remap_pls_variables(); + // GL_EXT_shader_framebuffer_fetch support. 
+ std::vector> subpass_to_framebuffer_fetch_attachment; + std::vector> inout_color_attachments; + bool location_is_framebuffer_fetch(uint32_t location) const; + bool location_is_non_coherent_framebuffer_fetch(uint32_t location) const; + bool subpass_input_is_framebuffer_fetch(uint32_t id) const; + void emit_inout_fragment_outputs_copy_to_subpass_inputs(); + const SPIRVariable *find_subpass_input_by_attachment_index(uint32_t index) const; + const SPIRVariable *find_color_output_by_location(uint32_t location) const; + // A variant which takes two sets of name. The secondary is only used to verify there are no collisions, // but the set is not updated when we have found a new name. // Used primarily when adding block interface names. @@ -626,8 +921,14 @@ class CompilerGLSL : public Compiler void check_function_call_constraints(const uint32_t *args, uint32_t length); void handle_invalid_expression(uint32_t id); + void force_temporary_and_recompile(uint32_t id); void find_static_extensions(); + uint32_t consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision); + std::unordered_map temporary_to_mirror_precision_alias; + std::unordered_set composite_insert_overwritten; + std::unordered_set block_composite_insert_overwrite; + std::string emit_for_loop_initializers(const SPIRBlock &block); void emit_while_loop_initializers(const SPIRBlock &block); bool for_loop_initializers_are_same_type(const SPIRBlock &block); @@ -636,10 +937,6 @@ class CompilerGLSL : public Compiler bool type_is_empty(const SPIRType &type); - virtual void declare_undefined_values(); - - static std::string sanitize_underscores(const std::string &str); - bool can_use_io_location(spv::StorageClass storage, bool block); const Instruction *get_next_instruction_in_block(const Instruction &instr); static uint32_t mask_relevant_memory_semantics(uint32_t semantics); @@ -652,16 +949,18 @@ class CompilerGLSL : public Compiler // Builtins in GLSL are always specific 
signedness, but the SPIR-V can declare them // as either unsigned or signed. - // Sometimes we will need to automatically perform bitcasts on load and store to make this work. - virtual void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type); - virtual void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type); + // Sometimes we will need to automatically perform casts on load and store to make this work. + virtual void cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type); + virtual void cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type); void unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr); - void convert_non_uniform_expression(const SPIRType &type, std::string &expr); + bool unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id); + void convert_non_uniform_expression(std::string &expr, uint32_t ptr_id); void handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id); void disallow_forwarding_in_expression_chain(const SPIRExpression &expr); bool expression_is_constant_null(uint32_t id) const; + bool expression_is_non_value_type_array(uint32_t ptr); virtual void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression); uint32_t get_integer_width_for_instruction(const Instruction &instr) const; @@ -673,11 +972,29 @@ class CompilerGLSL : public Compiler void fixup_type_alias(); void reorder_type_alias(); + void fixup_anonymous_struct_names(); + void fixup_anonymous_struct_names(std::unordered_set &visited, const SPIRType &type); + + static const char *vector_swizzle(int vecsize, int index); - void propagate_nonuniform_qualifier(uint32_t id); + bool is_stage_output_location_masked(uint32_t location, uint32_t component) const; + bool is_stage_output_builtin_masked(spv::BuiltIn builtin) const; + bool 
is_stage_output_variable_masked(const SPIRVariable &var) const; + bool is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const; + bool is_per_primitive_variable(const SPIRVariable &var) const; + uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const; + uint32_t get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const; + std::unordered_set masked_output_locations; + std::unordered_set masked_output_builtins; private: void init(); + + SmallVector get_composite_constant_ids(ConstantID const_id); + void fill_composite_constant(SPIRConstant &constant, TypeID type_id, const SmallVector &initializers); + void set_composite_constant(ConstantID const_id, TypeID type_id, const SmallVector &initializers); + TypeID get_composite_member_type(TypeID type_id, uint32_t member_idx); + std::unordered_map> const_composite_insert_ids; }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 57bbef8b818..b3ba58041ae 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 Robert Konrad + * Copyright 2016-2021 Robert Konrad + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,6 +13,13 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. + * + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
*/ #include "spirv_hlsl.hpp" @@ -23,6 +31,41 @@ using namespace spv; using namespace SPIRV_CROSS_NAMESPACE; using namespace std; +enum class ImageFormatNormalizedState +{ + None = 0, + Unorm = 1, + Snorm = 2 +}; + +static ImageFormatNormalizedState image_format_to_normalized_state(ImageFormat fmt) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + case ImageFormatRg8: + case ImageFormatRg16: + case ImageFormatRgba8: + case ImageFormatRgba16: + case ImageFormatRgb10A2: + return ImageFormatNormalizedState::Unorm; + + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + case ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + return ImageFormatNormalizedState::Snorm; + + default: + break; + } + + return ImageFormatNormalizedState::None; +} + static unsigned image_format_to_components(ImageFormat fmt) { switch (fmt) @@ -203,13 +246,15 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype) } } -string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) +string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id) { auto &imagetype = get(type.image.type); const char *dim = nullptr; bool typed_load = false; uint32_t components = 4; + bool force_image_srv = hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id, DecorationNonWritable); + switch (type.image.dim) { case Dim1D: @@ -235,7 +280,19 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) if (type.image.sampled == 1) return join("Buffer<", type_to_glsl(imagetype), components, ">"); else if (type.image.sampled == 2) - return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">"); + { + if (interlocked_resources.count(id)) + return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype), + ">"); + + typed_load = !force_image_srv && type.image.sampled == 2; + + const char *rw = 
force_image_srv ? "" : "RW"; + return join(rw, "Buffer<", + typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : + join(type_to_glsl(imagetype), components), + ">"); + } else SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime."); case DimSubpassData: @@ -247,14 +304,21 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) } const char *arrayed = type.image.arrayed ? "Array" : ""; const char *ms = type.image.ms ? "MS" : ""; - const char *rw = typed_load ? "RW" : ""; + const char *rw = typed_load && !force_image_srv ? "RW" : ""; + + if (force_image_srv) + typed_load = false; + + if (typed_load && interlocked_resources.count(id)) + rw = "RasterizerOrdered"; + return join(rw, "Texture", dim, ms, arrayed, "<", typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : join(type_to_glsl(imagetype), components), ">"); } -string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t id) +string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t /*id*/) { auto &imagetype = get(type.image.type); string res; @@ -317,8 +381,6 @@ string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t id) res += "MS"; if (type.image.arrayed) res += "Array"; - if (image_is_comparison(type, id)) - res += "Shadow"; return res; } @@ -374,15 +436,36 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::AtomicCounter: return "atomic_uint"; case SPIRType::Half: - return "min16float"; + if (hlsl_options.enable_16bit_types) + return "half"; + else + return "min16float"; + case SPIRType::Short: + if (hlsl_options.enable_16bit_types) + return "int16_t"; + else + return "min16int"; + case SPIRType::UShort: + if (hlsl_options.enable_16bit_types) + return "uint16_t"; + else + return "min16uint"; case SPIRType::Float: return "float"; case SPIRType::Double: return "double"; case SPIRType::Int64: + if 
(hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); return "int64_t"; case SPIRType::UInt64: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); return "uint64_t"; + case SPIRType::AccelerationStructure: + return "RaytracingAccelerationStructure"; + case SPIRType::RayQuery: + return "RayQuery"; default: return "???"; } @@ -398,7 +481,11 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::UInt: return join("uint", type.vecsize); case SPIRType::Half: - return join("min16float", type.vecsize); + return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.vecsize); + case SPIRType::Short: + return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.vecsize); + case SPIRType::UShort: + return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.vecsize); case SPIRType::Float: return join("float", type.vecsize); case SPIRType::Double: @@ -422,7 +509,11 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::UInt: return join("uint", type.columns, "x", type.vecsize); case SPIRType::Half: - return join("min16float", type.columns, "x", type.vecsize); + return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.columns, "x", type.vecsize); + case SPIRType::Short: + return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.columns, "x", type.vecsize); + case SPIRType::UShort: + return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.columns, "x", type.vecsize); case SPIRType::Float: return join("float", type.columns, "x", type.vecsize); case SPIRType::Double: @@ -483,10 +574,17 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() switch (builtin) { case BuiltInPosition: - type = "float4"; + type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4"; semantic = legacy ? 
"POSITION" : "SV_Position"; break; + case BuiltInSampleMask: + if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher."); + type = "uint"; + semantic = "SV_Coverage"; + break; + case BuiltInFragDepth: type = "float"; if (legacy) @@ -505,36 +603,80 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() break; case BuiltInClipDistance: + { + static const char *types[] = { "float", "float2", "float3", "float4" }; + // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. - for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + if (execution.model == ExecutionModelMeshEXT) { - uint32_t to_declare = clip_distance_count - clip; - if (to_declare > 4) - to_declare = 4; + if (clip_distance_count > 4) + SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders."); - uint32_t semantic_index = clip / 4; + if (clip_distance_count == 1) + { + // Avoids having to hack up access_chain code. Makes it trivially indexable. + statement("float gl_ClipDistance[1] : SV_ClipDistance;"); + } + else + { + // Replace array with vector directly, avoids any weird fixup path. 
+ statement(types[clip_distance_count - 1], " gl_ClipDistance : SV_ClipDistance;"); + } + } + else + { + for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + { + uint32_t to_declare = clip_distance_count - clip; + if (to_declare > 4) + to_declare = 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; - statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, - " : SV_ClipDistance", semantic_index, ";"); + uint32_t semantic_index = clip / 4; + + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_ClipDistance", semantic_index, ";"); + } } break; + } case BuiltInCullDistance: + { + static const char *types[] = { "float", "float2", "float3", "float4" }; + // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. - for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + if (execution.model == ExecutionModelMeshEXT) { - uint32_t to_declare = cull_distance_count - cull; - if (to_declare > 4) - to_declare = 4; + if (cull_distance_count > 4) + SPIRV_CROSS_THROW("Cull distance count > 4 not supported for mesh shaders."); - uint32_t semantic_index = cull / 4; + if (cull_distance_count == 1) + { + // Avoids having to hack up access_chain code. Makes it trivially indexable. + statement("float gl_CullDistance[1] : SV_CullDistance;"); + } + else + { + // Replace array with vector directly, avoids any weird fixup path. 
+ statement(types[cull_distance_count - 1], " gl_CullDistance : SV_CullDistance;"); + } + } + else + { + for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + { + uint32_t to_declare = cull_distance_count - cull; + if (to_declare > 4) + to_declare = 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; - statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, - " : SV_CullDistance", semantic_index, ";"); + uint32_t semantic_index = cull / 4; + + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_CullDistance", semantic_index, ";"); + } } break; + } case BuiltInPointSize: // If point_size_compat is enabled, just ignore PointSize. @@ -545,8 +687,69 @@ void CompilerHLSL::emit_builtin_outputs_in_struct() else SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInCullPrimitiveEXT: + // per-primitive attributes handled separatly + break; + + case BuiltInPrimitivePointIndicesEXT: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInPrimitiveTriangleIndicesEXT: + // meshlet local-index buffer handled separatly + break; + default: SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + } + + if (type && semantic) + statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";"); + }); +} + +void CompilerHLSL::emit_builtin_primitive_outputs_in_struct() +{ + active_output_builtins.for_each_bit([&](uint32_t i) { + const char *type = nullptr; + const char *semantic = nullptr; + auto builtin = static_cast(i); + switch (builtin) + { + case BuiltInLayer: + { + const ExecutionModel model = get_entry_point().model; + if (hlsl_options.shader_model < 50 || + (model != ExecutionModelGeometry && model != ExecutionModelMeshEXT)) + SPIRV_CROSS_THROW("Render target array index output is only supported in GS/MS 
5.0 or higher."); + type = "uint"; + semantic = "SV_RenderTargetArrayIndex"; + break; + } + + case BuiltInPrimitiveId: + type = "uint"; + semantic = "SV_PrimitiveID"; + break; + + case BuiltInViewportIndex: + type = "uint"; + semantic = "SV_ViewportArrayIndex"; + break; + + case BuiltInPrimitiveShadingRateKHR: + type = "uint"; + semantic = "SV_ShadingRate"; + break; + + case BuiltInCullPrimitiveEXT: + type = "bool"; + semantic = "SV_CullPrimitive"; + break; + + default: break; } @@ -577,6 +780,11 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() semantic = "SV_VertexID"; break; + case BuiltInPrimitiveId: + type = "uint"; + semantic = "SV_PrimitiveID"; + break; + case BuiltInInstanceId: case BuiltInInstanceIndex: if (legacy) @@ -592,6 +800,13 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() semantic = "SV_SampleIndex"; break; + case BuiltInSampleMask: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Sample Mask input is only supported in PS 5.0 or higher."); + type = "uint"; + semantic = "SV_Coverage"; + break; + case BuiltInGlobalInvocationId: type = "uint3"; semantic = "SV_DispatchThreadID"; @@ -617,6 +832,13 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() semantic = "SV_IsFrontFace"; break; + case BuiltInViewIndex: + if (hlsl_options.shader_model < 61 || (get_entry_point().model != ExecutionModelVertex && get_entry_point().model != ExecutionModelFragment)) + SPIRV_CROSS_THROW("View Index input is only supported in VS and PS 6.1 or higher."); + type = "uint"; + semantic = "SV_ViewID"; + break; + case BuiltInNumWorkgroups: case BuiltInSubgroupSize: case BuiltInSubgroupLocalInvocationId: @@ -625,9 +847,16 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() case BuiltInSubgroupLeMask: case BuiltInSubgroupGtMask: case BuiltInSubgroupGeMask: + case BuiltInBaseVertex: + case BuiltInBaseInstance: // Handled specially. 
break; + case BuiltInHelperInvocation: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher."); + break; + case BuiltInClipDistance: // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) @@ -667,9 +896,15 @@ void CompilerHLSL::emit_builtin_inputs_in_struct() else SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + case BuiltInLayer: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Render target array index input is only supported in PS 5.0 or higher."); + type = "uint"; + semantic = "SV_RenderTargetArrayIndex"; + break; + default: SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); - break; } if (type && semantic) @@ -695,7 +930,7 @@ uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const if (type.array_size_literal[i]) array_multiplier *= type.array[i]; else - array_multiplier *= get(type.array[i]).scalar(); + array_multiplier *= evaluate_constant_u32(type.array[i]); } elements += array_multiplier * type.columns; } @@ -717,8 +952,8 @@ string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags) res += "patch "; // Seems to be different in actual HLSL. if (flags.get(DecorationSample)) res += "sample "; - if (flags.get(DecorationInvariant)) - res += "invariant "; // Not supported? + if (flags.get(DecorationInvariant) && backend.support_precise_qualifier) + res += "precise "; // Not supported? 
return res; } @@ -738,48 +973,40 @@ std::string CompilerHLSL::to_semantic(uint32_t location, ExecutionModel em, Stor return join("TEXCOORD", location); } -void CompilerHLSL::emit_io_block(const SPIRVariable &var) +std::string CompilerHLSL::to_initializer_expression(const SPIRVariable &var) { - auto &execution = get_entry_point(); - + // We cannot emit static const initializer for block constants for practical reasons, + // so just inline the initializer. + // FIXME: There is a theoretical problem here if someone tries to composite extract + // into this initializer since we don't declare it properly, but that is somewhat non-sensical. auto &type = get(var.basetype); - add_resource_name(type.self); - - statement("struct ", to_name(type.self)); - begin_scope(); - type.member_name_cache.clear(); - - uint32_t base_location = get_decoration(var.self, DecorationLocation); - - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) - { - string semantic; - if (has_member_decoration(type.self, i, DecorationLocation)) - { - uint32_t location = get_member_decoration(type.self, i, DecorationLocation); - semantic = join(" : ", to_semantic(location, execution.model, var.storage)); - } - else - { - // If the block itself has a location, but not its members, use the implicit location. - // There could be a conflict if the block members partially specialize the locations. - // It is unclear how SPIR-V deals with this. Assume this does not happen for now. 
- uint32_t location = base_location + i; - semantic = join(" : ", to_semantic(location, execution.model, var.storage)); - } - - add_member_name(type, i); - - auto &membertype = get(type.member_types[i]); - statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, i)), - variable_decl(membertype, to_member_name(type, i)), semantic, ";"); - } - - end_scope_decl(); - statement(""); + bool is_block = has_decoration(type.self, DecorationBlock); + auto *c = maybe_get(var.initializer); + if (is_block && c) + return constant_expression(*c); + else + return CompilerGLSL::to_initializer_expression(var); +} - statement("static ", variable_decl(var), ";"); - statement(""); +void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index, + uint32_t location, + std::unordered_set &active_locations) +{ + auto &execution = get_entry_point(); + auto type = get(var.basetype); + auto semantic = to_semantic(location, execution.model, var.storage); + auto mbr_name = join(to_name(type.self), "_", to_member_name(type, member_index)); + auto &mbr_type = get(type.member_types[member_index]); + + statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, member_index)), + type_to_glsl(mbr_type), + " ", mbr_name, type_to_array_glsl(mbr_type), + " : ", semantic, ";"); + + // Structs and arrays should consume more locations. 
+ uint32_t consumed_locations = type_to_consumed_locations(mbr_type); + for (uint32_t i = 0; i < consumed_locations; i++) + active_locations.insert(location + i); } void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set &active_locations) @@ -814,7 +1041,6 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; - auto &m = ir.meta[var.self].decoration; auto name = to_name(var.self); if (use_location_number) { @@ -822,8 +1048,8 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord // If an explicit location exists, use it with TEXCOORD[N] semantic. // Otherwise, pick a vacant location. - if (m.decoration_flags.get(DecorationLocation)) - location_number = m.location; + if (has_decoration(var.self, DecorationLocation)) + location_number = get_decoration(var.self, DecorationLocation); else location_number = get_vacant_location(); @@ -840,24 +1066,39 @@ void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unord { SPIRType newtype = type; newtype.columns = 1; + + string effective_semantic; + if (hlsl_options.flatten_matrix_vertex_input_semantics) + effective_semantic = to_semantic(location_number, execution.model, var.storage); + else + effective_semantic = join(semantic, "_", i); + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), - variable_decl(newtype, join(name, "_", i)), " : ", semantic, "_", i, ";"); + variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";"); active_locations.insert(location_number++); } } else { - statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(type, name), " : ", + auto decl_type = type; + if (execution.model == ExecutionModelMeshEXT) + { + decl_type.array.erase(decl_type.array.begin()); + 
decl_type.array_size_literal.erase(decl_type.array_size_literal.begin()); + } + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ", semantic, ";"); // Structs and arrays should consume more locations. - uint32_t consumed_locations = type_to_consumed_locations(type); + uint32_t consumed_locations = type_to_consumed_locations(decl_type); for (uint32_t i = 0; i < consumed_locations; i++) active_locations.insert(location_number + i); } } else + { statement(variable_decl(type, name), " : ", binding, ";"); + } } std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) @@ -876,7 +1117,9 @@ std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClas auto &var = get(num_workgroups_builtin); auto &type = get(var.basetype); - return sanitize_underscores(join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0))); + auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0)); + ParsedIR::sanitize_underscores(ret); + return ret; } case BuiltInPointCoord: // Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set. 
@@ -885,6 +1128,8 @@ std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClas return "WaveGetLaneIndex()"; case BuiltInSubgroupSize: return "WaveGetLaneCount()"; + case BuiltInHelperInvocation: + return "IsHelperLane()"; default: return CompilerGLSL::builtin_to_glsl(builtin, storage); @@ -896,7 +1141,31 @@ void CompilerHLSL::emit_builtin_variables() Bitset builtins = active_input_builtins; builtins.merge_or(active_output_builtins); - bool need_base_vertex_info = false; + std::unordered_map builtin_to_initializer; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (!is_builtin_variable(var) || var.storage != StorageClassOutput || !var.initializer) + return; + + auto *c = this->maybe_get(var.initializer); + if (!c) + return; + + auto &type = this->get(var.basetype); + if (type.basetype == SPIRType::Struct) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationBuiltIn)) + { + builtin_to_initializer[get_member_decoration(type.self, i, DecorationBuiltIn)] = + c->subconstants[i]; + } + } + } + else if (has_decoration(var.self, DecorationBuiltIn)) + builtin_to_initializer[get_decoration(var.self, DecorationBuiltIn)] = var.initializer; + }); // Emit global variables for the interface variables which are statically used by the shader. 
builtins.for_each_bit([&](uint32_t i) { @@ -904,6 +1173,23 @@ void CompilerHLSL::emit_builtin_variables() auto builtin = static_cast(i); uint32_t array_size = 0; + string init_expr; + auto init_itr = builtin_to_initializer.find(builtin); + if (init_itr != builtin_to_initializer.end()) + init_expr = join(" = ", to_expression(init_itr->second)); + + if (get_execution_model() == ExecutionModelMeshEXT) + { + if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId || + builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT || + builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT || + builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT) + { + return; + } + } + switch (builtin) { case BuiltInFragCoord: @@ -920,7 +1206,13 @@ void CompilerHLSL::emit_builtin_variables() case BuiltInInstanceIndex: type = "int"; if (hlsl_options.support_nonzero_base_vertex_base_instance) - need_base_vertex_info = true; + base_vertex_info.used = true; + break; + + case BuiltInBaseVertex: + case BuiltInBaseInstance: + type = "int"; + base_vertex_info.used = true; break; case BuiltInInstanceId: @@ -973,6 +1265,11 @@ void CompilerHLSL::emit_builtin_variables() type = "uint4"; break; + case BuiltInHelperInvocation: + if (hlsl_options.shader_model < 50) + SPIRV_CROSS_THROW("Need SM 5.0 for Helper Invocation."); + break; + case BuiltInClipDistance: array_size = clip_distance_count; type = "float"; @@ -983,26 +1280,56 @@ void CompilerHLSL::emit_builtin_variables() type = "float"; break; + case BuiltInSampleMask: + type = "int"; + break; + + case BuiltInPrimitiveId: + case BuiltInViewIndex: + case BuiltInLayer: + type = "uint"; + break; + + case BuiltInViewportIndex: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInCullPrimitiveEXT: + 
type = "uint"; + break; + default: SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); } StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput; - // FIXME: SampleMask can be both in and out with sample builtin, - // need to distinguish that when we add support for that. if (type) { if (array_size) - statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "];"); + statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "]", init_expr, ";"); else - statement("static ", type, " ", builtin_to_glsl(builtin, storage), ";"); + statement("static ", type, " ", builtin_to_glsl(builtin, storage), init_expr, ";"); + } + + // SampleMask can be both in and out with sample builtin, in this case we have already + // declared the input variable and we need to add the output one now. + if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i)) + { + statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), init_expr, ";"); } }); - if (need_base_vertex_info) + if (base_vertex_info.used) { - statement("cbuffer SPIRV_Cross_VertexInfo"); + string binding_info; + if (base_vertex_info.explicit_binding) + { + binding_info = join(" : register(b", base_vertex_info.register_index); + if (base_vertex_info.register_space) + binding_info += join(", space", base_vertex_info.register_space); + binding_info += ")"; + } + statement("cbuffer SPIRV_Cross_VertexInfo", binding_info); begin_scope(); statement("int SPIRV_Cross_BaseVertex;"); statement("int SPIRV_Cross_BaseInstance;"); @@ -1011,6 +1338,30 @@ void CompilerHLSL::emit_builtin_variables() } } +void CompilerHLSL::set_hlsl_aux_buffer_binding(HLSLAuxBinding binding, uint32_t register_index, uint32_t register_space) +{ + if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) + { + base_vertex_info.explicit_binding = true; + base_vertex_info.register_space = 
register_space; + base_vertex_info.register_index = register_index; + } +} + +void CompilerHLSL::unset_hlsl_aux_buffer_binding(HLSLAuxBinding binding) +{ + if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) + base_vertex_info.explicit_binding = false; +} + +bool CompilerHLSL::is_hlsl_aux_buffer_binding_used(HLSLAuxBinding binding) const +{ + if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) + return base_vertex_info.used; + else + return false; +} + void CompilerHLSL::emit_composite_constants() { // HLSL cannot declare structs or arrays inline, so we must move them out to @@ -1022,8 +1373,13 @@ void CompilerHLSL::emit_composite_constants() return; auto &type = this->get(c.constant_type); + + if (type.basetype == SPIRType::Struct && is_builtin_type(type)) + return; + if (type.basetype == SPIRType::Struct || !type.array.empty()) { + add_resource_name(c.self); auto name = to_name(c.self); statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); emitted = true; @@ -1038,9 +1394,22 @@ void CompilerHLSL::emit_specialization_constants_and_structs() { bool emitted = false; SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + std::unordered_set io_block_types; + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + auto &type = this->get(var.basetype); + if ((var.storage == StorageClassInput || var.storage == StorageClassOutput) && + !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self) && + has_decoration(type.self, DecorationBlock)) + { + io_block_types.insert(type.self); + } + }); - for (auto &id_ : ir.ids_for_constant_or_type) + auto loop_lock = ir.create_loop_hard_lock(); + for (auto &id_ : ir.ids_for_constant_undef_or_type) { auto &id = ir.ids[id_]; @@ -1057,16 +1426,23 @@ 
void CompilerHLSL::emit_specialization_constants_and_structs() else if (c.specialization) { auto &type = get(c.constant_type); + add_resource_name(c.self); auto name = to_name(c.self); - // HLSL does not support specialization constants, so fallback to macros. - c.specialization_constant_macro_name = - constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + if (has_decoration(c.self, DecorationSpecId)) + { + // HLSL does not support specialization constants, so fallback to macros. + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";"); + } + else + statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); - statement("#ifndef ", c.specialization_constant_macro_name); - statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); - statement("#endif"); - statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";"); emitted = true; } } @@ -1074,6 +1450,7 @@ void CompilerHLSL::emit_specialization_constants_and_structs() { auto &c = id.get(); auto &type = get(c.basetype); + add_resource_name(c.self); auto name = to_name(c.self); statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); emitted = true; @@ -1081,9 +1458,11 @@ void CompilerHLSL::emit_specialization_constants_and_structs() else if (id.get_type() == TypeType) { auto &type = id.get(); - if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && - (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && - !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + 
bool is_non_io_block = has_decoration(type.self, DecorationBlock) && + io_block_types.count(type.self) == 0; + bool is_buffer_block = has_decoration(type.self, DecorationBufferBlock); + if (type.basetype == SPIRType::Struct && type.array.empty() && + !type.pointer && !is_non_io_block && !is_buffer_block) { if (emitted) statement(""); @@ -1092,6 +1471,21 @@ void CompilerHLSL::emit_specialization_constants_and_structs() emit_struct(type); } } + else if (id.get_type() == TypeUndef) + { + auto &undef = id.get(); + auto &type = this->get(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; + + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); + + statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); + emitted = true; + } } if (emitted) @@ -1102,18 +1496,36 @@ void CompilerHLSL::replace_illegal_names() { static const unordered_set keywords = { // Additional HLSL specific keywords. 
- "line", "linear", "matrix", "point", "row_major", "sampler", + // From https://docs.microsoft.com/en-US/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords + "AppendStructuredBuffer", "asm", "asm_fragment", + "BlendState", "bool", "break", "Buffer", "ByteAddressBuffer", + "case", "cbuffer", "centroid", "class", "column_major", "compile", + "compile_fragment", "CompileShader", "const", "continue", "ComputeShader", + "ConsumeStructuredBuffer", + "default", "DepthStencilState", "DepthStencilView", "discard", "do", + "double", "DomainShader", "dword", + "else", "export", "false", "float", "for", "fxgroup", + "GeometryShader", "groupshared", "half", "HullShader", + "indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface", + "line", "lineadj", "linear", "LineStream", + "matrix", "min16float", "min10float", "min16int", "min16uint", + "namespace", "nointerpolation", "noperspective", "NULL", + "out", "OutputPatch", + "payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point", + "PointStream", "precise", "RasterizerState", "RenderTargetView", + "return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer", + "RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D", + "RWTexture2DArray", "RWTexture3D", "sample", "sampler", "SamplerState", + "SamplerComparisonState", "shared", "snorm", "stateblock", "stateblock_state", + "static", "string", "struct", "switch", "StructuredBuffer", "tbuffer", + "technique", "technique10", "technique11", "texture", "Texture1D", + "Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray", + "Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle", + "triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned", + "vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while", }; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - if (!is_hidden_variable(var)) - { - auto &m = 
ir.meta[var.self].decoration; - if (keywords.find(m.alias) != end(keywords)) - m.alias = join("_", m.alias); - } - }); - + CompilerGLSL::replace_illegal_names(keywords); CompilerGLSL::replace_illegal_names(); } @@ -1123,6 +1535,19 @@ void CompilerHLSL::emit_resources() replace_illegal_names(); + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); + break; + + default: + break; + } + emit_specialization_constants_and_structs(); emit_composite_constants(); @@ -1155,7 +1580,8 @@ void CompilerHLSL::emit_resources() } }); - if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30) + if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30 && + active_output_builtins.get(BuiltInPosition)) { statement("uniform float4 gl_HalfPixel;"); emitted = true; @@ -1179,7 +1605,8 @@ void CompilerHLSL::emit_resources() } if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable && - type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter)) + type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) && + !is_hidden_variable(var)) { emit_uniform(var); emitted = true; @@ -1193,22 +1620,21 @@ void CompilerHLSL::emit_resources() // Emit builtin input and output variables here. emit_builtin_variables(); - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + if (execution.model != ExecutionModelMeshEXT) + { + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); - // Do not emit I/O blocks here. 
- // I/O blocks can be arrayed, so we must deal with them separately to support geometry shaders - // and tessellation down the line. - if (!block && var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && - (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && - interface_variable_exists_in_entry_point(var.self)) - { - // Only emit non-builtins which are not blocks here. Builtin variables are handled separately. - emit_interface_block_globally(var); - emitted = true; - } - }); + if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + // Builtin variables are handled separately. + emit_interface_block_globally(var); + emitted = true; + } + }); + } if (emitted) statement(""); @@ -1218,69 +1644,72 @@ void CompilerHLSL::emit_resources() require_output = false; unordered_set active_inputs; unordered_set active_outputs; - SmallVector input_variables; - SmallVector output_variables; + + struct IOVariable + { + const SPIRVariable *var; + uint32_t location; + uint32_t block_member_index; + bool block; + }; + + SmallVector input_variables; + SmallVector output_variables; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + bool block = has_decoration(type.self, DecorationBlock); if (var.storage != StorageClassInput && var.storage != StorageClassOutput) return; - // Do not emit I/O blocks here. - // I/O blocks can be arrayed, so we must deal with them separately to support geometry shaders - // and tessellation down the line. 
- if (!block && !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) { - if (var.storage == StorageClassInput) - input_variables.push_back(&var); - else - output_variables.push_back(&var); - } - - // Reserve input and output locations for block variables as necessary. - if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) - { - auto &active = var.storage == StorageClassInput ? active_inputs : active_outputs; - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + if (block) { - if (has_member_decoration(type.self, i, DecorationLocation)) + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { - uint32_t location = get_member_decoration(type.self, i, DecorationLocation); - active.insert(location); + uint32_t location = get_declared_member_location(var, i, false); + if (var.storage == StorageClassInput) + input_variables.push_back({ &var, location, i, true }); + else + output_variables.push_back({ &var, location, i, true }); } } - - // Emit the block struct and a global variable here. 
- emit_io_block(var); + else + { + uint32_t location = get_decoration(var.self, DecorationLocation); + if (var.storage == StorageClassInput) + input_variables.push_back({ &var, location, 0, false }); + else + output_variables.push_back({ &var, location, 0, false }); + } } }); - const auto variable_compare = [&](const SPIRVariable *a, const SPIRVariable *b) -> bool { + const auto variable_compare = [&](const IOVariable &a, const IOVariable &b) -> bool { // Sort input and output variables based on, from more robust to less robust: // - Location // - Variable has a location // - Name comparison // - Variable has a name // - Fallback: ID - bool has_location_a = has_decoration(a->self, DecorationLocation); - bool has_location_b = has_decoration(b->self, DecorationLocation); + bool has_location_a = a.block || has_decoration(a.var->self, DecorationLocation); + bool has_location_b = b.block || has_decoration(b.var->self, DecorationLocation); if (has_location_a && has_location_b) - { - return get_decoration(a->self, DecorationLocation) < get_decoration(b->self, DecorationLocation); - } + return a.location < b.location; else if (has_location_a && !has_location_b) return true; else if (!has_location_a && has_location_b) return false; - const auto &name1 = to_name(a->self); - const auto &name2 = to_name(b->self); + const auto &name1 = to_name(a.var->self); + const auto &name2 = to_name(b.var->self); if (name1.empty() && name2.empty()) - return a->self < b->self; + return a.var->self < b.var->self; else if (name1.empty()) return true; else if (name2.empty()) @@ -1307,33 +1736,71 @@ void CompilerHLSL::emit_resources() begin_scope(); sort(input_variables.begin(), input_variables.end(), variable_compare); - for (auto var : input_variables) - emit_interface_block_in_struct(*var, active_inputs); + for (auto &var : input_variables) + { + if (var.block) + emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_inputs); + else + 
emit_interface_block_in_struct(*var.var, active_inputs); + } emit_builtin_inputs_in_struct(); end_scope_decl(); statement(""); } + const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT; if (!output_variables.empty() || !active_output_builtins.empty()) { - require_output = true; - statement("struct SPIRV_Cross_Output"); + sort(output_variables.begin(), output_variables.end(), variable_compare); + require_output = !is_mesh_shader; + statement(is_mesh_shader ? "struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output"); begin_scope(); - // FIXME: Use locations properly if they exist. - sort(output_variables.begin(), output_variables.end(), variable_compare); - for (auto var : output_variables) - emit_interface_block_in_struct(*var, active_outputs); + for (auto &var : output_variables) + { + if (is_per_primitive_variable(*var.var)) + continue; + if (var.block && is_mesh_shader && var.block_member_index != 0) + continue; + if (var.block && !is_mesh_shader) + emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); + else + emit_interface_block_in_struct(*var.var, active_outputs); + } emit_builtin_outputs_in_struct(); + if (!is_mesh_shader) + emit_builtin_primitive_outputs_in_struct(); end_scope_decl(); statement(""); + + if (is_mesh_shader) + { + statement("struct gl_MeshPerPrimitiveEXT"); + begin_scope(); + for (auto &var : output_variables) + { + if (!is_per_primitive_variable(*var.var)) + continue; + if (var.block && var.block_member_index != 0) + continue; + + emit_interface_block_in_struct(*var.var, active_outputs); + } + emit_builtin_primitive_outputs_in_struct(); + end_scope_decl(); + statement(""); + } } // Global variables. 
for (auto global : global_variables) { auto &var = get(global); - if (var.storage != StorageClassOutput) + if (is_hidden_variable(var, true)) + continue; + + if (var.storage != StorageClassOutput && + var.storage != StorageClassTaskPayloadWorkgroupEXT) { if (!variable_is_lut(var)) { @@ -1350,7 +1817,15 @@ void CompilerHLSL::emit_resources() storage = "static"; break; } - statement(storage, " ", variable_decl(var), ";"); + + string initializer; + if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && + !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); + } + statement(storage, " ", variable_decl(var), initializer, ";"); + emitted = true; } } @@ -1359,8 +1834,6 @@ void CompilerHLSL::emit_resources() if (emitted) statement(""); - declare_undefined_values(); - if (requires_op_fmod) { static const char *types[] = { @@ -1380,97 +1853,64 @@ void CompilerHLSL::emit_resources() } } - if (required_textureSizeVariants != 0) + emit_texture_size_variants(required_texture_size_variants.srv, "4", false, ""); + for (uint32_t norm = 0; norm < 3; norm++) { - static const char *types[QueryTypeCount] = { "float4", "int4", "uint4" }; - static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", - "Texture3D", "Buffer", "TextureCube", "TextureCubeArray", - "Texture2DMS", "Texture2DMSArray" }; - - static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false }; - - static const char *ret_types[QueryDimCount] = { - "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3", - }; - - static const uint32_t return_arguments[QueryDimCount] = { - 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, - }; - - for (uint32_t index = 0; index < QueryDimCount; index++) + for (uint32_t comp = 0; comp < 4; comp++) { - for (uint32_t 
type_index = 0; type_index < QueryTypeCount; type_index++) - { - uint32_t bit = 16 * type_index + index; - uint64_t mask = 1ull << bit; - - if ((required_textureSizeVariants & mask) == 0) - continue; - - statement(ret_types[index], " SPIRV_Cross_textureSize(", dims[index], "<", types[type_index], - "> Tex, uint Level, out uint Param)"); - begin_scope(); - statement(ret_types[index], " ret;"); - switch (return_arguments[index]) - { - case 1: - if (has_lod[index]) - statement("Tex.GetDimensions(Level, ret.x, Param);"); - else - { - statement("Tex.GetDimensions(ret.x);"); - statement("Param = 0u;"); - } - break; - case 2: - if (has_lod[index]) - statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);"); - else - statement("Tex.GetDimensions(ret.x, ret.y, Param);"); - break; - case 3: - if (has_lod[index]) - statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);"); - else - statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);"); - break; - } - - statement("return ret;"); - end_scope(); - statement(""); - } + static const char *qualifiers[] = { "", "unorm ", "snorm " }; + static const char *vecsizes[] = { "", "2", "3", "4" }; + emit_texture_size_variants(required_texture_size_variants.uav[norm][comp], vecsizes[comp], true, + qualifiers[norm]); } } if (requires_fp16_packing) { // HLSL does not pack into a single word sadly :( - statement("uint SPIRV_Cross_packHalf2x16(float2 value)"); + statement("uint spvPackHalf2x16(float2 value)"); begin_scope(); statement("uint2 Packed = f32tof16(value);"); statement("return Packed.x | (Packed.y << 16);"); end_scope(); statement(""); - statement("float2 SPIRV_Cross_unpackHalf2x16(uint value)"); + statement("float2 spvUnpackHalf2x16(uint value)"); begin_scope(); statement("return f16tof32(uint2(value & 0xffff, value >> 16));"); end_scope(); statement(""); } + if (requires_uint2_packing) + { + statement("uint64_t spvPackUint2x32(uint2 value)"); + begin_scope(); + statement("return (uint64_t(value.y) << 32) | 
uint64_t(value.x);"); + end_scope(); + statement(""); + + statement("uint2 spvUnpackUint2x32(uint64_t value)"); + begin_scope(); + statement("uint2 Unpacked;"); + statement("Unpacked.x = uint(value & 0xffffffff);"); + statement("Unpacked.y = uint(value >> 32);"); + statement("return Unpacked;"); + end_scope(); + statement(""); + } + if (requires_explicit_fp16_packing) { // HLSL does not pack into a single word sadly :( - statement("uint SPIRV_Cross_packFloat2x16(min16float2 value)"); + statement("uint spvPackFloat2x16(min16float2 value)"); begin_scope(); statement("uint2 Packed = f32tof16(value);"); statement("return Packed.x | (Packed.y << 16);"); end_scope(); statement(""); - statement("min16float2 SPIRV_Cross_unpackFloat2x16(uint value)"); + statement("min16float2 spvUnpackFloat2x16(uint value)"); begin_scope(); statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));"); end_scope(); @@ -1480,14 +1920,14 @@ void CompilerHLSL::emit_resources() // HLSL does not seem to have builtins for these operation, so roll them by hand ... 
if (requires_unorm8_packing) { - statement("uint SPIRV_Cross_packUnorm4x8(float4 value)"); + statement("uint spvPackUnorm4x8(float4 value)"); begin_scope(); statement("uint4 Packed = uint4(round(saturate(value) * 255.0));"); statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);"); end_scope(); statement(""); - statement("float4 SPIRV_Cross_unpackUnorm4x8(uint value)"); + statement("float4 spvUnpackUnorm4x8(uint value)"); begin_scope(); statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);"); statement("return float4(Packed) / 255.0;"); @@ -1497,14 +1937,14 @@ void CompilerHLSL::emit_resources() if (requires_snorm8_packing) { - statement("uint SPIRV_Cross_packSnorm4x8(float4 value)"); + statement("uint spvPackSnorm4x8(float4 value)"); begin_scope(); statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;"); statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));"); end_scope(); statement(""); - statement("float4 SPIRV_Cross_unpackSnorm4x8(uint value)"); + statement("float4 spvUnpackSnorm4x8(uint value)"); begin_scope(); statement("int SignedValue = int(value);"); statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;"); @@ -1515,14 +1955,14 @@ void CompilerHLSL::emit_resources() if (requires_unorm16_packing) { - statement("uint SPIRV_Cross_packUnorm2x16(float2 value)"); + statement("uint spvPackUnorm2x16(float2 value)"); begin_scope(); statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));"); statement("return Packed.x | (Packed.y << 16);"); end_scope(); statement(""); - statement("float2 SPIRV_Cross_unpackUnorm2x16(uint value)"); + statement("float2 spvUnpackUnorm2x16(uint value)"); begin_scope(); statement("uint2 Packed = uint2(value & 0xffff, value >> 16);"); statement("return float2(Packed) / 65535.0;"); @@ -1532,14 +1972,14 @@ void CompilerHLSL::emit_resources() if 
(requires_snorm16_packing) { - statement("uint SPIRV_Cross_packSnorm2x16(float2 value)"); + statement("uint spvPackSnorm2x16(float2 value)"); begin_scope(); statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;"); statement("return uint(Packed.x | (Packed.y << 16));"); end_scope(); statement(""); - statement("float2 SPIRV_Cross_unpackSnorm2x16(uint value)"); + statement("float2 spvUnpackSnorm2x16(uint value)"); begin_scope(); statement("int SignedValue = int(value);"); statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;"); @@ -1553,7 +1993,7 @@ void CompilerHLSL::emit_resources() static const char *types[] = { "uint", "uint2", "uint3", "uint4" }; for (auto &type : types) { - statement(type, " SPIRV_Cross_bitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); + statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); begin_scope(); statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));"); statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);"); @@ -1567,7 +2007,7 @@ void CompilerHLSL::emit_resources() static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" }; for (auto &type : unsigned_types) { - statement(type, " SPIRV_Cross_bitfieldUExtract(", type, " Base, uint Offset, uint Count)"); + statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)"); begin_scope(); statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);"); statement("return (Base >> Offset) & Mask;"); @@ -1579,7 +2019,7 @@ void CompilerHLSL::emit_resources() static const char *signed_types[] = { "int", "int2", "int3", "int4" }; for (auto &type : signed_types) { - statement(type, " SPIRV_Cross_bitfieldSExtract(", type, " Base, int Offset, int Count)"); + statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)"); begin_scope(); statement("int Mask = Count == 32 ? 
-1 : ((1 << Count) - 1);"); statement(type, " Masked = (Base >> Offset) & Mask;"); @@ -1594,7 +2034,7 @@ void CompilerHLSL::emit_resources() { statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("float2x2 SPIRV_Cross_Inverse(float2x2 m)"); + statement("float2x2 spvInverse(float2x2 m)"); begin_scope(); statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); @@ -1618,29 +2058,29 @@ void CompilerHLSL::emit_resources() if (requires_inverse_3x3) { statement("// Returns the determinant of a 2x2 matrix."); - statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)"); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); statement_no_indent(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); - statement("float3x3 SPIRV_Cross_Inverse(float3x3 m)"); + statement("float3x3 spvInverse(float3x3 m)"); begin_scope(); statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); - statement("adj[0][0] = SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); - statement("adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); - statement("adj[0][2] = SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); + statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); + statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); + statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); statement_no_indent(""); - statement("adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); - statement("adj[1][1] = SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); - statement("adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); + statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); + statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); + statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); statement_no_indent(""); - statement("adj[2][0] = SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); - statement("adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); - statement("adj[2][2] = SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); + statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); + statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); + statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); statement_no_indent(""); statement("// Calculate the determinant as a combination of the cofactors of the first row."); statement("float 
det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);"); @@ -1657,7 +2097,7 @@ void CompilerHLSL::emit_resources() if (!requires_inverse_3x3) { statement("// Returns the determinant of a 2x2 matrix."); - statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)"); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); @@ -1665,71 +2105,71 @@ void CompilerHLSL::emit_resources() } statement("// Returns the determinant of a 3x3 matrix."); - statement("float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " "float c2, float c3)"); begin_scope(); - statement("return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * " - "SPIRV_Cross_Det2x2(a2, a3, " + statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * " + "spvDet2x2(a2, a3, " "b2, b3);"); end_scope(); statement_no_indent(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); - statement("float4x4 SPIRV_Cross_Inverse(float4x4 m)"); + statement("float4x4 spvInverse(float4x4 m)"); begin_scope(); statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); statement( - "adj[0][0] = SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " "m[3][3]);"); statement( - "adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " "m[3][3]);"); statement( - "adj[0][2] = SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " + "adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " "m[3][3]);"); statement( - "adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " + "adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " "m[2][3]);"); statement_no_indent(""); statement( - "adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " "m[3][3]);"); statement( - "adj[1][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " "m[3][3]);"); statement( - "adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " + "adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], 
m[3][0], m[3][2], " "m[3][3]);"); statement( - "adj[1][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " + "adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " "m[2][3]);"); statement_no_indent(""); statement( - "adj[2][0] = SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " "m[3][3]);"); statement( - "adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " "m[3][3]);"); statement( - "adj[2][2] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " + "adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " "m[3][3]);"); statement( - "adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " + "adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " "m[2][3]);"); statement_no_indent(""); statement( - "adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " "m[3][2]);"); statement( - "adj[3][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " "m[3][2]);"); statement( - "adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " + "adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " "m[3][2]);"); statement( - "adj[3][3] = 
SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " + "adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " "m[2][2]);"); statement_no_indent(""); statement("// Calculate the determinant as a combination of the cofactors of the first row."); @@ -1746,7 +2186,7 @@ void CompilerHLSL::emit_resources() if (requires_scalar_reflect) { // FP16/FP64? No templates in HLSL. - statement("float SPIRV_Cross_Reflect(float i, float n)"); + statement("float spvReflect(float i, float n)"); begin_scope(); statement("return i - 2.0 * dot(n, i) * n;"); end_scope(); @@ -1756,7 +2196,7 @@ void CompilerHLSL::emit_resources() if (requires_scalar_refract) { // FP16/FP64? No templates in HLSL. - statement("float SPIRV_Cross_Refract(float i, float n, float eta)"); + statement("float spvRefract(float i, float n, float eta)"); begin_scope(); statement("float NoI = n * i;"); statement("float NoI2 = NoI * NoI;"); @@ -1772,6 +2212,304 @@ void CompilerHLSL::emit_resources() end_scope(); statement(""); } + + if (requires_scalar_faceforward) + { + // FP16/FP64? No templates in HLSL. + statement("float spvFaceForward(float n, float i, float nref)"); + begin_scope(); + statement("return i * nref < 0.0 ? n : -n;"); + end_scope(); + statement(""); + } + + for (TypeID type_id : composite_selection_workaround_types) + { + // Need out variable since HLSL does not support returning arrays. 
+ auto &type = get(type_id); + auto type_str = type_to_glsl(type); + auto type_arr_str = type_to_array_glsl(type); + statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ", + type_str, " true_val", type_arr_str, ", ", + type_str, " false_val", type_arr_str, ")"); + begin_scope(); + statement("if (cond)"); + begin_scope(); + statement("out_value = true_val;"); + end_scope(); + statement("else"); + begin_scope(); + statement("out_value = false_val;"); + end_scope(); + end_scope(); + statement(""); + } +} + +void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav, + const char *type_qualifier) +{ + if (variant_mask == 0) + return; + + static const char *types[QueryTypeCount] = { "float", "int", "uint" }; + static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", + "Texture3D", "Buffer", "TextureCube", "TextureCubeArray", + "Texture2DMS", "Texture2DMSArray" }; + + static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false }; + + static const char *ret_types[QueryDimCount] = { + "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3", + }; + + static const uint32_t return_arguments[QueryDimCount] = { + 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, + }; + + for (uint32_t index = 0; index < QueryDimCount; index++) + { + for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++) + { + uint32_t bit = 16 * type_index + index; + uint64_t mask = 1ull << bit; + + if ((variant_mask & mask) == 0) + continue; + + statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""), + dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ", + (uav ? 
"" : "uint Level, "), "out uint Param)"); + begin_scope(); + statement(ret_types[index], " ret;"); + switch (return_arguments[index]) + { + case 1: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, Param);"); + else + { + statement("Tex.GetDimensions(ret.x);"); + statement("Param = 0u;"); + } + break; + case 2: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);"); + else if (!uav) + statement("Tex.GetDimensions(ret.x, ret.y, Param);"); + else + { + statement("Tex.GetDimensions(ret.x, ret.y);"); + statement("Param = 0u;"); + } + break; + case 3: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);"); + else if (!uav) + statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);"); + else + { + statement("Tex.GetDimensions(ret.x, ret.y, ret.z);"); + statement("Param = 0u;"); + } + break; + } + + statement("return ret;"); + end_scope(); + statement(""); + } + } +} + +void CompilerHLSL::analyze_meshlet_writes() +{ + uint32_t id_per_vertex = 0; + uint32_t id_per_primitive = 0; + bool need_per_primitive = false; + bool need_per_vertex = false; + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + id_per_primitive = var.self; + else + id_per_vertex = var.self; + } + else if (var.storage == StorageClassOutput) + { + Bitset flags; + if (block) + flags = get_buffer_block_flags(var.self); + else + flags = get_decoration_bitset(var.self); + + if (flags.get(DecorationPerPrimitiveEXT)) + need_per_primitive = true; + else + need_per_vertex = true; + } + }); + + // If we have per-primitive outputs, and no per-primitive builtins, + // empty version of gl_MeshPerPrimitiveEXT will be emitted. 
+ // If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block. + + const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t { + auto &execution = get_entry_point(); + + uint32_t op_type = ir.increase_bound_by(4); + uint32_t op_arr = op_type + 1; + uint32_t op_ptr = op_type + 2; + uint32_t op_var = op_type + 3; + + auto &type = set(op_type); + type.basetype = SPIRType::Struct; + set_name(op_type, block_name); + set_decoration(op_type, DecorationBlock); + if (per_primitive) + set_decoration(op_type, DecorationPerPrimitiveEXT); + + auto &arr = set(op_arr, type); + arr.parent_type = type.self; + arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices); + arr.array_size_literal.push_back(true); + + auto &ptr = set(op_ptr, arr); + ptr.parent_type = arr.self; + ptr.pointer = true; + ptr.pointer_depth++; + ptr.storage = StorageClassOutput; + set_decoration(op_ptr, DecorationBlock); + set_name(op_ptr, block_name); + + auto &var = set(op_var, op_ptr, StorageClassOutput); + if (per_primitive) + set_decoration(op_var, DecorationPerPrimitiveEXT); + set_name(op_var, instance_name); + execution.interface_variables.push_back(var.self); + + return op_var; + }; + + if (id_per_vertex == 0 && need_per_vertex) + id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false); + if (id_per_primitive == 0 && need_per_primitive) + id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true); + + unordered_set processed_func_ids; + analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids); +} + +void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive, + std::unordered_set &processed_func_ids) +{ + // Avoid processing a function more than once + if (processed_func_ids.find(func_id) != processed_func_ids.end()) + return; 
+ processed_func_ids.insert(func_id); + + auto &func = get(func_id); + // Recursively establish global args added to functions on which we depend. + for (auto& block : func.blocks) + { + auto &b = get(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + // Then recurse into the function itself to extract globals used internally in the function + uint32_t inner_func_id = ops[2]; + analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids); + auto &inner_func = get(inner_func_id); + for (auto &iarg : inner_func.arguments) + { + if (!iarg.alias_global_variable) + continue; + + bool already_declared = false; + for (auto &arg : func.arguments) + { + if (arg.id == iarg.id) + { + already_declared = true; + break; + } + } + + if (!already_declared) + { + // basetype is effectively ignored here since we declare the argument + // with explicit types. Just pass down a valid type. + func.arguments.push_back({ expression_type_id(iarg.id), iarg.id, + iarg.read_count, iarg.write_count, true }); + } + } + break; + } + + case OpStore: + case OpLoad: + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + case OpInBoundsPtrAccessChain: + case OpArrayLength: + { + auto *var = maybe_get(ops[op == OpStore ? 0 : 2]); + if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT)) + { + bool already_declared = false; + auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + + uint32_t var_id = var->self; + if (var->storage != StorageClassTaskPayloadWorkgroupEXT && + builtin_type != BuiltInPrimitivePointIndicesEXT && + builtin_type != BuiltInPrimitiveLineIndicesEXT && + builtin_type != BuiltInPrimitiveTriangleIndicesEXT) + { + var_id = is_per_primitive_variable(*var) ? 
id_per_primitive : id_per_vertex; + } + + for (auto &arg : func.arguments) + { + if (arg.id == var_id) + { + already_declared = true; + break; + } + } + + if (!already_declared) + { + // basetype is effectively ignored here since we declare the argument + // with explicit types. Just pass down a valid type. + uint32_t type_id = expression_type_id(var_id); + if (var->storage == StorageClassTaskPayloadWorkgroupEXT) + func.arguments.push_back({ type_id, var_id, 1u, 0u, true }); + else + func.arguments.push_back({ type_id, var_id, 1u, 1u, true }); + } + } + break; + } + + default: + break; + } + } + } } string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) @@ -1801,17 +2539,10 @@ void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type if (index < memb.size()) memberflags = memb[index].decoration_flags; - string qualifiers; - bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || - ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); - - if (is_block) - qualifiers = to_interpolation_qualifiers(memberflags); - string packing_offset; bool is_push_constant = type.storage == StorageClassPushConstant; - if ((has_extended_decoration(type.self, SPIRVCrossDecorationPacked) || is_push_constant) && + if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) && has_member_decoration(type.self, index, DecorationOffset)) { uint32_t offset = memb[index].offset - base_offset; @@ -1822,37 +2553,47 @@ void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")"); } - statement(layout_for_member(type, index), qualifiers, qualifier, + statement(layout_for_member(type, index), qualifier, variable_decl(membertype, to_member_name(type, index)), packing_offset, ";"); } +void CompilerHLSL::emit_rayquery_function(const char *commited, const 
char *candidate, const uint32_t *ops) +{ + flush_variable_declaration(ops[0]); + uint32_t is_commited = evaluate_constant_u32(ops[3]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), is_commited ? commited : candidate), false); +} + void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) { auto &type = get(var.basetype); bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock); - if (is_uav) + if (flattened_buffer_blocks.count(var.self)) + { + emit_buffer_block_flattened(var); + } + else if (is_uav) { Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable); - bool is_coherent = flags.get(DecorationCoherent); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); + bool is_coherent = flags.get(DecorationCoherent) && !is_readonly; + bool is_interlocked = interlocked_resources.count(var.self) > 0; + const char *type_name = "ByteAddressBuffer "; + if (!is_readonly) + type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer "; add_resource_name(var.self); - statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ", - to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type), + to_resource_binding(var), ";"); } else { if (type.array.empty()) { - if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset)) - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); - else - SPIRV_CROSS_THROW("cbuffer cannot be expressed with either HLSL packing layout or packoffset."); - // Flatten the top-level struct so we can use packoffset, // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs. 
- flattened_structs.insert(var.self); + flattened_structs[var.self] = false; // Prefer the block name if possible. auto buffer_name = to_name(type.self, false); @@ -1870,6 +2611,16 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) if (buffer_name.empty()) buffer_name = join("_", get(var.basetype).self, "_", var.self); + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index)) + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + else + { + SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ", + failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } + block_names.insert(buffer_name); // Save for post-reflection later. @@ -1890,7 +2641,9 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) add_member_name(type, i); auto backup_name = get_member_name(type.self, i); auto member_name = to_member_name(type, i); - set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name))); + member_name = join(to_name(var.self), "_", member_name); + ParsedIR::sanitize_underscores(member_name); + set_member_name(type.self, i, member_name); emit_struct_member(type, member, i, ""); set_member_name(type.self, i, backup_name); i++; @@ -1905,13 +2658,18 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) SPIRV_CROSS_THROW( "Need ConstantBuffer to use arrays of UBOs, but this is only supported in SM 5.1."); - // ConstantBuffer does not support packoffset, so it is unuseable unless everything aligns as we expect. 
- if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer)) - SPIRV_CROSS_THROW("HLSL ConstantBuffer cannot be expressed with normal HLSL packing rules."); - add_resource_name(type.self); add_resource_name(var.self); + // ConstantBuffer does not support packoffset, so it is unuseable unless everything aligns as we expect. + uint32_t failed_index = 0; + if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index)) + { + SPIRV_CROSS_THROW(join("HLSL ConstantBuffer ID ", var.self, " (name: ", to_name(type.self), + "), member index ", failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with normal HLSL packing rules.")); + } + emit_struct(get(type.self)); statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); @@ -1921,7 +2679,11 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) { - if (root_constants_layout.empty()) + if (flattened_buffer_blocks.count(var.self)) + { + emit_buffer_block_flattened(var); + } + else if (root_constants_layout.empty()) { emit_buffer_block(var); } @@ -1931,19 +2693,24 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) { auto &type = get(var.basetype); - if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, layout.start, layout.end)) - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start, + layout.end)) + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); else - SPIRV_CROSS_THROW( - "root constant cbuffer cannot be expressed with either HLSL packing layout or packoffset."); + { + SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")", + ", member index ", failed_index, " (name: ", 
to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } - flattened_structs.insert(var.self); + flattened_structs[var.self] = false; type.member_name_cache.clear(); add_resource_name(var.self); auto &memb = ir.meta[type.self].members; statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self), - to_resource_register('b', layout.binding, layout.space)); + to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space)); begin_scope(); // Index of the next field in the generated root constant constant buffer @@ -1961,8 +2728,9 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) add_member_name(type, constant_index); auto backup_name = get_member_name(type.self, i); auto member_name = to_member_name(type, i); - set_member_name(type.self, constant_index, - sanitize_underscores(join(to_name(var.self), "_", member_name))); + member_name = join(to_name(var.self), "_", member_name); + ParsedIR::sanitize_underscores(member_name); + set_member_name(type.self, constant_index, member_name); emit_struct_member(type, member, i, "", layout.start); set_member_name(type.self, constant_index, backup_name); @@ -1977,7 +2745,7 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) string CompilerHLSL::to_sampler_expression(uint32_t id) { - auto expr = join("_", to_expression(id)); + auto expr = join("_", to_non_uniform_aware_expression(id)); auto index = expr.find_first_of('['); if (index == string::npos) { @@ -2003,9 +2771,9 @@ void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i } } -string CompilerHLSL::to_func_call_arg(uint32_t id) +string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) { - string arg_str = CompilerGLSL::to_func_call_arg(id); + string arg_str = CompilerGLSL::to_func_call_arg(arg, id); if (hlsl_options.shader_model <= 30) return arg_str; @@ -2022,12 +2790,34 @@ string 
CompilerHLSL::to_func_call_arg(uint32_t id) return arg_str; } +string CompilerHLSL::get_inner_entry_point_name() const +{ + auto &execution = get_entry_point(); + + if (hlsl_options.use_entry_point_name) + { + auto name = join(execution.name, "_inner"); + ParsedIR::sanitize_underscores(name); + return name; + } + + if (execution.model == ExecutionModelVertex) + return "vert_main"; + else if (execution.model == ExecutionModelFragment) + return "frag_main"; + else if (execution.model == ExecutionModelGLCompute) + return "comp_main"; + else if (execution.model == ExecutionModelMeshEXT) + return "mesh_main"; + else + SPIRV_CROSS_THROW("Unsupported execution model."); +} + void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) { if (func.self != ir.default_entry_point) add_function_overload(func); - auto &execution = get_entry_point(); // Avoid shadow declarations. local_variable_names = resource_names; @@ -2048,14 +2838,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret if (func.self == ir.default_entry_point) { - if (execution.model == ExecutionModelVertex) - decl += "vert_main"; - else if (execution.model == ExecutionModelFragment) - decl += "frag_main"; - else if (execution.model == ExecutionModelGLCompute) - decl += "comp_main"; - else - SPIRV_CROSS_THROW("Unsupported execution model."); + decl += get_inner_entry_point_name(); processing_entry_point = true; } else @@ -2071,9 +2854,9 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret out_argument += "out "; out_argument += type_to_glsl(type); out_argument += " "; - out_argument += "SPIRV_Cross_return_value"; + out_argument += "spvReturnValue"; out_argument += type_to_array_glsl(type); - arglist.push_back(move(out_argument)); + arglist.push_back(std::move(out_argument)); } for (auto &arg : func.arguments) @@ -2097,7 +2880,7 @@ void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret 
arg_type.image.dim != DimBuffer) { // Manufacture automatic sampler arg for SampledImage texture - arglist.push_back(join(image_is_comparison(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", + arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", to_sampler_expression(arg.id), type_to_array_glsl(arg_type))); } @@ -2135,33 +2918,62 @@ void CompilerHLSL::emit_hlsl_entry_point() if (require_input) arguments.push_back("SPIRV_Cross_Input stage_input"); - // Add I/O blocks as separate arguments with appropriate storage qualifier. - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); - - if (var.storage != StorageClassInput && var.storage != StorageClassOutput) - return; - - if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) - { - if (var.storage == StorageClassInput) - { - arguments.push_back(join("in ", variable_decl(type, join("stage_input", to_name(var.self))))); - } - else if (var.storage == StorageClassOutput) - { - arguments.push_back(join("out ", variable_decl(type, join("stage_output", to_name(var.self))))); - } - } - }); - auto &execution = get_entry_point(); switch (execution.model) { + case ExecutionModelMeshEXT: + case ExecutionModelMeshNV: case ExecutionModelGLCompute: { + if (execution.model == ExecutionModelMeshEXT) + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + statement("[outputtopology(\"triangle\")]"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + statement("[outputtopology(\"line\")]"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX"); + + auto &func = get(ir.default_entry_point); + for (auto &arg : func.arguments) + { + auto &var = get(arg.id); + auto &base_type = get(var.basetype); + bool block = 
has_decoration(base_type.self, DecorationBlock); + if (var.storage == StorageClassTaskPayloadWorkgroupEXT) + { + arguments.push_back("in payload " + variable_decl(var)); + } + else if (block) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT)) + { + arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" + + std::to_string(execution.output_vertices) + "]"); + } + } + else + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + { + arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + } + } + } SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); @@ -2169,6 +2981,16 @@ void CompilerHLSL::emit_hlsl_entry_point() uint32_t y = execution.workgroup_size.y; uint32_t z = execution.workgroup_size.z; + if (!execution.workgroup_size.constant && execution.flags.get(ExecutionModeLocalSizeId)) + { + if (execution.workgroup_size.id_x) + x = get(execution.workgroup_size.id_x).scalar(); + if (execution.workgroup_size.id_y) + y = get(execution.workgroup_size.id_y).scalar(); + if (execution.workgroup_size.id_z) + z = get(execution.workgroup_size.id_z).scalar(); + } + auto x_expr = wg_x.id ? get(wg_x.id).specialization_constant_macro_name : to_string(x); auto y_expr = wg_y.id ? get(wg_y.id).specialization_constant_macro_name : to_string(y); auto z_expr = wg_z.id ? get(wg_z.id).specialization_constant_macro_name : to_string(z); @@ -2184,7 +3006,13 @@ void CompilerHLSL::emit_hlsl_entry_point() break; } - statement(require_output ? 
"SPIRV_Cross_Output " : "void ", "main(", merge(arguments), ")"); + const char *entry_point_name; + if (hlsl_options.use_entry_point_name) + entry_point_name = get_entry_point().name.c_str(); + else + entry_point_name = "main"; + + statement(require_output ? "SPIRV_Cross_Output " : "void ", entry_point_name, "(", merge(arguments), ")"); begin_scope(); bool legacy = hlsl_options.shader_model <= 30; @@ -2200,7 +3028,11 @@ void CompilerHLSL::emit_hlsl_entry_point() if (legacy) statement(builtin, " = stage_input.", builtin, " + float4(0.5f, 0.5f, 0.0f, 0.0f);"); else + { statement(builtin, " = stage_input.", builtin, ";"); + // ZW are undefined in D3D9, only do this fixup here. + statement(builtin, ".w = 1.0 / ", builtin, ".w;"); + } break; case BuiltInVertexId: @@ -2218,6 +3050,14 @@ void CompilerHLSL::emit_hlsl_entry_point() statement(builtin, " = int(stage_input.", builtin, ");"); break; + case BuiltInBaseVertex: + statement(builtin, " = SPIRV_Cross_BaseVertex;"); + break; + + case BuiltInBaseInstance: + statement(builtin, " = SPIRV_Cross_BaseInstance;"); + break; + case BuiltInInstanceId: // D3D semantics are uint, but shader wants int. statement(builtin, " = int(stage_input.", builtin, ");"); @@ -2227,6 +3067,7 @@ void CompilerHLSL::emit_hlsl_entry_point() case BuiltInPointCoord: case BuiltInSubgroupSize: case BuiltInSubgroupLocalInvocationId: + case BuiltInHelperInvocation: break; case BuiltInSubgroupEqMask: @@ -2312,64 +3153,64 @@ void CompilerHLSL::emit_hlsl_entry_point() // Copy from stage input struct to globals. 
ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + bool block = has_decoration(type.self, DecorationBlock); if (var.storage != StorageClassInput) return; bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; - if (!block && !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) { - auto name = to_name(var.self); - auto &mtype = this->get(var.basetype); - if (need_matrix_unroll && mtype.columns > 1) + if (block) { - // Unroll matrices. - for (uint32_t col = 0; col < mtype.columns; col++) - statement(name, "[", col, "] = stage_input.", name, "_", col, ";"); + auto type_name = to_name(type.self); + auto var_name = to_name(var.self); + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) + { + auto mbr_name = to_member_name(type, mbr_idx); + auto flat_name = join(type_name, "_", mbr_name); + statement(var_name, ".", mbr_name, " = stage_input.", flat_name, ";"); + } } else { - statement(name, " = stage_input.", name, ";"); + auto name = to_name(var.self); + auto &mtype = this->get(var.basetype); + if (need_matrix_unroll && mtype.columns > 1) + { + // Unroll matrices. + for (uint32_t col = 0; col < mtype.columns; col++) + statement(name, "[", col, "] = stage_input.", name, "_", col, ";"); + } + else + { + statement(name, " = stage_input.", name, ";"); + } } } - - // I/O blocks don't use the common stage input/output struct, but separate outputs. - if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) - { - auto name = to_name(var.self); - statement(name, " = stage_input", name, ";"); - } }); // Run the shader. 
- if (execution.model == ExecutionModelVertex) - statement("vert_main();"); - else if (execution.model == ExecutionModelFragment) - statement("frag_main();"); - else if (execution.model == ExecutionModelGLCompute) - statement("comp_main();"); + if (execution.model == ExecutionModelVertex || + execution.model == ExecutionModelFragment || + execution.model == ExecutionModelGLCompute || + execution.model == ExecutionModelMeshEXT) + { + // For mesh shaders, we receive special arguments that we must pass down as function arguments. + // HLSL does not support proper reference types for passing these IO blocks, + // but DXC post-inlining seems to magically fix it up anyways *shrug*. + SmallVector arglist; + auto &func = get(ir.default_entry_point); + // The arguments are marked out, avoid detecting reads and emitting inout. + for (auto &arg : func.arguments) + arglist.push_back(to_expression(arg.id, false)); + statement(get_inner_entry_point_name(), "(", merge(arglist), ");"); + } else SPIRV_CROSS_THROW("Unsupported shader stage."); - // Copy block outputs. - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); - - if (var.storage != StorageClassOutput) - return; - - // I/O blocks don't use the common stage input/output struct, but separate outputs. - if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) - { - auto name = to_name(var.self); - statement("stage_output", name, " = ", name, ";"); - } - }); - // Copy stage outputs. 
if (require_output) { @@ -2406,27 +3247,43 @@ void CompilerHLSL::emit_hlsl_entry_point() ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); - bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + bool block = has_decoration(type.self, DecorationBlock); if (var.storage != StorageClassOutput) return; - if (!block && var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && - !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + if (!var.remapped_variable && type.pointer && + !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) { - auto name = to_name(var.self); - - if (legacy && execution.model == ExecutionModelFragment) + if (block) { - string output_filler; - for (uint32_t size = type.vecsize; size < 4; ++size) - output_filler += ", 0.0"; - - statement("stage_output.", name, " = float4(", name, output_filler, ");"); + // I/O blocks need to flatten output. 
+ auto type_name = to_name(type.self); + auto var_name = to_name(var.self); + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) + { + auto mbr_name = to_member_name(type, mbr_idx); + auto flat_name = join(type_name, "_", mbr_name); + statement("stage_output.", flat_name, " = ", var_name, ".", mbr_name, ";"); + } } else { - statement("stage_output.", name, " = ", name, ";"); + auto name = to_name(var.self); + + if (legacy && execution.model == ExecutionModelFragment) + { + string output_filler; + for (uint32_t size = type.vecsize; size < 4; ++size) + output_filler += ", 0.0"; + + statement("stage_output.", name, " = float4(", name, output_filler, ");"); + } + else + { + statement("stage_output.", name, " = ", name, ";"); + } } } }); @@ -2439,7 +3296,7 @@ void CompilerHLSL::emit_hlsl_entry_point() void CompilerHLSL::emit_fixup() { - if (get_entry_point().model == ExecutionModelVertex) + if (is_vertex_like_shader() && active_output_builtins.get(BuiltInPosition)) { // Do various mangling on the gl_Position. if (hlsl_options.shader_model <= 30) @@ -2457,8 +3314,11 @@ void CompilerHLSL::emit_fixup() } } -void CompilerHLSL::emit_texture_op(const Instruction &i) +void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) { + if (sparse) + SPIRV_CROSS_THROW("Sparse feedback not yet supported in HLSL."); + auto *ops = stream(i); auto op = static_cast(i.op); uint32_t length = i.length; @@ -2467,7 +3327,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t img = ops[2]; + VariableID img = ops[2]; uint32_t coord = ops[3]; uint32_t dref = 0; uint32_t comp = 0; @@ -2475,13 +3335,16 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) bool proj = false; const uint32_t *opt = nullptr; auto *combined_image = maybe_get(img); - auto img_expr = to_expression(combined_image ? 
combined_image->image : img); - inherited_expressions.push_back(coord); + if (combined_image && has_decoration(img, DecorationNonUniform)) + { + set_decoration(combined_image->image, DecorationNonUniform); + set_decoration(combined_image->sampler, DecorationNonUniform); + } - // Make sure non-uniform decoration is back-propagated to where it needs to be. - if (has_decoration(img, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(img); + auto img_expr = to_non_uniform_aware_expression(combined_image ? combined_image->image : img); + + inherited_expressions.push_back(coord); switch (op) { @@ -2631,7 +3494,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) { texop += img_expr; - if (image_is_comparison(imgtype, img)) + if (is_depth_image(imgtype, img)) { if (gather) { @@ -2647,7 +3510,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) } else if (gather) { - uint32_t comp_num = get(comp).scalar(); + uint32_t comp_num = evaluate_constant_u32(comp); if (hlsl_options.shader_model >= 50) { switch (comp_num) @@ -2713,14 +3576,15 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) SPIRV_CROSS_THROW("textureGather is not supported in HLSL shader model 2/3."); if (offset || coffset) SPIRV_CROSS_THROW("textureOffset is not supported in HLSL shader model 2/3."); - if (proj) - texop += "proj"; + if (grad_x || grad_y) texop += "grad"; - if (lod) + else if (lod) texop += "lod"; - if (bias) + else if (bias) texop += "bias"; + else if (proj || dref) + texop += "proj"; } } @@ -2736,7 +3600,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) { string sampler_expr; if (combined_image) - sampler_expr = to_expression(combined_image->sampler); + sampler_expr = to_non_uniform_aware_expression(combined_image->sampler); else sampler_expr = to_sampler_expression(img); expr += sampler_expr; @@ -2772,24 +3636,52 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) if (proj && hlsl_options.shader_model >= 40) // Legacy HLSL has "proj" 
operations which do this for us. coord_expr = coord_expr + " / " + to_extract_component_expression(coord, coord_components); - if (hlsl_options.shader_model < 40 && lod) + if (hlsl_options.shader_model < 40) { - string coord_filler; - for (uint32_t size = coord_components; size < 3; ++size) + if (dref) + { + if (imgtype.image.dim != spv::Dim1D && imgtype.image.dim != spv::Dim2D) + { + SPIRV_CROSS_THROW( + "Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3."); + } + + if (grad_x || grad_y) + SPIRV_CROSS_THROW("Depth comparison is not supported for grad sampling in HLSL shader model 2/3."); + + for (uint32_t size = coord_components; size < 2; ++size) + coord_expr += ", 0.0"; + + forward = forward && should_forward(dref); + coord_expr += ", " + to_expression(dref); + } + else if (lod || bias || proj) { - coord_filler += ", 0.0"; + for (uint32_t size = coord_components; size < 3; ++size) + coord_expr += ", 0.0"; } - coord_expr = "float4(" + coord_expr + coord_filler + ", " + to_expression(lod) + ")"; - } - if (hlsl_options.shader_model < 40 && bias) - { - string coord_filler; - for (uint32_t size = coord_components; size < 3; ++size) + if (lod) { - coord_filler += ", 0.0"; + coord_expr = "float4(" + coord_expr + ", " + to_expression(lod) + ")"; } - coord_expr = "float4(" + coord_expr + coord_filler + ", " + to_expression(bias) + ")"; + else if (bias) + { + coord_expr = "float4(" + coord_expr + ", " + to_expression(bias) + ")"; + } + else if (proj) + { + coord_expr = "float4(" + coord_expr + ", " + to_extract_component_expression(coord, coord_components) + ")"; + } + else if (dref) + { + // A "normal" sample gets fed into tex2Dproj as well, because the + // regular tex2D accepts only two coordinates. 
+ coord_expr = "float4(" + coord_expr + ", 1.0)"; + } + + if (!!lod + !!bias + !!proj > 1) + SPIRV_CROSS_THROW("Legacy HLSL can only use one of lod/bias/proj modifiers."); } if (op == OpImageFetch) @@ -2802,11 +3694,8 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) expr += ", "; expr += coord_expr; - if (dref) + if (dref && hlsl_options.shader_model >= 40) { - if (hlsl_options.shader_model < 40) - SPIRV_CROSS_THROW("Legacy HLSL does not support comparison sampling."); - forward = forward && should_forward(dref); expr += ", "; @@ -2861,6 +3750,9 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) expr += ")"; + if (dref && hlsl_options.shader_model < 40) + expr += ".x"; + if (op == OpImageQueryLod) { // This is rather awkward. @@ -2870,7 +3762,8 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) // according to GLSL spec, and it depends on the sampler itself. // Just assume X == Y, so we will need to splat the result to a float2. statement("float _", id, "_tmp = ", expr, ";"); - emit_op(result_type, id, join("float2(_", id, "_tmp, _", id, "_tmp)"), true, true); + statement("float2 _", id, " = _", id, "_tmp.xx;"); + set(id, join("_", id), result_type, true); } else { @@ -2886,7 +3779,6 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) case OpImageSampleImplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleProjDrefImplicitLod: - case OpImageQueryLod: register_control_dependent_expression(id); break; @@ -2897,30 +3789,52 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) string CompilerHLSL::to_resource_binding(const SPIRVariable &var) { - // TODO: Basic implementation, might need special consideration for RW/RO structured buffers, - // RW/RO images, and so on. + const auto &type = get(var.basetype); - if (!has_decoration(var.self, DecorationBinding)) + // We can remap push constant blocks, even if they don't have any binding decoration. 
+ if (type.storage != StorageClassPushConstant && !has_decoration(var.self, DecorationBinding)) return ""; - const auto &type = get(var.basetype); char space = '\0'; + HLSLBindingFlagBits resource_flags = HLSL_BINDING_AUTO_NONE_BIT; + switch (type.basetype) { case SPIRType::SampledImage: space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; break; case SPIRType::Image: if (type.image.sampled == 2 && type.image.dim != DimSubpassData) - space = 'u'; // UAV + { + if (has_decoration(var.self, DecorationNonWritable) && hlsl_options.nonwritable_uav_texture_as_srv) + { + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + } + else + { + space = 'u'; // UAV + resource_flags = HLSL_BINDING_AUTO_UAV_BIT; + } + } else + { space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + } break; case SPIRType::Sampler: space = 's'; + resource_flags = HLSL_BINDING_AUTO_SAMPLER_BIT; + break; + + case SPIRType::AccelerationStructure: + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; break; case SPIRType::Struct: @@ -2931,20 +3845,28 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var) if (has_decoration(type.self, DecorationBufferBlock)) { Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); space = is_readonly ? 't' : 'u'; // UAV + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; } else if (has_decoration(type.self, DecorationBlock)) + { space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_CBV_BIT; + } } else if (storage == StorageClassPushConstant) + { space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT; + } else if (storage == StorageClassStorageBuffer) { // UAV or SRV depending on readonly flag. 
Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); space = is_readonly ? 't' : 'u'; + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; } break; @@ -2956,8 +3878,16 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var) if (!space) return ""; - return to_resource_register(space, get_decoration(var.self, DecorationBinding), - get_decoration(var.self, DecorationDescriptorSet)); + uint32_t desc_set = + resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantDescriptorSet : 0u; + uint32_t binding = resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantBinding : 0u; + + if (has_decoration(var.self, DecorationBinding)) + binding = get_decoration(var.self, DecorationBinding); + if (has_decoration(var.self, DecorationDescriptorSet)) + desc_set = get_decoration(var.self, DecorationDescriptorSet); + + return to_resource_register(resource_flags, space, binding, desc_set); } string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var) @@ -2966,16 +3896,65 @@ string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var) if (!has_decoration(var.self, DecorationBinding)) return ""; - return to_resource_register('s', get_decoration(var.self, DecorationBinding), + return to_resource_register(HLSL_BINDING_AUTO_SAMPLER_BIT, 's', get_decoration(var.self, DecorationBinding), get_decoration(var.self, DecorationDescriptorSet)); } -string CompilerHLSL::to_resource_register(char space, uint32_t binding, uint32_t space_set) +void CompilerHLSL::remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding) +{ + auto itr = resource_bindings.find({ get_execution_model(), desc_set, binding }); + if (itr != end(resource_bindings)) + { + auto &remap = itr->second; + remap.second = 
true; + + switch (type) + { + case HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT: + case HLSL_BINDING_AUTO_CBV_BIT: + desc_set = remap.first.cbv.register_space; + binding = remap.first.cbv.register_binding; + break; + + case HLSL_BINDING_AUTO_SRV_BIT: + desc_set = remap.first.srv.register_space; + binding = remap.first.srv.register_binding; + break; + + case HLSL_BINDING_AUTO_SAMPLER_BIT: + desc_set = remap.first.sampler.register_space; + binding = remap.first.sampler.register_binding; + break; + + case HLSL_BINDING_AUTO_UAV_BIT: + desc_set = remap.first.uav.register_space; + binding = remap.first.uav.register_binding; + break; + + default: + break; + } + } +} + +string CompilerHLSL::to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t space_set) { - if (hlsl_options.shader_model >= 51) - return join(" : register(", space, binding, ", space", space_set, ")"); + if ((flag & resource_binding_flags) == 0) + { + remap_hlsl_resource_binding(flag, space_set, binding); + + // The push constant block did not have a binding, and there were no remap for it, + // so, declare without register binding. + if (flag == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT && space_set == ResourceBindingPushConstantDescriptorSet) + return ""; + + if (hlsl_options.shader_model >= 51) + return join(" : register(", space, binding, ", space", space_set, ")"); + else + return join(" : register(", space, binding, ")"); + } else - return join(" : register(", space, binding, ")"); + return ""; } void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) @@ -2996,7 +3975,7 @@ void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) { // For combined image samplers, also emit a combined image sampler. 
- if (image_is_comparison(type, var.self)) + if (is_depth_image(type, var.self)) statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type), to_resource_binding_sampler(var), ";"); else @@ -3044,6 +4023,11 @@ void CompilerHLSL::emit_uniform(const SPIRVariable &var) emit_legacy_uniform(var); } +bool CompilerHLSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t) +{ + return false; +} + string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) { if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) @@ -3077,7 +4061,7 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i requires_explicit_fp16_packing = true; force_recompile(); } - return "SPIRV_Cross_unpackFloat2x16"; + return "spvUnpackFloat2x16"; } else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) { @@ -3086,7 +4070,19 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i requires_explicit_fp16_packing = true; force_recompile(); } - return "SPIRV_Cross_packFloat2x16"; + return "spvPackFloat2x16"; + } + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("Half to UShort requires Shader Model 4."); + return "(" + type_to_glsl(out_type) + ")f32tof16"; + } + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("UShort to Half requires Shader Model 4."); + return "(" + type_to_glsl(out_type) + ")f16tof32"; } else return ""; @@ -3101,6 +4097,8 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); + op = get_remapped_glsl_op(op); + switch (op) { case GLSLstd450InverseSqrt: @@ -3112,7 +4110,10 @@ 
void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; case GLSLstd450RoundEven: - SPIRV_CROSS_THROW("roundEven is not supported on HLSL."); + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("roundEven is not supported in HLSL shader model 2/3."); + emit_unary_func_op(result_type, id, args[0], "round"); + break; case GLSLstd450Acosh: case GLSLstd450Asinh: @@ -3148,7 +4149,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_fp16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packHalf2x16"); + emit_unary_func_op(result_type, id, args[0], "spvPackHalf2x16"); break; case GLSLstd450UnpackHalf2x16: @@ -3157,7 +4158,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_fp16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackHalf2x16"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackHalf2x16"); break; case GLSLstd450PackSnorm4x8: @@ -3166,7 +4167,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packSnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm4x8"); break; case GLSLstd450UnpackSnorm4x8: @@ -3175,7 +4176,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackSnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm4x8"); break; case GLSLstd450PackUnorm4x8: @@ -3184,7 +4185,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packUnorm4x8"); + 
emit_unary_func_op(result_type, id, args[0], "spvPackUnorm4x8"); break; case GLSLstd450UnpackUnorm4x8: @@ -3193,7 +4194,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm8_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackUnorm4x8"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm4x8"); break; case GLSLstd450PackSnorm2x16: @@ -3202,7 +4203,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packSnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm2x16"); break; case GLSLstd450UnpackSnorm2x16: @@ -3211,7 +4212,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_snorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackSnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm2x16"); break; case GLSLstd450PackUnorm2x16: @@ -3220,7 +4221,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packUnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvPackUnorm2x16"); break; case GLSLstd450UnpackUnorm2x16: @@ -3229,7 +4230,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_unorm16_packing = true; force_recompile(); } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackUnorm2x16"); + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm2x16"); break; case GLSLstd450PackDouble2x32: @@ -3237,8 +4238,11 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL."); case 
GLSLstd450FindILsb: - emit_unary_func_op(result_type, id, args[0], "firstbitlow"); + { + auto basetype = expression_type(args[0]).basetype; + emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype); break; + } case GLSLstd450FindSMsb: emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type); @@ -3275,7 +4279,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, force_recompile(); } } - emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_Inverse"); + emit_unary_func_op(result_type, id, args[0], "spvInverse"); break; } @@ -3298,7 +4302,7 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_scalar_reflect = true; force_recompile(); } - emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect"); + emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); @@ -3312,7 +4316,21 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, requires_scalar_refract = true; force_recompile(); } - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract"); + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450FaceForward: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_faceforward) + { + requires_scalar_faceforward = true; + force_recompile(); + } + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); @@ -3324,7 +4342,57 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, } } -string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) +void CompilerHLSL::read_access_chain_array(const string &lhs, const 
SPIRAccessChain &chain) +{ + auto &type = get(chain.basetype); + + // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops. + auto ident = get_unique_identifier(); + + statement("[unroll]"); + statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", + ident, "++)"); + begin_scope(); + auto subchain = chain; + subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); + subchain.basetype = type.parent_type; + if (!get(subchain.basetype).array.empty()) + subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); + read_access_chain(nullptr, join(lhs, "[", ident, "]"), subchain); + end_scope(); +} + +void CompilerHLSL::read_access_chain_struct(const string &lhs, const SPIRAccessChain &chain) +{ + auto &type = get(chain.basetype); + auto subchain = chain; + uint32_t member_count = uint32_t(type.member_types.size()); + + for (uint32_t i = 0; i < member_count; i++) + { + uint32_t offset = type_struct_member_offset(type, i); + subchain.static_index = chain.static_index + offset; + subchain.basetype = type.member_types[i]; + + subchain.matrix_stride = 0; + subchain.array_stride = 0; + subchain.row_major_matrix = false; + + auto &member_type = get(subchain.basetype); + if (member_type.columns > 1) + { + subchain.matrix_stride = type_struct_member_matrix_stride(type, i); + subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor); + } + + if (!member_type.array.empty()) + subchain.array_stride = type_struct_member_array_stride(type, i); + + read_access_chain(nullptr, join(lhs, ".", to_member_name(type, i)), subchain); + } +} + +void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIRAccessChain &chain) { auto &type = get(chain.basetype); @@ -3333,17 +4401,31 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) target_type.vecsize = 
type.vecsize; target_type.columns = type.columns; - if (type.basetype == SPIRType::Struct) - SPIRV_CROSS_THROW("Reading structs from ByteAddressBuffer not yet supported."); - - if (type.width != 32) - SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported."); - if (!type.array.empty()) - SPIRV_CROSS_THROW("Reading arrays from ByteAddressBuffer not yet supported."); + { + read_access_chain_array(lhs, chain); + return; + } + else if (type.basetype == SPIRType::Struct) + { + read_access_chain_struct(lhs, chain); + return; + } + else if (type.width != 32 && !hlsl_options.enable_16bit_types) + SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and " + "native 16-bit types are enabled."); + string base = chain.base; + if (has_decoration(chain.self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain.self); + + bool templated_load = hlsl_options.shader_model >= 62; string load_expr; + string template_expr; + if (templated_load) + template_expr = join("<", type_to_glsl(type), ">"); + // Load a vector or scalar. if (type.columns == 1 && !chain.row_major_matrix) { @@ -3366,12 +4448,24 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) SPIRV_CROSS_THROW("Unknown vector size."); } - load_expr = join(chain.base, ".", load_op, "(", chain.dynamic_index, chain.static_index, ")"); + if (templated_load) + load_op = "Load"; + + load_expr = join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")"); } else if (type.columns == 1) { // Strided load since we are loading a column from a row-major matrix. 
- if (type.vecsize > 1) + if (templated_load) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + if (type.vecsize > 1) + load_expr += type_to_glsl(type) + "("; + } + else if (type.vecsize > 1) { load_expr = type_to_glsl(target_type); load_expr += "("; @@ -3379,8 +4473,8 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) for (uint32_t r = 0; r < type.vecsize; r++) { - load_expr += - join(chain.base, ".Load(", chain.dynamic_index, chain.static_index + r * chain.matrix_stride, ")"); + load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, + chain.static_index + r * chain.matrix_stride, ")"); if (r + 1 < type.vecsize) load_expr += ", "; } @@ -3410,13 +4504,25 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) SPIRV_CROSS_THROW("Unknown vector size."); } - // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend, - // so row-major is technically column-major ... - load_expr = type_to_glsl(target_type); + if (templated_load) + { + auto vector_type = type; + vector_type.columns = 1; + template_expr = join("<", type_to_glsl(vector_type), ">"); + load_expr = type_to_glsl(type); + load_op = "Load"; + } + else + { + // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend, + // so row-major is technically column-major ... 
+ load_expr = type_to_glsl(target_type); + } load_expr += "("; + for (uint32_t c = 0; c < type.columns; c++) { - load_expr += join(chain.base, ".", load_op, "(", chain.dynamic_index, + load_expr += join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index + c * chain.matrix_stride, ")"); if (c + 1 < type.columns) load_expr += ", "; @@ -3428,13 +4534,24 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) // Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern // considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ... - load_expr = type_to_glsl(target_type); + if (templated_load) + { + load_expr = type_to_glsl(type); + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + else + load_expr = type_to_glsl(target_type); + load_expr += "("; + for (uint32_t c = 0; c < type.columns; c++) { for (uint32_t r = 0; r < type.vecsize; r++) { - load_expr += join(chain.base, ".Load(", chain.dynamic_index, + load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")"); if ((r + 1 < type.vecsize) || (c + 1 < type.columns)) @@ -3444,11 +4561,20 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) load_expr += ")"; } - auto bitcast_op = bitcast_glsl_op(type, target_type); - if (!bitcast_op.empty()) - load_expr = join(bitcast_op, "(", load_expr, ")"); + if (!templated_load) + { + auto bitcast_op = bitcast_glsl_op(type, target_type); + if (!bitcast_op.empty()) + load_expr = join(bitcast_op, "(", load_expr, ")"); + } - return load_expr; + if (lhs.empty()) + { + assert(expr); + *expr = std::move(load_expr); + } + else + statement(lhs, " = ", load_expr, ";"); } void CompilerHLSL::emit_load(const Instruction &instruction) @@ -3462,56 +4588,174 @@ 
void CompilerHLSL::emit_load(const Instruction &instruction) uint32_t id = ops[1]; uint32_t ptr = ops[2]; - if (has_decoration(ptr, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(ptr); + auto &type = get(result_type); + bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct; + + if (composite_load) + { + // We cannot make this work in one single expression as we might have nested structures and arrays, + // so unroll the load to an uninitialized temporary. + emit_uninitialized_temporary_expression(result_type, id); + read_access_chain(nullptr, to_expression(id), *chain); + track_expression_read(chain->self); + } + else + { + string load_expr; + read_access_chain(&load_expr, "", *chain); + + bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // If we are forwarding this load, + // don't register the read to access chain here, defer that to when we actually use the expression, + // using the add_implied_read_expression mechanism. + if (!forward) + track_expression_read(chain->self); + + // Do not forward complex load sequences like matrices, structs and arrays. + if (type.columns > 1) + forward = false; + + auto &e = emit_op(result_type, id, load_expr, forward, true); + e.need_transpose = false; + register_read(id, ptr, forward); + inherit_expression_dependencies(id, ptr); + if (forward) + add_implied_read_expression(e, chain->self); + } + } + else + CompilerGLSL::emit_instruction(instruction); +} + +void CompilerHLSL::write_access_chain_array(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain) +{ + auto &type = get(chain.basetype); - auto load_expr = read_access_chain(*chain); + // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops. 
+ auto ident = get_unique_identifier(); + + uint32_t id = ir.increase_bound_by(2); + uint32_t int_type_id = id + 1; + SPIRType int_type; + int_type.basetype = SPIRType::Int; + int_type.width = 32; + set(int_type_id, int_type); + set(id, ident, int_type_id, true); + set_name(id, ident); + suppressed_usage_tracking.insert(id); + + statement("[unroll]"); + statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", + ident, "++)"); + begin_scope(); + auto subchain = chain; + subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); + subchain.basetype = type.parent_type; - bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + // Forcefully allow us to use an ID here by setting MSB. + auto subcomposite_chain = composite_chain; + subcomposite_chain.push_back(0x80000000u | id); - // If we are forwarding this load, - // don't register the read to access chain here, defer that to when we actually use the expression, - // using the add_implied_read_expression mechanism. - if (!forward) - track_expression_read(chain->self); + if (!get(subchain.basetype).array.empty()) + subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); - // Do not forward complex load sequences like matrices, structs and arrays. 
- auto &type = get(result_type); - if (type.columns > 1 || !type.array.empty() || type.basetype == SPIRType::Struct) - forward = false; + write_access_chain(subchain, value, subcomposite_chain); + end_scope(); +} - auto &e = emit_op(result_type, id, load_expr, forward, true); - e.need_transpose = false; - register_read(id, ptr, forward); - inherit_expression_dependencies(id, ptr); - if (forward) - add_implied_read_expression(e, chain->self); +void CompilerHLSL::write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain) +{ + auto &type = get(chain.basetype); + uint32_t member_count = uint32_t(type.member_types.size()); + auto subchain = chain; + + auto subcomposite_chain = composite_chain; + subcomposite_chain.push_back(0); + + for (uint32_t i = 0; i < member_count; i++) + { + uint32_t offset = type_struct_member_offset(type, i); + subchain.static_index = chain.static_index + offset; + subchain.basetype = type.member_types[i]; + + subchain.matrix_stride = 0; + subchain.array_stride = 0; + subchain.row_major_matrix = false; + + auto &member_type = get(subchain.basetype); + if (member_type.columns > 1) + { + subchain.matrix_stride = type_struct_member_matrix_stride(type, i); + subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor); + } + + if (!member_type.array.empty()) + subchain.array_stride = type_struct_member_array_stride(type, i); + + subcomposite_chain.back() = i; + write_access_chain(subchain, value, subcomposite_chain); } +} + +string CompilerHLSL::write_access_chain_value(uint32_t value, const SmallVector &composite_chain, + bool enclose) +{ + string ret; + if (composite_chain.empty()) + ret = to_expression(value); else - CompilerGLSL::emit_instruction(instruction); + { + AccessChainMeta meta; + ret = access_chain_internal(value, composite_chain.data(), uint32_t(composite_chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, &meta); + } + + if 
(enclose) + ret = enclose_expression(ret); + return ret; } -void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value) +void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain) { auto &type = get(chain.basetype); // Make sure we trigger a read of the constituents in the access chain. track_expression_read(chain.self); - if (has_decoration(chain.self, DecorationNonUniformEXT)) - propagate_nonuniform_qualifier(chain.self); - SPIRType target_type; target_type.basetype = SPIRType::UInt; target_type.vecsize = type.vecsize; target_type.columns = type.columns; - if (type.basetype == SPIRType::Struct) - SPIRV_CROSS_THROW("Writing structs to RWByteAddressBuffer not yet supported."); - if (type.width != 32) - SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported."); if (!type.array.empty()) - SPIRV_CROSS_THROW("Reading arrays from ByteAddressBuffer not yet supported."); + { + write_access_chain_array(chain, value, composite_chain); + register_write(chain.self); + return; + } + else if (type.basetype == SPIRType::Struct) + { + write_access_chain_struct(chain, value, composite_chain); + register_write(chain.self); + return; + } + else if (type.width != 32 && !hlsl_options.enable_16bit_types) + SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported, unless SM 6.2 and " + "native 16-bit types are enabled."); + + bool templated_store = hlsl_options.shader_model >= 62; + + auto base = chain.base; + if (has_decoration(chain.self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain.self); + + string template_expr; + if (templated_store) + template_expr = join("<", type_to_glsl(type), ">"); if (type.columns == 1 && !chain.row_major_matrix) { @@ -3534,18 +4778,33 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val SPIRV_CROSS_THROW("Unknown vector size."); } - auto store_expr 
= to_expression(value); - auto bitcast_op = bitcast_glsl_op(target_type, type); - if (!bitcast_op.empty()) - store_expr = join(bitcast_op, "(", store_expr, ")"); - statement(chain.base, ".", store_op, "(", chain.dynamic_index, chain.static_index, ", ", store_expr, ");"); + auto store_expr = write_access_chain_value(value, composite_chain, false); + + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + else + store_op = "Store"; + statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ", + store_expr, ");"); } else if (type.columns == 1) { + if (templated_store) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + // Strided store. for (uint32_t r = 0; r < type.vecsize; r++) { - auto store_expr = to_enclosed_expression(value); + auto store_expr = write_access_chain_value(value, composite_chain, true); if (type.vecsize > 1) { store_expr += "."; @@ -3553,11 +4812,15 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val } remove_duplicate_swizzle(store_expr); - auto bitcast_op = bitcast_glsl_op(target_type, type); - if (!bitcast_op.empty()) - store_expr = join(bitcast_op, "(", store_expr, ")"); - statement(chain.base, ".Store(", chain.dynamic_index, chain.static_index + chain.matrix_stride * r, ", ", - store_expr, ");"); + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + + statement(base, ".Store", template_expr, "(", chain.dynamic_index, + chain.static_index + chain.matrix_stride * r, ", ", store_expr, ");"); } } else if (!chain.row_major_matrix) @@ -3581,28 +4844,50 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val 
SPIRV_CROSS_THROW("Unknown vector size."); } + if (templated_store) + { + store_op = "Store"; + auto vector_type = type; + vector_type.columns = 1; + template_expr = join("<", type_to_glsl(vector_type), ">"); + } + for (uint32_t c = 0; c < type.columns; c++) { - auto store_expr = join(to_enclosed_expression(value), "[", c, "]"); - auto bitcast_op = bitcast_glsl_op(target_type, type); - if (!bitcast_op.empty()) - store_expr = join(bitcast_op, "(", store_expr, ")"); - statement(chain.base, ".", store_op, "(", chain.dynamic_index, chain.static_index + c * chain.matrix_stride, - ", ", store_expr, ");"); + auto store_expr = join(write_access_chain_value(value, composite_chain, true), "[", c, "]"); + + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + + statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, + chain.static_index + c * chain.matrix_stride, ", ", store_expr, ");"); } } else { + if (templated_store) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + for (uint32_t r = 0; r < type.vecsize; r++) { for (uint32_t c = 0; c < type.columns; c++) { - auto store_expr = join(to_enclosed_expression(value), "[", c, "].", index_to_swizzle(r)); + auto store_expr = + join(write_access_chain_value(value, composite_chain, true), "[", c, "].", index_to_swizzle(r)); remove_duplicate_swizzle(store_expr); auto bitcast_op = bitcast_glsl_op(target_type, type); if (!bitcast_op.empty()) store_expr = join(bitcast_op, "(", store_expr, ")"); - statement(chain.base, ".Store(", chain.dynamic_index, + statement(base, ".Store", template_expr, "(", chain.dynamic_index, chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");"); } } @@ -3616,7 +4901,7 @@ void CompilerHLSL::emit_store(const Instruction &instruction) auto ops = 
stream(instruction); auto *chain = maybe_get(ops[0]); if (chain) - write_access_chain(*chain, ops[1]); + write_access_chain(*chain, ops[1], {}); else CompilerGLSL::emit_instruction(instruction); } @@ -3646,7 +4931,10 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) if (need_byte_access_chain) { - uint32_t to_plain_buffer_length = static_cast(type.array.size()); + // If we have a chain variable, we are already inside the SSBO, and any array type will refer to arrays within a block, + // and not array of SSBO. + uint32_t to_plain_buffer_length = chain ? 0u : static_cast(type.array.size()); + auto *backing_variable = maybe_get_backing_variable(ops[2]); string base; @@ -3668,6 +4956,7 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) } uint32_t matrix_stride = 0; + uint32_t array_stride = 0; bool row_major_matrix = false; // Inherit matrix information. @@ -3675,17 +4964,19 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) { matrix_stride = chain->matrix_stride; row_major_matrix = chain->row_major_matrix; + array_stride = chain->array_stride; } - auto offsets = - flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length], - length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix, &matrix_stride); + auto offsets = flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length], + length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix, + &matrix_stride, &array_stride); auto &e = set(ops[1], ops[0], type.storage, base, offsets.first, offsets.second); e.row_major_matrix = row_major_matrix; e.matrix_stride = matrix_stride; + e.array_stride = array_stride; e.immutable = should_forward(ops[2]); - e.loaded_from = backing_variable ? backing_variable->self : 0; + e.loaded_from = backing_variable ? 
backing_variable->self : ID(0); if (chain) { @@ -3710,9 +5001,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) const char *atomic_op = nullptr; string value_expr; - if (op != OpAtomicIDecrement && op != OpAtomicIIncrement) + if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore) value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]); + bool is_atomic_store = false; + switch (op) { case OpAtomicIIncrement: @@ -3725,6 +5018,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) value_expr = "-1"; break; + case OpAtomicLoad: + atomic_op = "InterlockedAdd"; + value_expr = "0"; + break; + case OpAtomicISub: atomic_op = "InterlockedAdd"; value_expr = join("-", enclose_expression(value_expr)); @@ -3760,6 +5058,11 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) atomic_op = "InterlockedExchange"; break; + case OpAtomicStore: + atomic_op = "InterlockedExchange"; + is_atomic_store = true; + break; + case OpAtomicCompareExchange: if (length < 8) SPIRV_CROSS_THROW("Not enough data for opcode."); @@ -3771,31 +5074,64 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) SPIRV_CROSS_THROW("Unknown atomic opcode."); } - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - forced_temporaries.insert(ops[1]); + if (is_atomic_store) + { + auto &data_type = expression_type(ops[0]); + auto *chain = maybe_get(ops[0]); - auto &type = get(result_type); - statement(variable_decl(type, to_name(id)), ";"); + auto &tmp_id = extra_sub_expressions[ops[0]]; + if (!tmp_id) + { + tmp_id = ir.increase_bound_by(1); + emit_uninitialized_temporary_expression(get_pointee_type(data_type).self, tmp_id); + } - auto &data_type = expression_type(ops[2]); - auto *chain = maybe_get(ops[2]); - SPIRType::BaseType expr_type; - if (data_type.storage == StorageClassImage || !chain) - { - statement(atomic_op, "(", 
to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");"); - expr_type = data_type.basetype; + if (data_type.storage == StorageClassImage || !chain) + { + statement(atomic_op, "(", to_non_uniform_aware_expression(ops[0]), ", ", + to_expression(ops[3]), ", ", to_expression(tmp_id), ");"); + } + else + { + string base = chain->base; + if (has_decoration(chain->self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain->self); + // RWByteAddress buffer is always uint in its underlying type. + statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", + to_expression(ops[3]), ", ", to_expression(tmp_id), ");"); + } } else { - // RWByteAddress buffer is always uint in its underlying type. - expr_type = SPIRType::UInt; - statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, ", ", - to_name(id), ");"); - } + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + forced_temporaries.insert(ops[1]); + + auto &type = get(result_type); + statement(variable_decl(type, to_name(id)), ";"); + + auto &data_type = expression_type(ops[2]); + auto *chain = maybe_get(ops[2]); + SPIRType::BaseType expr_type; + if (data_type.storage == StorageClassImage || !chain) + { + statement(atomic_op, "(", to_non_uniform_aware_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");"); + expr_type = data_type.basetype; + } + else + { + // RWByteAddress buffer is always uint in its underlying type. 
+ string base = chain->base; + if (has_decoration(chain->self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain->self); + expr_type = SPIRType::UInt; + statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, + ", ", to_name(id), ");"); + } - auto expr = bitcast_expression(type, expr_type, to_name(id)); - set(id, expr, result_type, true); + auto expr = bitcast_expression(type, expr_type, to_name(id)); + set(id, expr, result_type, true); + } flush_all_atomic_capable_variables(); } @@ -3810,7 +5146,7 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto scope = static_cast(get(ops[2]).scalar()); + auto scope = static_cast(evaluate_constant_u32(ops[2])); if (scope != ScopeSubgroup) SPIRV_CROSS_THROW("Only subgroup scope is supported."); @@ -3822,6 +5158,11 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) return join(expr, " * ", to_expression(ops[4])); }; + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + #define make_inclusive_BitAnd(expr) "" #define make_inclusive_BitOr(expr) "" #define make_inclusive_BitXor(expr) "" @@ -3848,26 +5189,22 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) case OpGroupNonUniformInverseBallot: SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL."); - break; case OpGroupNonUniformBallotBitExtract: SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL."); - break; case OpGroupNonUniformBallotFindLSB: SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL."); - break; case OpGroupNonUniformBallotFindMSB: SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL."); - break; case OpGroupNonUniformBallotBitCount: { auto operation = static_cast(ops[3]); + bool forward = should_forward(ops[4]); if (operation == GroupOperationReduce) { - bool forward = should_forward(ops[4]); auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x) + countbits(", to_enclosed_expression(ops[4]), ".y)"); auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z) + countbits(", @@ -3876,22 +5213,66 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) inherit_expression_dependencies(id, ops[4]); } else if (operation == GroupOperationInclusiveScan) - SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount Inclusive Scan in HLSL."); + { + auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLeMask.x) + countbits(", + to_enclosed_expression(ops[4]), ".y & gl_SubgroupLeMask.y)"); + auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLeMask.z) + countbits(", + to_enclosed_expression(ops[4]), ".w & gl_SubgroupLeMask.w)"); + emit_op(result_type, id, join(left, " + ", right), forward); + if (!active_input_builtins.get(BuiltInSubgroupLeMask)) + 
{ + active_input_builtins.set(BuiltInSubgroupLeMask); + force_recompile_guarantee_forward_progress(); + } + } else if (operation == GroupOperationExclusiveScan) - SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount Exclusive Scan in HLSL."); + { + auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLtMask.x) + countbits(", + to_enclosed_expression(ops[4]), ".y & gl_SubgroupLtMask.y)"); + auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLtMask.z) + countbits(", + to_enclosed_expression(ops[4]), ".w & gl_SubgroupLtMask.w)"); + emit_op(result_type, id, join(left, " + ", right), forward); + if (!active_input_builtins.get(BuiltInSubgroupLtMask)) + { + active_input_builtins.set(BuiltInSubgroupLtMask); + force_recompile_guarantee_forward_progress(); + } + } else SPIRV_CROSS_THROW("Invalid BitCount operation."); break; } case OpGroupNonUniformShuffle: - SPIRV_CROSS_THROW("Cannot trivially implement Shuffle in HLSL."); + emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt"); + break; case OpGroupNonUniformShuffleXor: - SPIRV_CROSS_THROW("Cannot trivially implement ShuffleXor in HLSL."); + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() ^ ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } case OpGroupNonUniformShuffleUp: - SPIRV_CROSS_THROW("Cannot trivially implement ShuffleUp in HLSL."); + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() - ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } case OpGroupNonUniformShuffleDown: - SPIRV_CROSS_THROW("Cannot trivially implement ShuffleDown in HLSL."); + { + bool forward = should_forward(ops[3]); + 
emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() + ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } case OpGroupNonUniformAll: emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue"); @@ -3902,12 +5283,8 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) break; case OpGroupNonUniformAllEqual: - { - auto &type = get(result_type); - emit_unary_func_op(result_type, id, ops[3], - type.basetype == SPIRType::Boolean ? "WaveActiveAllEqualBool" : "WaveActiveAllEqual"); + emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllEqual"); break; - } // clang-format off #define HLSL_GROUP_OP(op, hlsl_op, supports_scan) \ @@ -3930,25 +5307,42 @@ case OpGroupNonUniform##op: \ SPIRV_CROSS_THROW("Invalid group operation."); \ break; \ } + +#define HLSL_GROUP_OP_CAST(op, hlsl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "WaveActive" #hlsl_op, type, type); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + HLSL_GROUP_OP(FAdd, Sum, true) HLSL_GROUP_OP(FMul, Product, true) HLSL_GROUP_OP(FMin, Min, false) HLSL_GROUP_OP(FMax, Max, false) HLSL_GROUP_OP(IAdd, Sum, true) HLSL_GROUP_OP(IMul, Product, true) - HLSL_GROUP_OP(SMin, Min, false) - HLSL_GROUP_OP(SMax, Max, false) - HLSL_GROUP_OP(UMin, Min, false) - HLSL_GROUP_OP(UMax, Max, false) + HLSL_GROUP_OP_CAST(SMin, Min, int_type) + HLSL_GROUP_OP_CAST(SMax, Max, int_type) + HLSL_GROUP_OP_CAST(UMin, Min, uint_type) + HLSL_GROUP_OP_CAST(UMax, Max, uint_type) HLSL_GROUP_OP(BitwiseAnd, BitAnd, false) HLSL_GROUP_OP(BitwiseOr, BitOr, false) HLSL_GROUP_OP(BitwiseXor, BitXor, false) + HLSL_GROUP_OP_CAST(LogicalAnd, BitAnd, uint_type) + HLSL_GROUP_OP_CAST(LogicalOr, BitOr, uint_type) + HLSL_GROUP_OP_CAST(LogicalXor, BitXor, uint_type) + 
#undef HLSL_GROUP_OP +#undef HLSL_GROUP_OP_CAST // clang-format on case OpGroupNonUniformQuadSwap: { - uint32_t direction = get(ops[4]).scalar(); + uint32_t direction = evaluate_constant_u32(ops[4]); if (direction == 0) emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossX"); else if (direction == 1) @@ -3980,7 +5374,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) #define HLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) #define HLSL_BOP_CAST(op, type) \ - emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false) #define HLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) #define HLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) #define HLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) @@ -3993,6 +5387,9 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // If we need to do implicit bitcasts, make sure we do it with the correct type. 
uint32_t integer_width = get_integer_width_for_instruction(instruction); auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + opcode = get_remapped_spirv_op(opcode); switch (opcode) { @@ -4002,6 +5399,55 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) emit_access_chain(instruction); break; } + case OpBitcast: + { + auto bitcast_type = get_bitcast_type(ops[0], ops[2]); + if (bitcast_type == CompilerHLSL::TypeNormal) + CompilerGLSL::emit_instruction(instruction); + else + { + if (!requires_uint2_packing) + { + requires_uint2_packing = true; + force_recompile(); + } + + if (bitcast_type == CompilerHLSL::TypePackUint2x32) + emit_unary_func_op(ops[0], ops[1], ops[2], "spvPackUint2x32"); + else + emit_unary_func_op(ops[0], ops[1], ops[2], "spvUnpackUint2x32"); + } + + break; + } + + case OpSelect: + { + auto &value_type = expression_type(ops[3]); + if (value_type.basetype == SPIRType::Struct || is_array(value_type)) + { + // HLSL does not support ternary expressions on composites. + // Cannot use branches, since we might be in a continue block + // where explicit control flow is prohibited. + // Emit a helper function where we can use control flow. 
+ TypeID value_type_id = expression_type_id(ops[3]); + auto itr = std::find(composite_selection_workaround_types.begin(), + composite_selection_workaround_types.end(), + value_type_id); + if (itr == composite_selection_workaround_types.end()) + { + composite_selection_workaround_types.push_back(value_type_id); + force_recompile(); + } + emit_uninitialized_temporary_expression(ops[0], ops[1]); + statement("spvSelectComposite(", + to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ");"); + } + else + CompilerGLSL::emit_instruction(instruction); + break; + } case OpStore: { @@ -4017,18 +5463,21 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpMatrixTimesVector: { + // Matrices are kept in a transposed state all the time, flip multiplication order always. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); break; } case OpVectorTimesMatrix: { + // Matrices are kept in a transposed state all the time, flip multiplication order always. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); break; } case OpMatrixTimesMatrix: { + // Matrices are kept in a transposed state all the time, flip multiplication order always. emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); break; } @@ -4152,7 +5601,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); else HLSL_BOP_CAST(==, int_type); break; @@ -4160,12 +5609,19 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpLogicalEqual: case OpFOrdEqual: + case OpFUnordEqual: { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. 
+ // isnan() is apparently implemented as x != x as well. + // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual. + // HACK: FUnordEqual will be implemented as FOrdEqual. + auto result_type = ops[0]; auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); else HLSL_BOP(==); break; @@ -4177,7 +5633,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); else HLSL_BOP_CAST(!=, int_type); break; @@ -4185,12 +5641,23 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpLogicalNotEqual: case OpFOrdNotEqual: + case OpFUnordNotEqual: { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. + + // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here. + // We would need to do something like not(UnordEqual), but that cannot be expressed either. + // Adding a lot of NaN checks would be a breaking change from perspective of performance. + // SPIR-V will generally use isnan() checks when this even matters. + // HACK: FOrdNotEqual will be implemented as FUnordEqual. 
+ auto result_type = ops[0]; auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); else HLSL_BOP(!=); break; @@ -4201,10 +5668,10 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThan ? uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type); else HLSL_BOP_CAST(>, type); break; @@ -4216,21 +5683,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown); else HLSL_BOP(>); break; } + case OpFUnordGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpUGreaterThanEqual: case OpSGreaterThanEqual: { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpUGreaterThanEqual ? 
uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type); else HLSL_BOP_CAST(>=, type); break; @@ -4242,21 +5721,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown); else HLSL_BOP(>=); break; } + case OpFUnordGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpULessThan: case OpSLessThan: { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThan ? 
uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type); else HLSL_BOP_CAST(<, type); break; @@ -4268,21 +5759,33 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown); else HLSL_BOP(<); break; } + case OpFUnordLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpULessThanEqual: case OpSLessThanEqual: { auto result_type = ops[0]; auto id = ops[1]; - auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; + auto type = opcode == OpULessThanEqual ? 
uint_type : int_type; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type); else HLSL_BOP_CAST(<=, type); break; @@ -4294,14 +5797,26 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto id = ops[1]; if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown); else HLSL_BOP(<=); break; } + case OpFUnordLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + case OpImageQueryLod: - emit_texture_op(instruction); + emit_texture_op(instruction, false); break; case OpImageQuerySizeLod: @@ -4309,12 +5824,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto result_type = ops[0]; auto id = ops[1]; - require_texture_query_variant(expression_type(ops[2])); - + require_texture_query_variant(ops[2]); auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); statement("uint ", dummy_samples_levels, ";"); - auto expr = join("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", ", + auto expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", ", bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")"); auto &restype = get(ops[0]); @@ -4328,12 +5842,22 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto result_type = ops[0]; auto id = ops[1]; - require_texture_query_variant(expression_type(ops[2])); + require_texture_query_variant(ops[2]); + bool uav = expression_type(ops[2]).image.sampled == 2; + + if (const auto *var = maybe_get_backing_variable(ops[2])) + if 
(hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) + uav = false; auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); statement("uint ", dummy_samples_levels, ";"); - auto expr = join("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); + string expr; + if (uav) + expr = join("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", dummy_samples_levels, ")"); + else + expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); + auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::UInt, expr); emit_op(result_type, id, expr, true); @@ -4346,14 +5870,25 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto result_type = ops[0]; auto id = ops[1]; - require_texture_query_variant(expression_type(ops[2])); + require_texture_query_variant(ops[2]); + bool uav = expression_type(ops[2]).image.sampled == 2; + if (opcode == OpImageQueryLevels && uav) + SPIRV_CROSS_THROW("Cannot query levels for UAV images."); + + if (const auto *var = maybe_get_backing_variable(ops[2])) + if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) + uav = false; // Keep it simple and do not emit special variants to make this look nicer ... // This stuff is barely, if ever, used. 
forced_temporaries.insert(id); auto &type = get(result_type); statement(variable_decl(type, to_name(id)), ";"); - statement("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", 0u, ", to_name(id), ");"); + + if (uav) + statement("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", to_name(id), ");"); + else + statement("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", to_name(id), ");"); auto &restype = get(ops[0]); auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id)); @@ -4384,24 +5919,29 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) if (operands != ImageOperandsSampleMask || instruction.length != 6) SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); uint32_t sample = ops[5]; - imgexpr = join(to_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); + imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); } else - imgexpr = join(to_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); + imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); pure = true; } else { - imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"); + imgexpr = join(to_non_uniform_aware_expression(ops[2]), "[", to_expression(ops[3]), "]"); // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", // except that the underlying type changes how the data is interpreted. 
- if (var && !subpass_data) + + bool force_srv = + hlsl_options.nonwritable_uav_texture_as_srv && var && has_decoration(var->self, DecorationNonWritable); + pure = force_srv; + + if (var && !subpass_data && !force_srv) imgexpr = remap_swizzle(get(result_type), image_format_to_components(get(var->basetype).image.format), imgexpr); } - if (var && var->forwardable) + if (var) { bool forward = forced_temporaries.find(id) == end(forced_temporaries); auto &e = emit_op(result_type, id, imgexpr, forward); @@ -4437,7 +5977,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr); } - statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); + statement(to_non_uniform_aware_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); if (var && variable_storage_is_aliased(*var)) flush_all_aliased_variables(); break; @@ -4447,12 +5987,15 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto &e = - set(id, join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"), result_type, true); + + auto expr = to_expression(ops[2]); + expr += join("[", to_expression(ops[3]), "]"); + auto &e = set(id, expr, result_type, true); // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? 
var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); break; } @@ -4469,6 +6012,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpAtomicIAdd: case OpAtomicIIncrement: case OpAtomicIDecrement: + case OpAtomicLoad: + case OpAtomicStore: { emit_atomic(ops, instruction.length, opcode); break; @@ -4482,13 +6027,13 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) if (opcode == OpMemoryBarrier) { - memory = get(ops[0]).scalar(); - semantics = get(ops[1]).scalar(); + memory = evaluate_constant_u32(ops[0]); + semantics = evaluate_constant_u32(ops[1]); } else { - memory = get(ops[1]).scalar(); - semantics = get(ops[2]).scalar(); + memory = evaluate_constant_u32(ops[1]); + semantics = evaluate_constant_u32(ops[2]); } if (memory == ScopeSubgroup) @@ -4508,8 +6053,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) if (next && next->op == OpControlBarrier) { auto *next_ops = stream(*next); - uint32_t next_memory = get(next_ops[1]).scalar(); - uint32_t next_semantics = get(next_ops[2]).scalar(); + uint32_t next_memory = evaluate_constant_u32(next_ops[1]); + uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); next_semantics = mask_relevant_memory_semantics(next_semantics); // There is no "just execution barrier" in HLSL. 
@@ -4581,7 +6126,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) force_recompile(); } - auto expr = join("SPIRV_Cross_bitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + auto expr = join("spvBitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ")"); bool forward = @@ -4603,15 +6148,18 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) } if (opcode == OpBitFieldSExtract) - HLSL_TFOP(SPIRV_Cross_bitfieldSExtract); + HLSL_TFOP(spvBitfieldSExtract); else - HLSL_TFOP(SPIRV_Cross_bitfieldUExtract); + HLSL_TFOP(spvBitfieldUExtract); break; } case OpBitCount: - HLSL_UFOP(countbits); + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype); break; + } case OpBitReverse: HLSL_UFOP(reversebits); @@ -4619,7 +6167,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpArrayLength: { - auto *var = maybe_get(ops[2]); + auto *var = maybe_get_backing_variable(ops[2]); if (!var) SPIRV_CROSS_THROW("Array length must point directly to an SSBO block."); @@ -4629,21 +6177,186 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // This must be 32-bit uint, so we're good to go. 
emit_uninitialized_temporary_expression(ops[0], ops[1]); - statement(to_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");"); + statement(to_non_uniform_aware_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");"); uint32_t offset = type_struct_member_offset(type, ops[3]); uint32_t stride = type_struct_member_array_stride(type, ops[3]); statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";"); break; } + case OpIsHelperInvocationEXT: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher."); + // Helper lane state with demote is volatile by nature. + // Do not forward this. + emit_op(ops[0], ops[1], "IsHelperLane()", false); + break; + + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1."); + break; // Nothing to do in the body + + case OpRayQueryInitializeKHR: + { + flush_variable_declaration(ops[0]); + + std::string ray_desc_name = get_unique_identifier(); + statement("RayDesc ", ray_desc_name, " = {", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), "};"); + + statement(to_expression(ops[0]), ".TraceRayInline(", + to_expression(ops[1]), ", ", // acc structure + to_expression(ops[2]), ", ", // ray flags + to_expression(ops[3]), ", ", // mask + ray_desc_name, ");"); // ray + break; + } + case OpRayQueryProceedKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".Proceed()"), false); + break; + } + case OpRayQueryTerminateKHR: + { + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".Abort();"); + break; + } + case OpRayQueryGenerateIntersectionKHR: + { + flush_variable_declaration(ops[0]); + 
statement(to_expression(ops[0]), ".CommitProceduralPrimitiveHit(", ops[1], ");"); + break; + } + case OpRayQueryConfirmIntersectionKHR: + { + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".CommitNonOpaqueTriangleHit();"); + break; + } + case OpRayQueryGetIntersectionTypeKHR: + { + emit_rayquery_function(".CommittedStatus()", ".CandidateType()", ops); + break; + } + case OpRayQueryGetIntersectionTKHR: + { + emit_rayquery_function(".CommittedRayT()", ".CandidateTriangleRayT()", ops); + break; + } + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: + { + emit_rayquery_function(".CommittedInstanceID()", ".CandidateInstanceID()", ops); + break; + } + case OpRayQueryGetIntersectionInstanceIdKHR: + { + emit_rayquery_function(".CommittedInstanceIndex()", ".CandidateInstanceIndex()", ops); + break; + } + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: + { + emit_rayquery_function(".CommittedInstanceContributionToHitGroupIndex()", + ".CandidateInstanceContributionToHitGroupIndex()", ops); + break; + } + case OpRayQueryGetIntersectionGeometryIndexKHR: + { + emit_rayquery_function(".CommittedGeometryIndex()", + ".CandidateGeometryIndex()", ops); + break; + } + case OpRayQueryGetIntersectionPrimitiveIndexKHR: + { + emit_rayquery_function(".CommittedPrimitiveIndex()", ".CandidatePrimitiveIndex()", ops); + break; + } + case OpRayQueryGetIntersectionBarycentricsKHR: + { + emit_rayquery_function(".CommittedTriangleBarycentrics()", ".CandidateTriangleBarycentrics()", ops); + break; + } + case OpRayQueryGetIntersectionFrontFaceKHR: + { + emit_rayquery_function(".CommittedTriangleFrontFace()", ".CandidateTriangleFrontFace()", ops); + break; + } + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".CandidateProceduralPrimitiveNonOpaque()"), false); + break; + } + case OpRayQueryGetIntersectionObjectRayDirectionKHR: + { + 
emit_rayquery_function(".CommittedObjectRayDirection()", ".CandidateObjectRayDirection()", ops); + break; + } + case OpRayQueryGetIntersectionObjectRayOriginKHR: + { + flush_variable_declaration(ops[0]); + emit_rayquery_function(".CommittedObjectRayOrigin()", ".CandidateObjectRayOrigin()", ops); + break; + } + case OpRayQueryGetIntersectionObjectToWorldKHR: + { + emit_rayquery_function(".CommittedObjectToWorld4x3()", ".CandidateObjectToWorld4x3()", ops); + break; + } + case OpRayQueryGetIntersectionWorldToObjectKHR: + { + emit_rayquery_function(".CommittedWorldToObject4x3()", ".CandidateWorldToObject4x3()", ops); + break; + } + case OpRayQueryGetRayFlagsKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayFlags()"), false); + break; + } + case OpRayQueryGetRayTMinKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayTMin()"), false); + break; + } + case OpRayQueryGetWorldRayOriginKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayOrigin()"), false); + break; + } + case OpRayQueryGetWorldRayDirectionKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayDirection()"), false); + break; + } + case OpSetMeshOutputsEXT: + { + statement("SetMeshOutputCounts(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; } } -void CompilerHLSL::require_texture_query_variant(const SPIRType &type) +void CompilerHLSL::require_texture_query_variant(uint32_t var_id) { + if (const auto *var = maybe_get_backing_variable(var_id)) + var_id = var->self; + + auto &type = expression_type(var_id); + bool uav = type.image.sampled == 2; + if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var_id, DecorationNonWritable)) + uav = false; + uint32_t bit = 0; switch 
(type.image.dim) { @@ -4692,17 +6405,22 @@ void CompilerHLSL::require_texture_query_variant(const SPIRType &type) SPIRV_CROSS_THROW("Unsupported query type."); } + auto norm_state = image_format_to_normalized_state(type.image.format); + auto &variant = uav ? required_texture_size_variants + .uav[uint32_t(norm_state)][image_format_to_components(type.image.format) - 1] : + required_texture_size_variants.srv; + uint64_t mask = 1ull << bit; - if ((required_textureSizeVariants & mask) == 0) + if ((variant & mask) == 0) { force_recompile(); - required_textureSizeVariants |= mask; + variant |= mask; } } void CompilerHLSL::set_root_constant_layouts(std::vector layout) { - root_constants_layout = move(layout); + root_constants_layout = std::move(layout); } void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes) @@ -4710,7 +6428,7 @@ void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &ve remap_vertex_attributes.push_back(vertex_attributes); } -uint32_t CompilerHLSL::remap_num_workgroups_builtin() +VariableID CompilerHLSL::remap_num_workgroups_builtin() { update_active_builtins(); @@ -4753,9 +6471,15 @@ uint32_t CompilerHLSL::remap_num_workgroups_builtin() ir.meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups"; num_workgroups_builtin = variable_id; + get_entry_point().interface_variables.push_back(num_workgroups_builtin); return variable_id; } +void CompilerHLSL::set_resource_binding_flags(HLSLBindingFlags flags) +{ + resource_binding_flags = flags; +} + void CompilerHLSL::validate_shader_model() { // Check for nonuniform qualifier. 
@@ -4769,6 +6493,12 @@ void CompilerHLSL::validate_shader_model() if (hlsl_options.shader_model < 51) SPIRV_CROSS_THROW( "Shader model 5.1 or higher is required to use bindless resources or NonUniformResourceIndex."); + break; + + case CapabilityVariablePointers: + case CapabilityVariablePointersStorageBuffer: + SPIRV_CROSS_THROW("VariablePointers capability is not supported in HLSL."); + default: break; } @@ -4776,10 +6506,15 @@ void CompilerHLSL::validate_shader_model() if (ir.addressing_model != AddressingModelLogical) SPIRV_CROSS_THROW("Only Logical addressing model can be used with HLSL."); + + if (hlsl_options.enable_16bit_types && hlsl_options.shader_model < 62) + SPIRV_CROSS_THROW("Need at least shader model 6.2 when enabling native 16-bit type support."); } string CompilerHLSL::compile() { + ir.fixup_reserved_names(); + // Do not deal with ES-isms like precision, older extensions and such. options.es = false; options.version = 450; @@ -4792,26 +6527,36 @@ string CompilerHLSL::compile() backend.uint16_t_literal_suffix = "u"; backend.basic_int_type = "int"; backend.basic_uint_type = "uint"; + backend.demote_literal = "discard"; + backend.boolean_mix_function = ""; backend.swizzle_is_function = false; backend.shared_is_implied = true; backend.unsized_array_supported = true; backend.explicit_struct_type = false; backend.use_initializer_list = true; backend.use_constructor_splatting = false; - backend.boolean_mix_support = false; backend.can_swizzle_scalar = true; backend.can_declare_struct_inline = false; backend.can_declare_arrays_inline = false; backend.can_return_array = false; backend.nonuniform_qualifier = "NonUniformResourceIndex"; backend.support_case_fallthrough = false; + backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT; + backend.force_gl_in_out_block = backend.force_merged_mesh_block; + + // SM 4.1 does not support precise for some reason. 
+ backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40; + fixup_anonymous_struct_names(); fixup_type_alias(); reorder_type_alias(); build_function_control_flow_graphs_and_analyze(); validate_shader_model(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); + if (get_execution_model() == ExecutionModelMeshEXT) + analyze_meshlet_writes(); // Subpass input needs SV_Position. if (need_subpass_input) @@ -4820,10 +6565,7 @@ string CompilerHLSL::compile() uint32_t pass_count = 0; do { - if (pass_count >= 3) - SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); - - reset(); + reset(pass_count); // Move constructor for this type is broken on GCC 4.9 ... buffer.reset(); @@ -4863,3 +6605,60 @@ void CompilerHLSL::emit_block_hints(const SPIRBlock &block) break; } } + +string CompilerHLSL::get_unique_identifier() +{ + return join("_", unique_identifier_count++, "ident"); +} + +void CompilerHLSL::add_hlsl_resource_binding(const HLSLResourceBinding &binding) +{ + StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding }; + resource_bindings[tuple] = { binding, false }; +} + +bool CompilerHLSL::is_hlsl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const +{ + StageSetBinding tuple = { model, desc_set, binding }; + auto itr = resource_bindings.find(tuple); + return itr != end(resource_bindings) && itr->second.second; +} + +CompilerHLSL::BitcastType CompilerHLSL::get_bitcast_type(uint32_t result_type, uint32_t op0) +{ + auto &rslt_type = get(result_type); + auto &expr_type = expression_type(op0); + + if (rslt_type.basetype == SPIRType::BaseType::UInt64 && expr_type.basetype == SPIRType::BaseType::UInt && + expr_type.vecsize == 2) + return BitcastType::TypePackUint2x32; + else if (rslt_type.basetype == SPIRType::BaseType::UInt && rslt_type.vecsize == 2 && + expr_type.basetype == SPIRType::BaseType::UInt64) + 
return BitcastType::TypeUnpackUint64; + + return BitcastType::TypeNormal; +} + +bool CompilerHLSL::is_hlsl_force_storage_buffer_as_uav(ID id) const +{ + if (hlsl_options.force_storage_buffer_as_uav) + { + return true; + } + + const uint32_t desc_set = get_decoration(id, spv::DecorationDescriptorSet); + const uint32_t binding = get_decoration(id, spv::DecorationBinding); + + return (force_uav_buffer_bindings.find({ desc_set, binding }) != force_uav_buffer_bindings.end()); +} + +void CompilerHLSL::set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding) +{ + SetBindingPair pair = { desc_set, binding }; + force_uav_buffer_bindings.insert(pair); +} + +bool CompilerHLSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const +{ + return (builtin == BuiltInSampleMask); +} diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index 796f502c5d2..57d1c2cdc01 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 Robert Konrad + * Copyright 2016-2021 Robert Konrad + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_HLSL_HPP #define SPIRV_HLSL_HPP @@ -41,6 +48,61 @@ struct RootConstants uint32_t space; }; +// For finer control, decorations may be removed from specific resources instead with unset_decoration(). +enum HLSLBindingFlagBits +{ + HLSL_BINDING_AUTO_NONE_BIT = 0, + + // Push constant (root constant) resources will be declared as CBVs (b-space) without a register() declaration. + // A register will be automatically assigned by the D3D compiler, but must therefore be reflected in D3D-land. 
+ // Push constants do not normally have a DecorationBinding set, but if they do, this can be used to ignore it. + HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0, + + // cbuffer resources will be declared as CBVs (b-space) without a register() declaration. + // A register will be automatically assigned, but must be reflected in D3D-land. + HLSL_BINDING_AUTO_CBV_BIT = 1 << 1, + + // All SRVs (t-space) will be declared without a register() declaration. + HLSL_BINDING_AUTO_SRV_BIT = 1 << 2, + + // All UAVs (u-space) will be declared without a register() declaration. + HLSL_BINDING_AUTO_UAV_BIT = 1 << 3, + + // All samplers (s-space) will be declared without a register() declaration. + HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4, + + // No resources will be declared with register(). + HLSL_BINDING_AUTO_ALL = 0x7fffffff +}; +using HLSLBindingFlags = uint32_t; + +// By matching stage, desc_set and binding for a SPIR-V resource, +// register bindings are set based on whether the HLSL resource is a +// CBV, UAV, SRV or Sampler. A single binding in SPIR-V might contain multiple +// resource types, e.g. COMBINED_IMAGE_SAMPLER, and SRV/Sampler bindings will be used respectively. +// On SM 5.0 and lower, register_space is ignored. +// +// To remap a push constant block which does not have any desc_set/binding associated with it, +// use ResourceBindingPushConstant{DescriptorSet,Binding} as values for desc_set/binding. +// For deeper control of push constants, set_root_constant_layouts() can be used instead. 
+struct HLSLResourceBinding +{ + spv::ExecutionModel stage = spv::ExecutionModelMax; + uint32_t desc_set = 0; + uint32_t binding = 0; + + struct Binding + { + uint32_t register_space = 0; + uint32_t register_binding = 0; + } cbv, uav, srv, sampler; +}; + +enum HLSLAuxBinding +{ + HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE = 0 +}; + class CompilerHLSL : public CompilerGLSL { public: @@ -59,6 +121,30 @@ class CompilerHLSL : public CompilerGLSL // Set to false if you know you will never use base instance or base vertex // functionality as it might remove an internal cbuffer. bool support_nonzero_base_vertex_base_instance = false; + + // Forces a storage buffer to always be declared as UAV, even if the readonly decoration is used. + // By default, a readonly storage buffer will be declared as ByteAddressBuffer (SRV) instead. + // Alternatively, use set_hlsl_force_storage_buffer_as_uav to specify individually. + bool force_storage_buffer_as_uav = false; + + // Forces any storage image type marked as NonWritable to be considered an SRV instead. + // For this to work with function call parameters, NonWritable must be considered to be part of the type system + // so that NonWritable image arguments are also translated to Texture rather than RWTexture. + bool nonwritable_uav_texture_as_srv = false; + + // Enables native 16-bit types. Needs SM 6.2. + // Uses half/int16_t/uint16_t instead of min16* types. + // Also adds support for 16-bit load-store from (RW)ByteAddressBuffer. + bool enable_16bit_types = false; + + // If matrices are used as IO variables, flatten the attribute declaration to use + // TEXCOORD{N,N+1,N+2,...} rather than TEXCOORDN_{0,1,2,3}. + // If add_vertex_attribute_remap is used and this feature is used, + // the semantic name will be queried once per active location. + bool flatten_matrix_vertex_input_semantics = false; + + // Rather than emitting main() for the entry point, use the name in SPIR-V. 
+ bool use_entry_point_name = false; }; explicit CompilerHLSL(std::vector spirv_) @@ -114,7 +200,26 @@ class CompilerHLSL : public CompilerGLSL // If non-zero, this returns the variable ID of a cbuffer which corresponds to // the cbuffer declared above. By default, no binding or descriptor set decoration is set, // so the calling application should declare explicit bindings on this ID before calling compile(). - uint32_t remap_num_workgroups_builtin(); + VariableID remap_num_workgroups_builtin(); + + // Controls how resource bindings are declared in the output HLSL. + void set_resource_binding_flags(HLSLBindingFlags flags); + + // resource is a resource binding to indicate the HLSL CBV, SRV, UAV or sampler binding + // to use for a particular SPIR-V description set + // and binding. If resource bindings are provided, + // is_hlsl_resource_binding_used() will return true after calling ::compile() if + // the set/binding combination was used by the HLSL code. + void add_hlsl_resource_binding(const HLSLResourceBinding &resource); + bool is_hlsl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const; + + // Controls which storage buffer bindings will be forced to be declared as UAVs. + void set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding); + + // By default, these magic buffers are not assigned a specific binding. 
+ void set_hlsl_aux_buffer_binding(HLSLAuxBinding binding, uint32_t register_index, uint32_t register_space); + void unset_hlsl_aux_buffer_binding(HLSLAuxBinding binding); + bool is_hlsl_aux_buffer_binding_used(HLSLAuxBinding binding) const; private: std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; @@ -126,10 +231,13 @@ class CompilerHLSL : public CompilerGLSL void emit_header() override; void emit_resources(); void emit_interface_block_globally(const SPIRVariable &type); - void emit_interface_block_in_struct(const SPIRVariable &type, std::unordered_set &active_locations); + void emit_interface_block_in_struct(const SPIRVariable &var, std::unordered_set &active_locations); + void emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index, uint32_t location, + std::unordered_set &active_locations); void emit_builtin_inputs_in_struct(); void emit_builtin_outputs_in_struct(); - void emit_texture_op(const Instruction &i) override; + void emit_builtin_primitive_outputs_in_struct(); + void emit_texture_op(const Instruction &i, bool sparse) override; void emit_instruction(const Instruction &instruction) override; void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count) override; @@ -145,16 +253,25 @@ class CompilerHLSL : public CompilerGLSL std::string layout_for_member(const SPIRType &type, uint32_t index) override; std::string to_interpolation_qualifiers(const Bitset &flags) override; std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; - std::string to_func_call_arg(uint32_t id) override; + bool emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; std::string to_sampler_expression(uint32_t id); std::string to_resource_binding(const SPIRVariable &var); std::string to_resource_binding_sampler(const 
SPIRVariable &var); - std::string to_resource_register(char space, uint32_t binding, uint32_t set); + std::string to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t set); + std::string to_initializer_expression(const SPIRVariable &var) override; void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; void emit_access_chain(const Instruction &instruction); void emit_load(const Instruction &instruction); - std::string read_access_chain(const SPIRAccessChain &chain); - void write_access_chain(const SPIRAccessChain &chain, uint32_t value); + void read_access_chain(std::string *expr, const std::string &lhs, const SPIRAccessChain &chain); + void read_access_chain_struct(const std::string &lhs, const SPIRAccessChain &chain); + void read_access_chain_array(const std::string &lhs, const SPIRAccessChain &chain); + void write_access_chain(const SPIRAccessChain &chain, uint32_t value, const SmallVector &composite_chain); + void write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain); + void write_access_chain_array(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain); + std::string write_access_chain_value(uint32_t value, const SmallVector &composite_chain, bool enclose); void emit_store(const Instruction &instruction); void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op); void emit_subgroup_op(const Instruction &i) override; @@ -162,15 +279,19 @@ class CompilerHLSL : public CompilerGLSL void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier, uint32_t base_offset = 0) override; + void emit_rayquery_function(const char *commited, const char *candidate, const uint32_t *ops); const char *to_storage_qualifiers_glsl(const SPIRVariable &var) override; void replace_illegal_names() override; + bool 
is_hlsl_force_storage_buffer_as_uav(ID id) const; + Options hlsl_options; // TODO: Refactor this to be more similar to MSL, maybe have some common system in place? bool requires_op_fmod = false; bool requires_fp16_packing = false; + bool requires_uint2_packing = false; bool requires_explicit_fp16_packing = false; bool requires_unorm8_packing = false; bool requires_snorm8_packing = false; @@ -183,8 +304,25 @@ class CompilerHLSL : public CompilerGLSL bool requires_inverse_4x4 = false; bool requires_scalar_reflect = false; bool requires_scalar_refract = false; - uint64_t required_textureSizeVariants = 0; - void require_texture_query_variant(const SPIRType &type); + bool requires_scalar_faceforward = false; + + struct TextureSizeVariants + { + // MSVC 2013 workaround. + TextureSizeVariants() + { + srv = 0; + for (auto &unorm : uav) + for (auto &u : unorm) + u = 0; + } + uint64_t srv; + uint64_t uav[3][4]; + } required_texture_size_variants; + + void require_texture_query_variant(uint32_t var_id); + void emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav, + const char *type_qualifier); enum TextureQueryVariantDim { @@ -209,6 +347,19 @@ class CompilerHLSL : public CompilerGLSL QueryTypeCount = 3 }; + enum BitcastType + { + TypeNormal, + TypePackUint2x32, + TypeUnpackUint64 + }; + + void analyze_meshlet_writes(); + void analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive, + std::unordered_set &processed_func_ids); + + BitcastType get_bitcast_type(uint32_t result_type, uint32_t op0); + void emit_builtin_variables(); bool require_output = false; bool require_input = false; @@ -216,16 +367,39 @@ class CompilerHLSL : public CompilerGLSL uint32_t type_to_consumed_locations(const SPIRType &type) const; - void emit_io_block(const SPIRVariable &var); std::string to_semantic(uint32_t location, spv::ExecutionModel em, spv::StorageClass sc); uint32_t num_workgroups_builtin = 0; + HLSLBindingFlags 
resource_binding_flags = 0; // Custom root constant layout, which should be emitted // when translating push constant ranges. std::vector root_constants_layout; void validate_shader_model(); + + std::string get_unique_identifier(); + uint32_t unique_identifier_count = 0; + + std::unordered_map, InternalHasher> resource_bindings; + void remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding); + + std::unordered_set force_uav_buffer_bindings; + + struct + { + uint32_t register_index = 0; + uint32_t register_space = 0; + bool explicit_binding = false; + bool used = false; + } base_vertex_info; + + // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but SV_Coverage is a scalar in HLSL. + bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; + + std::vector composite_selection_workaround_types; + + std::string get_inner_entry_point_name() const; }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_msl.cpp b/spirv_msl.cpp index eaee10a0493..da5656ab77e 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 The Brenwill Workshop Ltd. + * Copyright 2016-2021 The Brenwill Workshop Ltd. + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #include "spirv_msl.hpp" #include "GLSL.std.450.h" @@ -27,9 +34,10 @@ using namespace std; static const uint32_t k_unknown_location = ~0u; static const uint32_t k_unknown_component = ~0u; +static const char *force_inline = "static inline __attribute__((always_inline))"; CompilerMSL::CompilerMSL(std::vector spirv_) - : CompilerGLSL(move(spirv_)) + : CompilerGLSL(std::move(spirv_)) { } @@ -48,17 +56,81 @@ CompilerMSL::CompilerMSL(ParsedIR &&ir_) { } -void CompilerMSL::add_msl_vertex_attribute(const MSLVertexAttr &va) +void CompilerMSL::add_msl_shader_input(const MSLShaderInterfaceVariable &si) +{ + inputs_by_location[{si.location, si.component}] = si; + if (si.builtin != BuiltInMax && !inputs_by_builtin.count(si.builtin)) + inputs_by_builtin[si.builtin] = si; +} + +void CompilerMSL::add_msl_shader_output(const MSLShaderInterfaceVariable &so) { - vtx_attrs_by_location[va.location] = va; - if (va.builtin != BuiltInMax && !vtx_attrs_by_builtin.count(va.builtin)) - vtx_attrs_by_builtin[va.builtin] = va; + outputs_by_location[{so.location, so.component}] = so; + if (so.builtin != BuiltInMax && !outputs_by_builtin.count(so.builtin)) + outputs_by_builtin[so.builtin] = so; } void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding) { StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding }; resource_bindings[tuple] = { binding, false }; + + // If we might need to pad argument buffer members to positionally align + // arg buffer indexes, also maintain a lookup by argument buffer index. 
+ if (msl_options.pad_argument_buffer_resources) + { + StageSetBinding arg_idx_tuple = { binding.stage, binding.desc_set, k_unknown_component }; + +#define ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(rez) \ + arg_idx_tuple.binding = binding.msl_##rez; \ + resource_arg_buff_idx_to_binding_number[arg_idx_tuple] = binding.binding + + switch (binding.basetype) + { + case SPIRType::Void: + case SPIRType::Boolean: + case SPIRType::SByte: + case SPIRType::UByte: + case SPIRType::Short: + case SPIRType::UShort: + case SPIRType::Int: + case SPIRType::UInt: + case SPIRType::Int64: + case SPIRType::UInt64: + case SPIRType::AtomicCounter: + case SPIRType::Half: + case SPIRType::Float: + case SPIRType::Double: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(buffer); + break; + case SPIRType::Image: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture); + break; + case SPIRType::Sampler: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler); + break; + case SPIRType::SampledImage: + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(texture); + ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP(sampler); + break; + default: + SPIRV_CROSS_THROW("Unexpected argument buffer resource base type. 
When padding argument buffer elements, " + "all descriptor set resources must be supplied with a base type by the app."); + } +#undef ADD_ARG_IDX_TO_BINDING_NUM_LOOKUP + } +} + +void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index) +{ + SetBindingPair pair = { desc_set, binding }; + buffers_requiring_dynamic_offset[pair] = { index, 0 }; +} + +void CompilerMSL::add_inline_uniform_block(uint32_t desc_set, uint32_t binding) +{ + SetBindingPair pair = { desc_set, binding }; + inline_uniform_blocks.insert(pair); } void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set) @@ -67,18 +139,66 @@ void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set) argument_buffer_discrete_mask |= 1u << desc_set; } -bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location) +void CompilerMSL::set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage) { - return vtx_attrs_in_use.count(location) != 0; + if (desc_set < kMaxArgumentBuffers) + { + if (device_storage) + argument_buffer_device_storage_mask |= 1u << desc_set; + else + argument_buffer_device_storage_mask &= ~(1u << desc_set); + } +} + +bool CompilerMSL::is_msl_shader_input_used(uint32_t location) +{ + // Don't report internal location allocations to app. + return location_inputs_in_use.count(location) != 0 && + location_inputs_in_use_fallback.count(location) == 0; +} + +bool CompilerMSL::is_msl_shader_output_used(uint32_t location) +{ + // Don't report internal location allocations to app. 
+ return location_outputs_in_use.count(location) != 0 && + location_outputs_in_use_fallback.count(location) == 0; +} + +uint32_t CompilerMSL::get_automatic_builtin_input_location(spv::BuiltIn builtin) const +{ + auto itr = builtin_to_automatic_input_location.find(builtin); + if (itr == builtin_to_automatic_input_location.end()) + return k_unknown_location; + else + return itr->second; +} + +uint32_t CompilerMSL::get_automatic_builtin_output_location(spv::BuiltIn builtin) const +{ + auto itr = builtin_to_automatic_output_location.find(builtin); + if (itr == builtin_to_automatic_output_location.end()) + return k_unknown_location; + else + return itr->second; } -bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) +bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const { StageSetBinding tuple = { model, desc_set, binding }; auto itr = resource_bindings.find(tuple); return itr != end(resource_bindings) && itr->second.second; } +// Returns the size of the array of resources used by the variable with the specified id. +// The returned value is retrieved from the resource binding added using add_msl_resource_binding(). +uint32_t CompilerMSL::get_resource_array_size(uint32_t id) const +{ + StageSetBinding tuple = { get_entry_point().model, get_decoration(id, DecorationDescriptorSet), + get_decoration(id, DecorationBinding) }; + auto itr = resource_bindings.find(tuple); + return itr != end(resource_bindings) ? 
itr->second.first.count : 0; +} + uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const { return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexPrimary); @@ -89,26 +209,58 @@ uint32_t CompilerMSL::get_automatic_msl_resource_binding_secondary(uint32_t id) return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexSecondary); } +uint32_t CompilerMSL::get_automatic_msl_resource_binding_tertiary(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexTertiary); +} + +uint32_t CompilerMSL::get_automatic_msl_resource_binding_quaternary(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexQuaternary); +} + void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components) { fragment_output_components[location] = components; } +bool CompilerMSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const +{ + return (builtin == BuiltInSampleMask); +} + void CompilerMSL::build_implicit_builtins() { bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition); - bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex; - bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl; + bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex && + !msl_options.vertex_for_tessellation; + bool need_tesc_params = is_tesc_shader(); + bool need_tese_params = is_tese_shader() && msl_options.raw_buffer_tese_input; bool need_subgroup_mask = active_input_builtins.get(BuiltInSubgroupEqMask) || active_input_builtins.get(BuiltInSubgroupGeMask) || active_input_builtins.get(BuiltInSubgroupGtMask) || active_input_builtins.get(BuiltInSubgroupLeMask) || active_input_builtins.get(BuiltInSubgroupLtMask); bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) || 
active_input_builtins.get(BuiltInSubgroupGtMask)); - bool need_multiview = get_execution_model() == ExecutionModelVertex && + bool need_multiview = get_execution_model() == ExecutionModelVertex && !msl_options.view_index_from_device_index && + msl_options.multiview_layered_rendering && (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex)); + bool need_dispatch_base = + msl_options.dispatch_base && get_execution_model() == ExecutionModelGLCompute && + (active_input_builtins.get(BuiltInWorkgroupId) || active_input_builtins.get(BuiltInGlobalInvocationId)); + bool need_grid_params = get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation; + bool need_vertex_base_params = + need_grid_params && + (active_input_builtins.get(BuiltInVertexId) || active_input_builtins.get(BuiltInVertexIndex) || + active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) || + active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance)); + bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId); + bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups); + if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || - need_multiview || needs_subgroup_invocation_id) + need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || + needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation || + has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size) { bool has_frag_coord = false; bool has_sample_id = false; @@ -121,21 +273,68 @@ void CompilerMSL::build_implicit_builtins() bool has_subgroup_invocation_id = false; bool has_subgroup_size = false; bool has_view_idx = false; + bool has_layer = false; + bool 
has_helper_invocation = false; + bool has_local_invocation_index = false; + bool has_workgroup_size = false; + uint32_t workgroup_id_type = 0; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin) + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + if (!interface_variable_exists_in_entry_point(var.self)) + return; + if (!has_decoration(var.self, DecorationBuiltIn)) return; BuiltIn builtin = ir.meta[var.self].decoration.builtin_type; - if (need_subpass_input && builtin == BuiltInFragCoord) + + if (var.storage == StorageClassOutput) + { + if (has_additional_fixed_sample_mask() && builtin == BuiltInSampleMask) + { + builtin_sample_mask_id = var.self; + mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self); + does_shader_write_sample_mask = true; + } + } + + if (var.storage != StorageClassInput) + return; + + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses)) { - builtin_frag_coord_id = var.self; - has_frag_coord = true; + switch (builtin) + { + case BuiltInFragCoord: + mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var.self); + builtin_frag_coord_id = var.self; + has_frag_coord = true; + break; + case BuiltInLayer: + if (!msl_options.arrayed_subpass_input || msl_options.multiview) + break; + mark_implicit_builtin(StorageClassInput, BuiltInLayer, var.self); + builtin_layer_id = var.self; + has_layer = true; + break; + case BuiltInViewIndex: + if (!msl_options.multiview) + break; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self); + builtin_view_idx_id = var.self; + has_view_idx = true; + break; + default: + break; + } } - if (need_sample_pos && builtin == BuiltInSampleId) + if ((need_sample_pos || needs_sample_id) && builtin == BuiltInSampleId) { builtin_sample_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var.self); has_sample_id = true; } @@ -145,18 +344,22 @@ void CompilerMSL::build_implicit_builtins() { case BuiltInVertexIndex: builtin_vertex_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var.self); has_vertex_idx = true; break; case BuiltInBaseVertex: builtin_base_vertex_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var.self); has_base_vertex = true; break; case BuiltInInstanceIndex: builtin_instance_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self); has_instance_idx = true; break; case BuiltInBaseInstance: builtin_base_instance_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self); has_base_instance = true; break; default: @@ -164,100 +367,193 @@ void CompilerMSL::build_implicit_builtins() } } - if (need_tesc_params) + if (need_tesc_params && builtin == BuiltInInvocationId) { - switch (builtin) - { - case BuiltInInvocationId: - 
builtin_invocation_id_id = var.self; - has_invocation_id = true; - break; - case BuiltInPrimitiveId: - builtin_primitive_id_id = var.self; - has_primitive_id = true; - break; - default: - break; - } + builtin_invocation_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var.self); + has_invocation_id = true; + } + + if ((need_tesc_params || need_tese_params) && builtin == BuiltInPrimitiveId) + { + builtin_primitive_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var.self); + has_primitive_id = true; + } + + if (need_tese_params && builtin == BuiltInTessLevelOuter) + { + tess_level_outer_var_id = var.self; + } + + if (need_tese_params && builtin == BuiltInTessLevelInner) + { + tess_level_inner_var_id = var.self; } if ((need_subgroup_mask || needs_subgroup_invocation_id) && builtin == BuiltInSubgroupLocalInvocationId) { builtin_subgroup_invocation_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var.self); has_subgroup_invocation_id = true; } - if (need_subgroup_ge_mask && builtin == BuiltInSubgroupSize) + if ((need_subgroup_ge_mask || needs_subgroup_size) && builtin == BuiltInSubgroupSize) { builtin_subgroup_size_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var.self); has_subgroup_size = true; } if (need_multiview) { - if (builtin == BuiltInInstanceIndex) + switch (builtin) { + case BuiltInInstanceIndex: // The view index here is derived from the instance index. builtin_instance_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self); has_instance_idx = true; - } - - if (builtin == BuiltInViewIndex) - { + break; + case BuiltInBaseInstance: + // If a non-zero base instance is used, we need to adjust for it when calculating the view index. 
+ builtin_base_instance_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self); + has_base_instance = true; + break; + case BuiltInViewIndex: builtin_view_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self); has_view_idx = true; + break; + default: + break; } } + + if (needs_helper_invocation && builtin == BuiltInHelperInvocation) + { + builtin_helper_invocation_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInHelperInvocation, var.self); + has_helper_invocation = true; + } + + if (need_local_invocation_index && builtin == BuiltInLocalInvocationIndex) + { + builtin_local_invocation_index_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var.self); + has_local_invocation_index = true; + } + + if (need_workgroup_size && builtin == BuiltInLocalInvocationId) + { + builtin_workgroup_size_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self); + has_workgroup_size = true; + } + + // The base workgroup needs to have the same type and vector size + // as the workgroup or invocation ID, so keep track of the type that + // was used. + if (need_dispatch_base && workgroup_id_type == 0 && + (builtin == BuiltInWorkgroupId || builtin == BuiltInGlobalInvocationId)) + workgroup_id_type = var.basetype; }); - if (!has_frag_coord && need_subpass_input) + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ if ((!has_frag_coord || (msl_options.multiview && !has_view_idx) || + (msl_options.arrayed_subpass_input && !msl_options.multiview && !has_layer)) && + (!msl_options.use_framebuffer_fetch_subpasses) && need_subpass_input) { - uint32_t offset = ir.increase_bound_by(3); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - uint32_t var_id = offset + 2; + if (!has_frag_coord) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_FragCoord. + SPIRType vec4_type; + vec4_type.basetype = SPIRType::Float; + vec4_type.width = 32; + vec4_type.vecsize = 4; + set(type_id, vec4_type); + + SPIRType vec4_type_ptr; + vec4_type_ptr = vec4_type; + vec4_type_ptr.pointer = true; + vec4_type_ptr.pointer_depth++; + vec4_type_ptr.parent_type = type_id; + vec4_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, vec4_type_ptr); + ptr_type.self = type_id; - // Create gl_FragCoord. 
- SPIRType vec4_type; - vec4_type.basetype = SPIRType::Float; - vec4_type.width = 32; - vec4_type.vecsize = 4; - set(type_id, vec4_type); - - SPIRType vec4_type_ptr; - vec4_type_ptr = vec4_type; - vec4_type_ptr.pointer = true; - vec4_type_ptr.parent_type = type_id; - vec4_type_ptr.storage = StorageClassInput; - auto &ptr_type = set(type_ptr_id, vec4_type_ptr); - ptr_type.self = type_id; + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord); + builtin_frag_coord_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id); + } - set(var_id, type_ptr_id, StorageClassInput); - set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord); - builtin_frag_coord_id = var_id; - mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id); + if (!has_layer && msl_options.arrayed_subpass_input && !msl_options.multiview) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_Layer. + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); + builtin_layer_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInLayer, var_id); + } + + if (!has_view_idx && msl_options.multiview) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_ViewIndex. 
+ SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex); + builtin_view_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id); + } } - if (!has_sample_id && need_sample_pos) + if (!has_sample_id && (need_sample_pos || needs_sample_id)) { - uint32_t offset = ir.increase_bound_by(3); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - uint32_t var_id = offset + 2; + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; // Create gl_SampleID. - SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); - SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); uint_type_ptr.pointer = true; - uint_type_ptr.parent_type = type_id; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); uint_type_ptr.storage = StorageClassInput; auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; + ptr_type.self = get_uint_type_id(); set(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId); @@ -266,24 +562,18 @@ void CompilerMSL::build_implicit_builtins() } if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) || - (need_multiview && (!has_instance_idx || !has_view_idx))) + (need_multiview && (!has_instance_idx || !has_base_instance || !has_view_idx))) { - uint32_t offset = ir.increase_bound_by(2); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - - SPIRType uint_type; - 
uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); + uint32_t type_ptr_id = ir.increase_bound_by(1); SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); uint_type_ptr.pointer = true; - uint_type_ptr.parent_type = type_id; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); uint_type_ptr.storage = StorageClassInput; auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; + ptr_type.self = get_uint_type_id(); if (need_vertex_params && !has_vertex_idx) { @@ -316,30 +606,9 @@ void CompilerMSL::build_implicit_builtins() set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex); builtin_instance_idx_id = var_id; mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id); - - if (need_multiview) - { - // Multiview shaders are not allowed to write to gl_Layer, ostensibly because - // it is implicitly written from gl_ViewIndex, but we have to do that explicitly. - // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but - // gl_Layer is an output in vertex-pipeline shaders. 
- uint32_t type_ptr_out_id = ir.increase_bound_by(2); - SPIRType uint_type_ptr_out; - uint_type_ptr_out = uint_type; - uint_type_ptr_out.pointer = true; - uint_type_ptr_out.parent_type = type_id; - uint_type_ptr_out.storage = StorageClassOutput; - auto &ptr_out_type = set(type_ptr_out_id, uint_type_ptr_out); - ptr_out_type.self = type_id; - var_id = type_ptr_out_id + 1; - set(var_id, type_ptr_out_id, StorageClassOutput); - set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); - builtin_layer_id = var_id; - mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id); - } } - if (need_vertex_params && !has_base_instance) + if (!has_base_instance) // Needed by both multiview and tessellation { uint32_t var_id = ir.increase_bound_by(1); @@ -350,6 +619,28 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id); } + if (need_multiview) + { + // Multiview shaders are not allowed to write to gl_Layer, ostensibly because + // it is implicitly written from gl_ViewIndex, but we have to do that explicitly. + // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but + // gl_Layer is an output in vertex-pipeline shaders. 
+ uint32_t type_ptr_out_id = ir.increase_bound_by(2); + SPIRType uint_type_ptr_out; + uint_type_ptr_out = get_uint_type(); + uint_type_ptr_out.pointer = true; + uint_type_ptr_out.pointer_depth++; + uint_type_ptr_out.parent_type = get_uint_type_id(); + uint_type_ptr_out.storage = StorageClassOutput; + auto &ptr_out_type = set(type_ptr_out_id, uint_type_ptr_out); + ptr_out_type.self = get_uint_type_id(); + uint32_t var_id = type_ptr_out_id + 1; + set(var_id, type_ptr_out_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); + builtin_layer_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id); + } + if (need_multiview && !has_view_idx) { uint32_t var_id = ir.increase_bound_by(1); @@ -362,26 +653,31 @@ void CompilerMSL::build_implicit_builtins() } } - if (need_tesc_params && (!has_invocation_id || !has_primitive_id)) + if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) || + (need_tese_params && !has_primitive_id) || need_grid_params) { - uint32_t offset = ir.increase_bound_by(2); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - - SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); + uint32_t type_ptr_id = ir.increase_bound_by(1); SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); uint_type_ptr.pointer = true; - uint_type_ptr.parent_type = type_id; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); uint_type_ptr.storage = StorageClassInput; auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; + ptr_type.self = get_uint_type_id(); + + if ((need_tesc_params && msl_options.multi_patch_workgroup) || need_grid_params) + { + uint32_t var_id = ir.increase_bound_by(1); - if (!has_invocation_id) + // Create gl_GlobalInvocationID. 
+ set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInGlobalInvocationId); + builtin_invocation_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInGlobalInvocationId, var_id); + } + else if (need_tesc_params && !has_invocation_id) { uint32_t var_id = ir.increase_bound_by(1); @@ -392,7 +688,7 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var_id); } - if (!has_primitive_id) + if ((need_tesc_params || need_tese_params) && !has_primitive_id) { uint32_t var_id = ir.increase_bound_by(1); @@ -402,28 +698,34 @@ void CompilerMSL::build_implicit_builtins() builtin_primitive_id_id = var_id; mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var_id); } + + if (need_grid_params) + { + uint32_t var_id = ir.increase_bound_by(1); + + set(var_id, build_extended_vector_type(get_uint_type_id(), 3), StorageClassInput); + set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize); + get_entry_point().interface_variables.push_back(var_id); + set_name(var_id, "spvStageInputSize"); + builtin_stage_input_size_id = var_id; + } } if (!has_subgroup_invocation_id && (need_subgroup_mask || needs_subgroup_invocation_id)) { - uint32_t offset = ir.increase_bound_by(3); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - uint32_t var_id = offset + 2; + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; // Create gl_SubgroupInvocationID. 
- SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); - SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); uint_type_ptr.pointer = true; - uint_type_ptr.parent_type = type_id; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); uint_type_ptr.storage = StorageClassInput; auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; + ptr_type.self = get_uint_type_id(); set(var_id, type_ptr_id, StorageClassInput); set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupLocalInvocationId); @@ -431,31 +733,158 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id); } - if (!has_subgroup_size && need_subgroup_ge_mask) + if (!has_subgroup_size && (need_subgroup_ge_mask || needs_subgroup_size)) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_SubgroupSize. + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize); + builtin_subgroup_size_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id); + } + + if (need_dispatch_base || need_vertex_base_params) + { + if (workgroup_id_type == 0) + workgroup_id_type = build_extended_vector_type(get_uint_type_id(), 3); + uint32_t var_id; + if (msl_options.supports_msl_version(1, 2)) + { + // If we have MSL 1.2, we can (ab)use the [[grid_origin]] builtin + // to convey this information and save a buffer slot. 
+ uint32_t offset = ir.increase_bound_by(1); + var_id = offset; + + set(var_id, workgroup_id_type, StorageClassInput); + set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase); + get_entry_point().interface_variables.push_back(var_id); + } + else + { + // Otherwise, we need to fall back to a good ol' fashioned buffer. + uint32_t offset = ir.increase_bound_by(2); + var_id = offset; + uint32_t type_id = offset + 1; + + SPIRType var_type = get(workgroup_id_type); + var_type.storage = StorageClassUniform; + set(type_id, var_type); + + set(var_id, type_id, StorageClassUniform); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, ~(5u)); + set_decoration(var_id, DecorationBinding, msl_options.indirect_params_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.indirect_params_buffer_index); + } + set_name(var_id, "spvDispatchBase"); + builtin_dispatch_base_id = var_id; + } + + if (has_additional_fixed_sample_mask() && !does_shader_write_sample_mask) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t var_id = offset + 1; + + // Create gl_SampleMask. + SPIRType uint_type_ptr_out; + uint_type_ptr_out = get_uint_type(); + uint_type_ptr_out.pointer = true; + uint_type_ptr_out.pointer_depth++; + uint_type_ptr_out.parent_type = get_uint_type_id(); + uint_type_ptr_out.storage = StorageClassOutput; + + auto &ptr_out_type = set(offset, uint_type_ptr_out); + ptr_out_type.self = get_uint_type_id(); + set(var_id, offset, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSampleMask); + builtin_sample_mask_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var_id); + } + + if (!has_helper_invocation && needs_helper_invocation) { uint32_t offset = ir.increase_bound_by(3); uint32_t type_id = offset; uint32_t type_ptr_id = offset + 1; uint32_t var_id = offset + 2; - // Create gl_SubgroupSize. 
- SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); + // Create gl_HelperInvocation. + SPIRType bool_type; + bool_type.basetype = SPIRType::Boolean; + bool_type.width = 8; + bool_type.vecsize = 1; + set(type_id, bool_type); + + SPIRType bool_type_ptr_in; + bool_type_ptr_in = bool_type; + bool_type_ptr_in.pointer = true; + bool_type_ptr_in.pointer_depth++; + bool_type_ptr_in.parent_type = type_id; + bool_type_ptr_in.storage = StorageClassInput; + + auto &ptr_in_type = set(type_ptr_id, bool_type_ptr_in); + ptr_in_type.self = type_id; + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInHelperInvocation); + builtin_helper_invocation_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInHelperInvocation, var_id); + } + if (need_local_invocation_index && !has_local_invocation_index) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_LocalInvocationIndex. SPIRType uint_type_ptr; - uint_type_ptr = uint_type; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLocalInvocationIndex); + builtin_local_invocation_index_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var_id); + } + + if (need_workgroup_size && !has_workgroup_size) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_WorkgroupSize. 
+ uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3); + SPIRType uint_type_ptr = get(type_id); uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; uint_type_ptr.parent_type = type_id; uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); ptr_type.self = type_id; - set(var_id, type_ptr_id, StorageClassInput); - set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize); - builtin_subgroup_size_id = var_id; - mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id); + set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize); + builtin_workgroup_size_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); } } @@ -470,7 +899,7 @@ void CompilerMSL::build_implicit_builtins() swizzle_buffer_id = var_id; } - if (!buffers_requiring_array_length.empty()) + if (needs_buffer_size_buffer()) { uint32_t var_id = build_constant_uint_array_pointer(); set_name(var_id, "spvBufferSizeConstants"); @@ -491,18 +920,127 @@ void CompilerMSL::build_implicit_builtins() set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index); view_mask_buffer_id = var_id; } -} -void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id) -{ - Bitset *active_builtins = nullptr; - switch (storage) + if (!buffers_requiring_dynamic_offset.empty()) { - case StorageClassInput: - active_builtins = &active_input_builtins; - break; - - case StorageClassOutput: + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvDynamicOffsets"); + // This should never match anything. 
+ set_decoration(var_id, DecorationDescriptorSet, ~(5u)); + set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.dynamic_offsets_buffer_index); + dynamic_offsets_buffer_id = var_id; + } + + // If we're returning a struct from a vertex-like entry point, we must return a position attribute. + bool need_position = (get_execution_model() == ExecutionModelVertex || is_tese_shader()) && + !capture_output_to_buffer && !get_is_rasterization_disabled() && + !active_output_builtins.get(BuiltInPosition); + + if (need_position) + { + // If we can get away with returning void from entry point, we don't need to care. + // If there is at least one other stage output, we need to return [[position]], + // so we need to create one if it doesn't appear in the SPIR-V. Before adding the + // implicit variable, check if it actually exists already, but just has not been used + // or initialized, and if so, mark it as active, and do not create the implicit variable. 
+ bool has_output = false; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self)) + { + has_output = true; + + // Check if the var is the Position builtin + if (has_decoration(var.self, DecorationBuiltIn) && get_decoration(var.self, DecorationBuiltIn) == BuiltInPosition) + active_output_builtins.set(BuiltInPosition); + + // If the var is a struct, check if any members is the Position builtin + auto &var_type = get_variable_element_type(var); + if (var_type.basetype == SPIRType::Struct) + { + auto mbr_cnt = var_type.member_types.size(); + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + auto builtin = BuiltInMax; + bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + if (is_builtin && builtin == BuiltInPosition) + active_output_builtins.set(BuiltInPosition); + } + } + } + }); + need_position = has_output && !active_output_builtins.get(BuiltInPosition); + } + + if (need_position) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_Position. + SPIRType vec4_type; + vec4_type.basetype = SPIRType::Float; + vec4_type.width = 32; + vec4_type.vecsize = 4; + set(type_id, vec4_type); + + SPIRType vec4_type_ptr; + vec4_type_ptr = vec4_type; + vec4_type_ptr.pointer = true; + vec4_type_ptr.pointer_depth++; + vec4_type_ptr.parent_type = type_id; + vec4_type_ptr.storage = StorageClassOutput; + auto &ptr_type = set(type_ptr_id, vec4_type_ptr); + ptr_type.self = type_id; + + set(var_id, type_ptr_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInPosition); + mark_implicit_builtin(StorageClassOutput, BuiltInPosition, var_id); + } +} + +// Checks if the specified builtin variable (e.g. gl_InstanceIndex) is marked as active. +// If not, it marks it as active and forces a recompilation. 
+// This might be used when the optimization of inactive builtins was too optimistic (e.g. when "spvOut" is emitted). +void CompilerMSL::ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin) +{ + Bitset *active_builtins = nullptr; + switch (storage) + { + case StorageClassInput: + active_builtins = &active_input_builtins; + break; + + case StorageClassOutput: + active_builtins = &active_output_builtins; + break; + + default: + break; + } + + // At this point, the specified builtin variable must have already been declared in the entry point. + // If not, mark as active and force recompile. + if (active_builtins != nullptr && !active_builtins->get(builtin)) + { + active_builtins->set(builtin); + force_recompile(); + } +} + +void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id) +{ + Bitset *active_builtins = nullptr; + switch (storage) + { + case StorageClassInput: + active_builtins = &active_input_builtins; + break; + + case StorageClassOutput: active_builtins = &active_output_builtins; break; @@ -512,27 +1050,24 @@ void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, u assert(active_builtins != nullptr); active_builtins->set(builtin); - get_entry_point().interface_variables.push_back(id); + + auto &var = get_entry_point().interface_variables; + if (find(begin(var), end(var), VariableID(id)) == end(var)) + var.push_back(id); } uint32_t CompilerMSL::build_constant_uint_array_pointer() { - uint32_t offset = ir.increase_bound_by(4); - uint32_t type_id = offset; - uint32_t type_ptr_id = offset + 1; - uint32_t type_ptr_ptr_id = offset + 2; - uint32_t var_id = offset + 3; + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_ptr_id = offset; + uint32_t type_ptr_ptr_id = offset + 1; + uint32_t var_id = offset + 2; // Create a buffer to hold extra data, including the swizzle constants. 
- SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); - - SPIRType uint_type_pointer = uint_type; + SPIRType uint_type_pointer = get_uint_type(); uint_type_pointer.pointer = true; - uint_type_pointer.pointer_depth = 1; - uint_type_pointer.parent_type = type_id; + uint_type_pointer.pointer_depth++; + uint_type_pointer.parent_type = get_uint_type_id(); uint_type_pointer.storage = StorageClassUniform; set(type_ptr_id, uint_type_pointer); set_decoration(type_ptr_id, DecorationArrayStride, 4); @@ -591,14 +1126,35 @@ SPIRType &CompilerMSL::get_patch_stage_out_struct_type() std::string CompilerMSL::get_tess_factor_struct_name() { - if (get_entry_point().flags.get(ExecutionModeTriangles)) + if (is_tessellating_triangles()) return "MTLTriangleTessellationFactorsHalf"; return "MTLQuadTessellationFactorsHalf"; } +SPIRType &CompilerMSL::get_uint_type() +{ + return get(get_uint_type_id()); +} + +uint32_t CompilerMSL::get_uint_type_id() +{ + if (uint_type_id != 0) + return uint_type_id; + + uint_type_id = ir.increase_bound_by(1); + + SPIRType type; + type.basetype = SPIRType::UInt; + type.width = 32; + set(uint_type_id, type); + return uint_type_id; +} + void CompilerMSL::emit_entry_point_declarations() { // FIXME: Get test coverage here ... + // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries + declare_complex_constant_arrays(); // Emit constexpr samplers here. for (auto &samp : constexpr_samplers_by_id) @@ -717,30 +1273,170 @@ void CompilerMSL::emit_entry_point_declarations() convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")")); } - statement("constexpr sampler ", - type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), - "(", merge(args), ");"); + // If we would emit no arguments, then omit the parentheses entirely. Otherwise, + // we'll wind up with a "most vexing parse" situation. 
+ if (args.empty()) + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + ";"); + else + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + "(", merge(args), ");"); + } + + // Emit dynamic buffers here. + for (auto &dynamic_buffer : buffers_requiring_dynamic_offset) + { + if (!dynamic_buffer.second.second) + { + // Could happen if no buffer was used at requested binding point. + continue; + } + + const auto &var = get(dynamic_buffer.second.second); + uint32_t var_id = var.self; + const auto &type = get_variable_data_type(var); + string name = to_name(var.self); + uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet); + uint32_t arg_id = argument_buffer_ids[desc_set]; + uint32_t base_index = dynamic_buffer.second.first; + + if (!type.array.empty()) + { + // This is complicated, because we need to support arrays of arrays. + // And it's even worse if the outermost dimension is a runtime array, because now + // all this complicated goop has to go into the shader itself. 
(FIXME) + if (!type.array[type.array.size() - 1]) + SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet."); + else + { + is_using_builtin_array = true; + statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, true), name, + type_to_array_glsl(type), " ="); + + uint32_t dim = uint32_t(type.array.size()); + uint32_t j = 0; + for (SmallVector indices(type.array.size()); + indices[type.array.size() - 1] < to_array_size_literal(type); j++) + { + while (dim > 0) + { + begin_scope(); + --dim; + } + + string arrays; + for (uint32_t i = uint32_t(type.array.size()); i; --i) + arrays += join("[", indices[i - 1], "]"); + statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ", + to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ", + to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"), + arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),"); + + while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1) + { + end_scope(","); + indices[dim++] = 0; + } + } + end_scope_decl(); + statement_no_indent(""); + is_using_builtin_array = false; + } + } + else + { + statement(get_argument_address_space(var), " auto& ", to_restrict(var_id, true), name, " = *(", + get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((", + get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".", + ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);"); + } } // Emit buffer arrays here. 
- for (uint32_t array_id : buffer_arrays) + for (uint32_t array_id : buffer_arrays_discrete) { const auto &var = get(array_id); const auto &type = get_variable_data_type(var); + const auto &buffer_type = get_variable_element_type(var); string name = to_name(array_id); - statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] ="); + statement(get_argument_address_space(var), " ", type_to_glsl(buffer_type), "* ", to_restrict(array_id, true), name, + "[] ="); begin_scope(); - for (uint32_t i = 0; i < type.array[0]; ++i) - statement(name + "_" + convert_to_string(i) + ","); + for (uint32_t i = 0; i < to_array_size_literal(type); ++i) + statement(name, "_", i, ","); end_scope_decl(); statement_no_indent(""); } - // For some reason, without this, we end up emitting the arrays twice. - buffer_arrays.clear(); + // Discrete descriptors are processed in entry point emission every compiler iteration. + buffer_arrays_discrete.clear(); + + // Emit buffer aliases here. + for (auto &var_id : buffer_aliases_discrete) + { + const auto &var = get(var_id); + const auto &type = get_variable_data_type(var); + auto addr_space = get_argument_address_space(var); + auto name = to_name(var_id); + + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + uint32_t desc_binding = get_decoration(var_id, DecorationBinding); + auto alias_name = join("spvBufferAliasSet", desc_set, "Binding", desc_binding); + + statement(addr_space, " auto& ", to_restrict(var_id, true), + name, + " = *(", addr_space, " ", type_to_glsl(type), "*)", alias_name, ";"); + } + // Discrete descriptors are processed in entry point emission every compiler iteration. 
+ buffer_aliases_discrete.clear(); + + for (auto &var_pair : buffer_aliases_argument) + { + uint32_t var_id = var_pair.first; + uint32_t alias_id = var_pair.second; + + const auto &var = get(var_id); + const auto &type = get_variable_data_type(var); + auto addr_space = get_argument_address_space(var); + + if (type.array.empty()) + { + statement(addr_space, " auto& ", to_restrict(var_id, true), to_name(var_id), " = (", addr_space, " ", + type_to_glsl(type), "&)", ir.meta[alias_id].decoration.qualified_alias, ";"); + } + else + { + const char *desc_addr_space = descriptor_address_space(var_id, var.storage, "thread"); + + // Esoteric type cast. Reference to array of pointers. + // Auto here defers to UBO or SSBO. The address space of the reference needs to refer to the + // address space of the argument buffer itself, which is usually constant, but can be const device for + // large argument buffers. + is_using_builtin_array = true; + statement(desc_addr_space, " auto& ", to_restrict(var_id, true), to_name(var_id), " = (", addr_space, " ", + type_to_glsl(type), "* ", desc_addr_space, " (&)", + type_to_array_glsl(type), ")", ir.meta[alias_id].decoration.qualified_alias, ";"); + is_using_builtin_array = false; + } + } + + // Emit disabled fragment outputs. + std::sort(disabled_frag_outputs.begin(), disabled_frag_outputs.end()); + for (uint32_t var_id : disabled_frag_outputs) + { + auto &var = get(var_id); + add_local_variable_name(var_id); + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } } string CompilerMSL::compile() { + replace_illegal_entry_point_names(); + ir.fixup_reserved_names(); + // Do not deal with GLES-isms like precision, older extensions and such. 
options.vulkan_semantics = true; options.es = false; @@ -749,14 +1445,14 @@ string CompilerMSL::compile() backend.float_literal_suffix = false; backend.uint32_t_literal_suffix = true; backend.int16_t_literal_suffix = ""; - backend.uint16_t_literal_suffix = "u"; + backend.uint16_t_literal_suffix = ""; backend.basic_int_type = "int"; backend.basic_uint_type = "uint"; backend.basic_int8_type = "char"; backend.basic_uint8_type = "uchar"; backend.basic_int16_type = "short"; backend.basic_uint16_type = "ushort"; - backend.discard_literal = "discard_fragment()"; + backend.boolean_mix_function = "select"; backend.swizzle_is_function = false; backend.shared_is_implied = false; backend.use_initializer_list = true; @@ -764,14 +1460,21 @@ string CompilerMSL::compile() backend.native_row_major_matrix = false; backend.unsized_array_supported = false; backend.can_declare_arrays_inline = false; - backend.can_return_array = false; - backend.boolean_mix_support = false; backend.allow_truncated_access_chain = true; - backend.array_is_value_type = false; backend.comparison_image_samples_scalar = true; backend.native_pointers = true; backend.nonuniform_qualifier = ""; backend.support_small_type_sampling_result = true; + backend.supports_empty_struct = true; + backend.support_64bit_switch = true; + + // Allow Metal to use the array template unless we force it off. + backend.can_return_array = !msl_options.force_native_arrays; + backend.array_is_value_type = !msl_options.force_native_arrays; + // Arrays which are part of buffer objects are never considered to be value types (just plain C-style). 
+ backend.array_is_value_type_in_buffer_blocks = false; + backend.support_pointer_to_pointer = true; + backend.implicit_c_integer_promotion_rules = true; capture_output_to_buffer = msl_options.capture_output_to_buffer; is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; @@ -780,29 +1483,53 @@ string CompilerMSL::compile() for (auto &id : next_metal_resource_ids) id = 0; + fixup_anonymous_struct_names(); fixup_type_alias(); replace_illegal_names(); - - struct_member_padding.clear(); + sync_entry_point_aliases_and_names(); build_function_control_flow_graphs_and_analyze(); update_active_builtins(); analyze_image_and_sampler_usage(); analyze_sampled_image_usage(); + analyze_interlocked_resource_usage(); preprocess_op_codes(); build_implicit_builtins(); + if (needs_manual_helper_invocation_updates() && + (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) + { + string discard_expr = + join(builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " = true, discard_fragment()"); + backend.discard_literal = discard_expr; + backend.demote_literal = discard_expr; + } + else + { + backend.discard_literal = "discard_fragment()"; + backend.demote_literal = "discard_fragment()"; + } + fixup_image_load_store_access(); set_enabled_interface_variables(get_active_interface_variables()); + if (msl_options.force_active_argument_buffer_resources) + activate_argument_buffer_resources(); + if (swizzle_buffer_id) - active_interface_variables.insert(swizzle_buffer_id); + add_active_interface_variable(swizzle_buffer_id); if (buffer_size_buffer_id) - active_interface_variables.insert(buffer_size_buffer_id); + add_active_interface_variable(buffer_size_buffer_id); if (view_mask_buffer_id) - active_interface_variables.insert(view_mask_buffer_id); + add_active_interface_variable(view_mask_buffer_id); + if (dynamic_offsets_buffer_id) + add_active_interface_variable(dynamic_offsets_buffer_id); if (builtin_layer_id) - 
active_interface_variables.insert(builtin_layer_id); + add_active_interface_variable(builtin_layer_id); + if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2)) + add_active_interface_variable(builtin_dispatch_base_id); + if (builtin_sample_mask_id) + add_active_interface_variable(builtin_sample_mask_id); // Create structs to hold input, output and uniform variables. // Do output first to ensure out. is declared at top of entry function. @@ -810,10 +1537,10 @@ string CompilerMSL::compile() stage_out_var_id = add_interface_block(StorageClassOutput); patch_stage_out_var_id = add_interface_block(StorageClassOutput, true); stage_in_var_id = add_interface_block(StorageClassInput); - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tese_shader()) patch_stage_in_var_id = add_interface_block(StorageClassInput, true); - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) stage_out_ptr_var_id = add_interface_block_pointer(stage_out_var_id, StorageClassOutput); if (is_tessellation_shader()) stage_in_ptr_var_id = add_interface_block_pointer(stage_in_var_id, StorageClassInput); @@ -846,10 +1573,7 @@ string CompilerMSL::compile() uint32_t pass_count = 0; do { - if (pass_count >= 3) - SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); - - reset(); + reset(pass_count); // Start bindings at zero. next_metal_resource_index_buffer = 0; @@ -862,9 +1586,10 @@ string CompilerMSL::compile() buffer.reset(); emit_header(); + emit_custom_templates(); + emit_custom_functions(); emit_specialization_constants_and_structs(); emit_resources(); - emit_custom_functions(); emit_function(get(ir.default_entry_point), Bitset()); pass_count++; @@ -887,13 +1612,15 @@ void CompilerMSL::preprocess_op_codes() add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\""); } - // Metal vertex functions that write to resources must disable rasterization and return void. 
- if (preproc.uses_resource_write) + // Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to + // resources must disable rasterization and return void. + if ((preproc.uses_buffer_write && !msl_options.supports_msl_version(2, 1)) || + (preproc.uses_image_write && !msl_options.supports_msl_version(2, 2))) is_rasterization_disabled = true; // Tessellation control shaders are run as compute functions in Metal, and so // must capture their output to a buffer. - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader() || (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)) { is_rasterization_disabled = true; capture_output_to_buffer = true; @@ -901,6 +1628,44 @@ void CompilerMSL::preprocess_op_codes() if (preproc.needs_subgroup_invocation_id) needs_subgroup_invocation_id = true; + if (preproc.needs_subgroup_size) + needs_subgroup_size = true; + // build_implicit_builtins() hasn't run yet, and in fact, this needs to execute + // before then so that gl_SampleID will get added; so we also need to check if + // that function would add gl_FragCoord. + if (preproc.needs_sample_id || msl_options.force_sample_rate_shading || + (is_sample_rate() && (active_input_builtins.get(BuiltInFragCoord) || + (need_subpass_input_ms && !msl_options.use_framebuffer_fetch_subpasses)))) + needs_sample_id = true; + if (preproc.needs_helper_invocation) + needs_helper_invocation = true; + + // OpKill is removed by the parser, so we need to identify those by inspecting + // blocks. + ir.for_each_typed_id([&preproc](uint32_t, SPIRBlock &block) { + if (block.terminator == SPIRBlock::Kill) + preproc.uses_discard = true; + }); + + // Fragment shaders that both write to storage resources and discard fragments + // need checks on the writes, to work around Metal allowing these writes despite + // the fragment being dead. 
+ if (msl_options.check_discarded_frag_stores && preproc.uses_discard && + (preproc.uses_buffer_write || preproc.uses_image_write)) + { + frag_shader_needs_discard_checks = true; + needs_helper_invocation = true; + // Fragment discard store checks imply manual HelperInvocation updates. + msl_options.manual_helper_invocation_updates = true; + } + + if (is_intersection_query()) + { + add_header_line("#if __METAL_VERSION__ >= 230"); + add_header_line("#include "); + add_header_line("using namespace metal::raytracing;"); + add_header_line("#endif"); + } } // Move the Private and Workgroup global variables to the entry function. @@ -931,6 +1696,30 @@ void CompilerMSL::extract_global_variables_from_functions() // Uniforms unordered_set global_var_ids; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + // Some builtins resolve directly to a function call which does not need any declared variables. + // Skip these. + if (var.storage == StorageClassInput && has_decoration(var.self, DecorationBuiltIn)) + { + auto bi_type = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + if (bi_type == BuiltInHelperInvocation && !needs_manual_helper_invocation_updates()) + return; + if (bi_type == BuiltInHelperInvocation && needs_manual_helper_invocation_updates()) + { + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.3 on iOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); + // Make sure this is declared and initialized. + // Force this to have the proper name. 
+ set_name(var.self, builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput)); + auto &entry_func = this->get(ir.default_entry_point); + entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + entry_func.fixup_hooks_in.push_back([this, &var]() + { statement(to_name(var.self), " = simd_is_helper_thread();"); }); + } + } + if (var.storage == StorageClassInput || var.storage == StorageClassOutput || var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) @@ -990,12 +1779,26 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: if (global_var_ids.find(base_id) != global_var_ids.end()) added_arg_ids.insert(base_id); + // Use Metal's native frame-buffer fetch API for subpass inputs. auto &type = get(ops[0]); - if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + (!msl_options.use_framebuffer_fetch_subpasses)) { // Implicitly reads gl_FragCoord. assert(builtin_frag_coord_id != 0); added_arg_ids.insert(builtin_frag_coord_id); + if (msl_options.multiview) + { + // Implicitly reads gl_ViewIndex. + assert(builtin_view_idx_id != 0); + added_arg_ids.insert(builtin_view_idx_id); + } + else if (msl_options.arrayed_subpass_input) + { + // Implicitly reads gl_Layer. 
+ assert(builtin_layer_id != 0); + added_arg_ids.insert(builtin_layer_id); + } } break; @@ -1025,6 +1828,14 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: uint32_t base_id = ops[0]; if (global_var_ids.find(base_id) != global_var_ids.end()) added_arg_ids.insert(base_id); + + uint32_t rvalue_id = ops[1]; + if (global_var_ids.find(rvalue_id) != global_var_ids.end()) + added_arg_ids.insert(rvalue_id); + + if (needs_frag_discard_checks()) + added_arg_ids.insert(builtin_helper_invocation_id); + break; } @@ -1039,10 +1850,164 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: break; } + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicStore: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpImageWrite: + if (needs_frag_discard_checks()) + added_arg_ids.insert(builtin_helper_invocation_id); + break; + + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + // When using the pointer, we need to know which variable it is actually loaded from. + uint32_t base_id = ops[2]; + auto *var = maybe_get_backing_variable(base_id); + if (var && atomic_image_vars.count(var->self)) + { + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + } + break; + } + + case OpExtInst: + { + uint32_t extension_set = ops[2]; + if (get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(ops[3]); + switch (op_450) + { + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + { + // For these, we really need the stage-in block. 
It is theoretically possible to pass the + // interpolant object, but a) doing so would require us to create an entirely new variable + // with Interpolant type, and b) if we have a struct or array, handling all the members and + // elements could get unwieldy fast. + added_arg_ids.insert(stage_in_var_id); + break; + } + + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + uint32_t base_id = ops[5]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + + default: + break; + } + } + break; + } + + case OpGroupNonUniformInverseBallot: + { + added_arg_ids.insert(builtin_subgroup_invocation_id_id); + break; + } + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + { + added_arg_ids.insert(builtin_subgroup_size_id); + break; + } + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast(ops[3]); + switch (operation) + { + case GroupOperationReduce: + added_arg_ids.insert(builtin_subgroup_size_id); + break; + case GroupOperationInclusiveScan: + case GroupOperationExclusiveScan: + added_arg_ids.insert(builtin_subgroup_invocation_id_id); + break; + default: + break; + } + break; + } + + case OpDemoteToHelperInvocation: + if (needs_manual_helper_invocation_updates() && + (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) + added_arg_ids.insert(builtin_helper_invocation_id); + break; + + case OpIsHelperInvocationEXT: + if (needs_manual_helper_invocation_updates()) + added_arg_ids.insert(builtin_helper_invocation_id); + break; + + case OpRayQueryInitializeKHR: + case OpRayQueryProceedKHR: + case OpRayQueryTerminateKHR: + case OpRayQueryGenerateIntersectionKHR: + case OpRayQueryConfirmIntersectionKHR: + { + // Ray query accesses memory directly, need check pass down object if using Private storage class. 
+ uint32_t base_id = ops[0]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + + case OpRayQueryGetRayTMinKHR: + case OpRayQueryGetRayFlagsKHR: + case OpRayQueryGetWorldRayOriginKHR: + case OpRayQueryGetWorldRayDirectionKHR: + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: + case OpRayQueryGetIntersectionTypeKHR: + case OpRayQueryGetIntersectionTKHR: + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: + case OpRayQueryGetIntersectionInstanceIdKHR: + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: + case OpRayQueryGetIntersectionGeometryIndexKHR: + case OpRayQueryGetIntersectionPrimitiveIndexKHR: + case OpRayQueryGetIntersectionBarycentricsKHR: + case OpRayQueryGetIntersectionFrontFaceKHR: + case OpRayQueryGetIntersectionObjectRayDirectionKHR: + case OpRayQueryGetIntersectionObjectRayOriginKHR: + case OpRayQueryGetIntersectionObjectToWorldKHR: + case OpRayQueryGetIntersectionWorldToObjectKHR: + { + // Ray query accesses memory directly, need check pass down object if using Private storage class. + uint32_t base_id = ops[2]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + default: break; } + if (needs_manual_helper_invocation_updates() && b.terminator == SPIRBlock::Kill && + (active_input_builtins.get(BuiltInHelperInvocation) || needs_helper_invocation)) + added_arg_ids.insert(builtin_helper_invocation_id); + // TODO: Add all other operations which can affect memory. // We should consider a more unified system here to reduce boiler-plate. // This kind of analysis is done in several places ... 
@@ -1054,8 +2019,11 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: // Add the global variables as arguments to the function if (func_id != ir.default_entry_point) { - bool added_in = false; - bool added_out = false; + bool control_point_added_in = false; + bool control_point_added_out = false; + bool patch_added_in = false; + bool patch_added_out = false; + for (uint32_t arg_id : added_arg_ids) { auto &var = get(arg_id); @@ -1063,42 +2031,77 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: auto *p_type = &get(type_id); BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn)); - if (((is_tessellation_shader() && var.storage == StorageClassInput) || - (get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput)) && - !(has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type)) && - (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || - bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || - p_type->basetype == SPIRType::Struct)) + bool is_patch = has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type); + bool is_block = has_decoration(p_type->self, DecorationBlock); + bool is_control_point_storage = + !is_patch && ((is_tessellation_shader() && var.storage == StorageClassInput) || + (is_tesc_shader() && var.storage == StorageClassOutput)); + bool is_patch_block_storage = is_patch && is_block && var.storage == StorageClassOutput; + bool is_builtin = is_builtin_variable(var); + bool variable_is_stage_io = + !is_builtin || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || + p_type->basetype == SPIRType::Struct; + bool is_redirected_to_global_stage_io = (is_control_point_storage || is_patch_block_storage) && + variable_is_stage_io; + + // If output is masked it is not considered part of the global stage 
IO interface. + if (is_redirected_to_global_stage_io && var.storage == StorageClassOutput) + is_redirected_to_global_stage_io = !is_stage_output_variable_masked(var); + + if (is_redirected_to_global_stage_io) { - // Tessellation control shaders see inputs and per-vertex outputs as arrays. - // Similarly, tessellation evaluation shaders see per-vertex inputs as arrays. + // Tessellation control shaders see inputs and per-point outputs as arrays. + // Similarly, tessellation evaluation shaders see per-point inputs as arrays. // We collected them into a structure; we must pass the array of this // structure to the function. std::string name; + if (is_patch) + name = var.storage == StorageClassInput ? patch_stage_in_var_name : patch_stage_out_var_name; + else + name = var.storage == StorageClassInput ? "gl_in" : "gl_out"; + + if (var.storage == StorageClassOutput && has_decoration(p_type->self, DecorationBlock)) + { + // If we're redirecting a block, we might still need to access the original block + // variable if we're masking some members. + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(p_type->member_types.size()); mbr_idx++) + { + if (is_stage_output_block_member_masked(var, mbr_idx, true)) + { + func.add_parameter(var.basetype, var.self, true); + break; + } + } + } + if (var.storage == StorageClassInput) { + auto &added_in = is_patch ? patch_added_in : control_point_added_in; if (added_in) continue; - name = input_wg_var_name; - arg_id = stage_in_ptr_var_id; + arg_id = is_patch ? patch_stage_in_var_id : stage_in_ptr_var_id; added_in = true; } else if (var.storage == StorageClassOutput) { + auto &added_out = is_patch ? patch_added_out : control_point_added_out; if (added_out) continue; - name = "gl_out"; - arg_id = stage_out_ptr_var_id; + arg_id = is_patch ? 
patch_stage_out_var_id : stage_out_ptr_var_id; added_out = true; } + type_id = get(arg_id).basetype; uint32_t next_id = ir.increase_bound_by(1); func.add_parameter(type_id, next_id, true); set(next_id, type_id, StorageClassFunction, 0, arg_id); set_name(next_id, name); + if (is_tese_shader() && msl_options.raw_buffer_tese_input && var.storage == StorageClassInput) + set_decoration(next_id, DecorationNonWritable); } - else if (is_builtin_variable(var) && p_type->basetype == SPIRType::Struct) + else if (is_builtin && has_decoration(p_type->self, DecorationBlock)) { // Get the pointee type type_id = get_pointee_type_id(type_id); @@ -1108,7 +2111,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: for (auto &mbr_type_id : p_type->member_types) { BuiltIn builtin = BuiltInMax; - bool is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin); + is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin); if (is_builtin && has_active_builtin(builtin, var.storage)) { // Add a arg variable with the same type and decorations as the member @@ -1122,6 +2125,7 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: ptr.self = mbr_type_id; ptr.storage = var.storage; ptr.pointer = true; + ptr.pointer_depth++; ptr.parent_type = mbr_type_id; func.add_parameter(mbr_type_id, var_id, true); @@ -1159,11 +2163,25 @@ void CompilerMSL::mark_packable_structs() (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) mark_as_packable(type); } + + if (var.storage == StorageClassWorkgroup) + { + auto *type = &this->get(var.basetype); + if (type->basetype == SPIRType::Struct) + mark_as_workgroup_struct(*type); + } + }); + + // Physical storage buffer pointers can appear outside of the context of a variable, if the address + // is calculated from a ulong or uvec2 and cast to a pointer, so check if they need to be packed too. 
+ ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + if (type.basetype == SPIRType::Struct && type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + mark_as_packable(type); }); } // If the specified type is a struct, it and any nested structs -// are marked as packable with the SPIRVCrossDecorationPacked decoration, +// are marked as packable with the SPIRVCrossDecorationBufferBlockRepacked decoration, void CompilerMSL::mark_as_packable(SPIRType &type) { // If this is not the base type (eg. it's a pointer or array), tunnel down @@ -1173,12 +2191,13 @@ void CompilerMSL::mark_as_packable(SPIRType &type) return; } - if (type.basetype == SPIRType::Struct) + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked)) { - set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked); // Recurse - size_t mbr_cnt = type.member_types.size(); + uint32_t mbr_cnt = uint32_t(type.member_types.size()); for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) { uint32_t mbr_type_id = type.member_types[mbr_idx]; @@ -1193,36 +2212,215 @@ void CompilerMSL::mark_as_packable(SPIRType &type) } } -// If a vertex attribute exists at the location, it is marked as being used by this shader -void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, StorageClass storage) -{ - if ((get_execution_model() == ExecutionModelVertex || is_tessellation_shader()) && (storage == StorageClassInput)) - vtx_attrs_in_use.insert(location); -} - -uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const +// If the specified type is a struct, it and any nested structs +// are marked as used with workgroup storage using the SPIRVCrossDecorationWorkgroupStruct decoration. 
+void CompilerMSL::mark_as_workgroup_struct(SPIRType &type) { - auto itr = fragment_output_components.find(location); - if (itr == end(fragment_output_components)) - return 4; - else - return itr->second; -} + // If this is not the base type (eg. it's a pointer or array), tunnel down + if (type.parent_type) + { + mark_as_workgroup_struct(get(type.parent_type)); + return; + } -uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components) -{ - uint32_t new_type_id = ir.increase_bound_by(1); - auto &type = set(new_type_id, get(type_id)); - type.vecsize = components; - type.self = new_type_id; - type.parent_type = type_id; - type.pointer = false; + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (type.basetype == SPIRType::Struct && !has_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct)) + { + set_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct); - return new_type_id; + // Recurse + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + uint32_t mbr_type_id = type.member_types[mbr_idx]; + auto &mbr_type = get(mbr_type_id); + mark_as_workgroup_struct(mbr_type); + if (mbr_type.type_alias) + { + auto &mbr_type_alias = get(mbr_type.type_alias); + mark_as_workgroup_struct(mbr_type_alias); + } + } + } +} + +// If a shader input exists at the location, it is marked as being used by this shader +void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, + StorageClass storage, bool fallback) +{ + uint32_t count = type_to_location_count(type); + switch (storage) + { + case StorageClassInput: + for (uint32_t i = 0; i < count; i++) + { + location_inputs_in_use.insert(location + i); + if (fallback) + location_inputs_in_use_fallback.insert(location + i); + } + break; + case StorageClassOutput: + for (uint32_t i = 0; i < count; i++) + { + 
location_outputs_in_use.insert(location + i); + if (fallback) + location_outputs_in_use_fallback.insert(location + i); + } + break; + default: + return; + } +} + +uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const +{ + auto itr = fragment_output_components.find(location); + if (itr == end(fragment_output_components)) + return 4; + else + return itr->second; +} + +uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components, SPIRType::BaseType basetype) +{ + uint32_t new_type_id = ir.increase_bound_by(1); + auto &old_type = get(type_id); + auto *type = &set(new_type_id, old_type); + type->vecsize = components; + if (basetype != SPIRType::Unknown) + type->basetype = basetype; + type->self = new_type_id; + type->parent_type = type_id; + type->array.clear(); + type->array_size_literal.clear(); + type->pointer = false; + + if (is_array(old_type)) + { + uint32_t array_type_id = ir.increase_bound_by(1); + type = &set(array_type_id, *type); + type->parent_type = new_type_id; + type->array = old_type.array; + type->array_size_literal = old_type.array_size_literal; + new_type_id = array_type_id; + } + + if (old_type.pointer) + { + uint32_t ptr_type_id = ir.increase_bound_by(1); + type = &set(ptr_type_id, *type); + type->self = new_type_id; + type->parent_type = new_type_id; + type->storage = old_type.storage; + type->pointer = true; + type->pointer_depth++; + new_type_id = ptr_type_id; + } + + return new_type_id; +} + +uint32_t CompilerMSL::build_msl_interpolant_type(uint32_t type_id, bool is_noperspective) +{ + uint32_t new_type_id = ir.increase_bound_by(1); + SPIRType &type = set(new_type_id, get(type_id)); + type.basetype = SPIRType::Interpolant; + type.parent_type = type_id; + // In Metal, the pull-model interpolant type encodes perspective-vs-no-perspective in the type itself. + // Add this decoration so we know which argument to pass to the template. 
+ if (is_noperspective) + set_decoration(new_type_id, DecorationNoPerspective); + return new_type_id; +} + +bool CompilerMSL::add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRVariable &var, + const SPIRType &type, + InterfaceBlockMeta &meta) +{ + // Deal with Component decorations. + const InterfaceBlockMeta::LocationMeta *location_meta = nullptr; + uint32_t location = ~0u; + if (has_decoration(var.self, DecorationLocation)) + { + location = get_decoration(var.self, DecorationLocation); + auto location_meta_itr = meta.location_meta.find(location); + if (location_meta_itr != end(meta.location_meta)) + location_meta = &location_meta_itr->second; + } + + // Check if we need to pad fragment output to match a certain number of components. + if (location_meta) + { + bool pad_fragment_output = has_decoration(var.self, DecorationLocation) && + msl_options.pad_fragment_output_components && + get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput; + + auto &entry_func = get(ir.default_entry_point); + uint32_t start_component = get_decoration(var.self, DecorationComponent); + uint32_t type_components = type.vecsize; + uint32_t num_components = location_meta->num_components; + + if (pad_fragment_output) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + num_components = std::max(num_components, get_target_components_for_fragment_location(locn)); + } + + // We have already declared an IO block member as m_location_N. + // Just emit an early-declared variable and fixup as needed. + // Arrays need to be unrolled here since each location might need a different number of components. 
+ entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + + if (var.storage == StorageClassInput) + { + entry_func.fixup_hooks_in.push_back([=, &type, &var]() { + if (!type.array.empty()) + { + uint32_t array_size = to_array_size_literal(type); + for (uint32_t loc_off = 0; loc_off < array_size; loc_off++) + { + statement(to_name(var.self), "[", loc_off, "]", " = ", ib_var_ref, + ".m_location_", location + loc_off, + vector_swizzle(type_components, start_component), ";"); + } + } + else + { + statement(to_name(var.self), " = ", ib_var_ref, ".m_location_", location, + vector_swizzle(type_components, start_component), ";"); + } + }); + } + else + { + entry_func.fixup_hooks_out.push_back([=, &type, &var]() { + if (!type.array.empty()) + { + uint32_t array_size = to_array_size_literal(type); + for (uint32_t loc_off = 0; loc_off < array_size; loc_off++) + { + statement(ib_var_ref, ".m_location_", location + loc_off, + vector_swizzle(type_components, start_component), " = ", + to_name(var.self), "[", loc_off, "];"); + } + } + else + { + statement(ib_var_ref, ".m_location_", location, + vector_swizzle(type_components, start_component), " = ", to_name(var.self), ";"); + } + }); + } + return true; + } + else + return false; } void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, bool strip_array) + SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta) { bool is_builtin = is_builtin_variable(var); BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); @@ -1237,16 +2435,26 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co var.basetype = type_id; type_id = get_pointee_type_id(var.basetype); - if (strip_array && is_array(get(type_id))) + if (meta.strip_array && is_array(get(type_id))) type_id = get(type_id).parent_type; auto &type = get(type_id); uint32_t target_components = 0; uint32_t 
type_components = type.vecsize; + bool padded_output = false; + bool padded_input = false; + uint32_t start_component = 0; - // Check if we need to pad fragment output to match a certain number of components. - if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components && - get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput) + auto &entry_func = get(ir.default_entry_point); + + if (add_component_variable_to_interface_block(storage, ib_var_ref, var, type, meta)) + return; + + bool pad_fragment_output = has_decoration(var.self, DecorationLocation) && + msl_options.pad_fragment_output_components && + get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput; + + if (pad_fragment_output) { uint32_t locn = get_decoration(var.self, DecorationLocation); target_components = get_target_components_for_fragment_location(locn); @@ -1258,7 +2466,10 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co } } - ib_type.member_types.push_back(type_id); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(type_id, is_noperspective)); + else + ib_type.member_types.push_back(type_id); // Give the member a name string mbr_name = ensure_valid_name(to_expression(var.self), "m"); @@ -1266,54 +2477,109 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co // Update the original variable reference to include the structure reference string qual_var_name = ib_var_ref + "." + mbr_name; - auto &entry_func = get(ir.default_entry_point); + // If using pull-model interpolation, need to add a call to the correct interpolation method. 
+ if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + { + if (is_centroid) + qual_var_name += ".interpolate_at_centroid()"; + else if (is_sample) + qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + qual_var_name += ".interpolate_at_center()"; + } - if (padded_output) + if (padded_output || padded_input) { entry_func.add_local_variable(var.self); vars_needing_early_declaration.push_back(var.self); - entry_func.fixup_hooks_out.push_back([=, &var]() { - SPIRType &padded_type = this->get(type_id); - statement(qual_var_name, " = ", remap_swizzle(padded_type, type_components, to_name(var.self)), ";"); - }); + if (padded_output) + { + entry_func.fixup_hooks_out.push_back([=, &var]() { + statement(qual_var_name, vector_swizzle(type_components, start_component), " = ", to_name(var.self), + ";"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + statement(to_name(var.self), " = ", qual_var_name, vector_swizzle(type_components, start_component), + ";"); + }); + } } - else if (!strip_array) + else if (!meta.strip_array) ir.meta[var.self].decoration.qualified_alias = qual_var_name; - if (var.storage == StorageClassOutput && var.initializer != 0) + if (var.storage == StorageClassOutput && var.initializer != ID(0)) { - entry_func.fixup_hooks_in.push_back( - [=, &var]() { statement(qual_var_name, " = ", to_expression(var.initializer), ";"); }); + if (padded_output || padded_input) + { + entry_func.fixup_hooks_in.push_back( + [=, &var]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); }); + } + else + { + if (meta.strip_array) + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + uint32_t index = get_extended_decoration(var.self, SPIRVCrossDecorationInterfaceMemberIndex); + auto invocation = to_tesc_invocation_id(); + statement(to_expression(stage_out_ptr_var_id), "[", + invocation, "].", + to_member_name(ib_type, index), " = ", 
to_expression(var.initializer), "[", + invocation, "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + statement(qual_var_name, " = ", to_expression(var.initializer), ";"); + }); + } + } } // Copy the variable location from the original variable to the member if (get_decoration_bitset(var.self).get(DecorationLocation)) { uint32_t locn = get_decoration(var.self, DecorationLocation); - if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + uint32_t comp = get_decoration(var.self, DecorationComponent); + if (storage == StorageClassInput) { - type_id = ensure_correct_attribute_type(var.basetype, locn); + type_id = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array); var.basetype = type_id; + type_id = get_pointee_type_id(type_id); - if (strip_array && is_array(get(type_id))) + if (meta.strip_array && is_array(get(type_id))) type_id = get(type_id).parent_type; - ib_type.member_types[ib_mbr_idx] = type_id; + if (pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = type_id; } set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + if (comp) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp); + mark_location_as_used_by_shader(locn, get(type_id), storage); + } + else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, type, storage); } - else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && 
outputs_by_builtin.count(builtin)) { - uint32_t locn = vtx_attrs_by_builtin[builtin].location; + uint32_t locn = outputs_by_builtin[builtin].location; set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + mark_location_as_used_by_shader(locn, type, storage); } if (get_decoration_bitset(var.self).get(DecorationComponent)) { - uint32_t comp = get_decoration(var.self, DecorationComponent); - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp); + uint32_t component = get_decoration(var.self, DecorationComponent); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, component); } if (get_decoration_bitset(var.self).get(DecorationIndex)) @@ -1331,25 +2597,32 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co } // Copy interpolation decorations if needed - if (is_flat) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); - if (is_noperspective) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); - if (is_centroid) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); - if (is_sample) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); } void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, bool strip_array) + SPIRType &ib_type, 
SPIRVariable &var, + InterfaceBlockMeta &meta) { auto &entry_func = get(ir.default_entry_point); - auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var); uint32_t elem_cnt = 0; + if (add_component_variable_to_interface_block(storage, ib_var_ref, var, var_type, meta)) + return; + if (is_matrix(var_type)) { if (is_array(var_type)) @@ -1382,10 +2655,33 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage if (is_builtin) set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); - entry_func.add_local_variable(var.self); + bool flatten_from_ib_var = false; + string flatten_from_ib_mbr_name; - // We need to declare the variable early and at entry-point scope. - vars_needing_early_declaration.push_back(var.self); + if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance) + { + // Also declare [[clip_distance]] attribute here. + uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(get_variable_data_type_id(var)); + set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); + + flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput); + set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name); + + // When we flatten, we flatten directly from the "out" struct, + // not from a function variable. + flatten_from_ib_var = true; + + if (!msl_options.enable_clip_distance_user_varying) + return; + } + else if (!meta.strip_array) + { + // Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped. + entry_func.add_local_variable(var.self); + // We need to declare the variable early and at entry-point scope. 
+ vars_needing_early_declaration.push_back(var.self); + } for (uint32_t i = 0; i < elem_cnt; i++) { @@ -1410,7 +2706,10 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage } } - ib_type.member_types.push_back(get_pointee_type_id(type_id)); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(get_pointee_type_id(type_id), is_noperspective)); + else + ib_type.member_types.push_back(get_pointee_type_id(type_id)); // Give the member a name string mbr_name = ensure_valid_name(join(to_expression(var.self), "_", i), "m"); @@ -1420,21 +2719,38 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage if (get_decoration_bitset(var.self).get(DecorationLocation)) { uint32_t locn = get_decoration(var.self, DecorationLocation) + i; - if (storage == StorageClassInput && - (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + uint32_t comp = get_decoration(var.self, DecorationComponent); + if (storage == StorageClassInput) { - var.basetype = ensure_correct_attribute_type(var.basetype, locn); - uint32_t mbr_type_id = ensure_correct_attribute_type(usable_type->self, locn); - ib_type.member_types[ib_mbr_idx] = mbr_type_id; + var.basetype = ensure_correct_input_type(var.basetype, locn, comp, 0, meta.strip_array); + uint32_t mbr_type_id = ensure_correct_input_type(usable_type->self, locn, comp, 0, meta.strip_array); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; } set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + if (comp) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } + else 
if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, *usable_type, storage); } - else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) { - uint32_t locn = vtx_attrs_by_builtin[builtin].location + i; + uint32_t locn = outputs_by_builtin[builtin].location + i; set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } + else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)) + { + // Declare the Clip/CullDistance as [[user(clip/cullN)]]. 
+ set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i); } if (get_decoration_bitset(var.self).get(DecorationIndex)) @@ -1443,25 +2759,44 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index); } - // Copy interpolation decorations if needed - if (is_flat) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); - if (is_noperspective) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); - if (is_centroid) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); - if (is_sample) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); - if (!strip_array) + // Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped. 
+ if (!meta.strip_array) { switch (storage) { case StorageClassInput: - entry_func.fixup_hooks_in.push_back( - [=, &var]() { statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); }); + entry_func.fixup_hooks_in.push_back([=, &var]() { + if (pull_model_inputs.count(var.self)) + { + string lerp_call; + if (is_centroid) + lerp_call = ".interpolate_at_centroid()"; + else if (is_sample) + lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + lerp_call = ".interpolate_at_center()"; + statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, lerp_call, ";"); + } + else + { + statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); + } + }); break; case StorageClassOutput: @@ -1474,6 +2809,9 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage remap_swizzle(padded_type, usable_type->vecsize, join(to_name(var.self), "[", i, "]")), ";"); } + else if (flatten_from_ib_var) + statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i, + "];"); else statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];"); }); @@ -1486,42 +2824,17 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage } } -uint32_t CompilerMSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) -{ - auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); - uint32_t location = get_decoration(var.self, DecorationLocation); - - for (uint32_t i = 0; i < mbr_idx; i++) - { - auto &mbr_type = get(type.member_types[i]); - - // Start counting from any place we have a new location decoration. 
- if (has_member_decoration(type.self, mbr_idx, DecorationLocation)) - location = get_member_decoration(type.self, mbr_idx, DecorationLocation); - - uint32_t location_count = 1; - - if (mbr_type.columns > 1) - location_count = mbr_type.columns; - - if (!mbr_type.array.empty()) - for (uint32_t j = 0; j < uint32_t(mbr_type.array.size()); j++) - location_count *= to_array_size_literal(mbr_type, j); - - location += location_count; - } - - return location; -} - -void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, - uint32_t mbr_idx, bool strip_array) +void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, + const string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, SPIRType &var_type, + uint32_t mbr_idx, InterfaceBlockMeta &meta, + const string &mbr_name_qual, + const string &var_chain_qual, + uint32_t &location, uint32_t &var_mbr_idx) { auto &entry_func = get(ir.default_entry_point); - auto &var_type = strip_array ? 
get_variable_element_type(var) : get_variable_data_type(var); - BuiltIn builtin; + BuiltIn builtin = BuiltInMax; bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); bool is_flat = has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat); @@ -1534,13 +2847,15 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass uint32_t mbr_type_id = var_type.member_types[mbr_idx]; auto &mbr_type = get(mbr_type_id); - uint32_t elem_cnt = 0; + bool mbr_is_indexable = false; + uint32_t elem_cnt = 1; if (is_matrix(mbr_type)) { if (is_array(mbr_type)) SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables."); + mbr_is_indexable = true; elem_cnt = mbr_type.columns; } else if (is_array(mbr_type)) @@ -1548,6 +2863,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass if (mbr_type.array.size() != 1) SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables."); + mbr_is_indexable = true; elem_cnt = to_array_size_literal(mbr_type); } @@ -1557,67 +2873,154 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass while (is_array(*usable_type) || is_matrix(*usable_type)) usable_type = &get(usable_type->parent_type); + bool flatten_from_ib_var = false; + string flatten_from_ib_mbr_name; + + if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance) + { + // Also declare [[clip_distance]] attribute here. 
+ uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(mbr_type_id); + set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); + + flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput); + set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name); + + // When we flatten, we flatten directly from the "out" struct, + // not from a function variable. + flatten_from_ib_var = true; + + if (!msl_options.enable_clip_distance_user_varying) + return; + } + + // Recursively handle nested structures. + if (mbr_type.basetype == SPIRType::Struct) + { + for (uint32_t i = 0; i < elem_cnt; i++) + { + string mbr_name = append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : ""); + string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (mbr_is_indexable ? join("[", i, "]") : "")); + uint32_t sub_mbr_cnt = uint32_t(mbr_type.member_types.size()); + for (uint32_t sub_mbr_idx = 0; sub_mbr_idx < sub_mbr_cnt; sub_mbr_idx++) + { + add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, + var, mbr_type, sub_mbr_idx, + meta, mbr_name, var_chain, + location, var_mbr_idx); + // FIXME: Recursive structs and tessellation breaks here. + var_mbr_idx++; + } + } + return; + } + for (uint32_t i = 0; i < elem_cnt; i++) { // Add a reference to the variable type to the interface struct. 
uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); - ib_type.member_types.push_back(usable_type->self); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(usable_type->self, is_noperspective)); + else + ib_type.member_types.push_back(usable_type->self); // Give the member a name - string mbr_name = ensure_valid_name(join(to_qualified_member_name(var_type, mbr_idx), "_", i), "m"); + string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : ""), "m"); set_member_name(ib_type.self, ib_mbr_idx, mbr_name); - if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + // Once we determine the location of the first member within nested structures, + // from a var of the topmost structure, the remaining flattened members of + // the nested structures will have consecutive location values. At this point, + // we've recursively tunnelled into structs, arrays, and matrices, and are + // down to a single location for each member now. 
+ if (!is_builtin && location != UINT32_MAX) { - uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; + } + else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + { + location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; } else if (has_decoration(var.self, DecorationLocation)) { - uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array) + i; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + location = get_accumulated_member_location(var, mbr_idx, meta.strip_array) + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; } - else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) { - uint32_t locn = vtx_attrs_by_builtin[builtin].location + i; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + location = inputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; + } + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && 
outputs_by_builtin.count(builtin)) + { + location = outputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, *usable_type, storage); + location++; + } + else if (is_builtin && (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance)) + { + // Declare the Clip/CullDistance as [[user(clip/cullN)]]. + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, i); } if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent)) - SPIRV_CROSS_THROW("DecorationComponent on matrices and arrays make little sense."); + SPIRV_CROSS_THROW("DecorationComponent on matrices and arrays is not supported."); - // Copy interpolation decorations if needed - if (is_flat) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); - if (is_noperspective) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); - if (is_centroid) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); - if (is_sample) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); - set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, 
SPIRVCrossDecorationInterfaceMemberIndex, var_mbr_idx); // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. - if (!strip_array) + if (!meta.strip_array && meta.allow_local_declaration) { + string var_chain = join(var_chain_qual, ".", to_member_name(var_type, mbr_idx), (mbr_is_indexable ? join("[", i, "]") : "")); switch (storage) { case StorageClassInput: - entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() { - statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref, - ".", mbr_name, ";"); + entry_func.fixup_hooks_in.push_back([=, &var]() { + string lerp_call; + if (pull_model_inputs.count(var.self)) + { + if (is_centroid) + lerp_call = ".interpolate_at_centroid()"; + else if (is_sample) + lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + lerp_call = ".interpolate_at_center()"; + } + statement(var_chain, " = ", ib_var_ref, ".", mbr_name, lerp_call, ";"); }); break; case StorageClassOutput: - entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() { - statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), ".", - to_member_name(var_type, mbr_idx), "[", i, "];"); + entry_func.fixup_hooks_out.push_back([=]() { + if (flatten_from_ib_var) + statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i, "];"); + else + statement(ib_var_ref, ".", mbr_name, " = ", var_chain, ";"); }); break; @@ -1628,11 +3031,14 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass } } -void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, uint32_t mbr_idx, - bool strip_array) +void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage, + const string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, SPIRType &var_type, + uint32_t mbr_idx, InterfaceBlockMeta &meta, + const string 
&mbr_name_qual, + const string &var_chain_qual, + uint32_t &location, uint32_t &var_mbr_idx) { - auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); auto &entry_func = get(ir.default_entry_point); BuiltIn builtin = BuiltInMax; @@ -1651,35 +3057,51 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); mbr_type_id = ensure_correct_builtin_type(mbr_type_id, builtin); var_type.member_types[mbr_idx] = mbr_type_id; - ib_type.member_types.push_back(mbr_type_id); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(mbr_type_id, is_noperspective)); + else + ib_type.member_types.push_back(mbr_type_id); // Give the member a name - string mbr_name = ensure_valid_name(to_qualified_member_name(var_type, mbr_idx), "m"); + string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx), "m"); set_member_name(ib_type.self, ib_mbr_idx, mbr_name); // Update the original variable reference to include the structure reference string qual_var_name = ib_var_ref + "." + mbr_name; + // If using pull-model interpolation, need to add a call to the correct interpolation method. + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + { + if (is_centroid) + qual_var_name += ".interpolate_at_centroid()"; + else if (is_sample) + qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + qual_var_name += ".interpolate_at_center()"; + } - if (is_builtin && !strip_array) + bool flatten_stage_out = false; + string var_chain = var_chain_qual + "." + to_member_name(var_type, mbr_idx); + if (is_builtin && !meta.strip_array) { // For the builtin gl_PerVertex, we cannot treat it as a block anyways, // so redirect to qualified name. 
set_member_qualified_name(var_type.self, mbr_idx, qual_var_name); } - else if (!strip_array) + else if (!meta.strip_array && meta.allow_local_declaration) { // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. switch (storage) { case StorageClassInput: - entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() { - statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), " = ", qual_var_name, ";"); + entry_func.fixup_hooks_in.push_back([=]() { + statement(var_chain, " = ", qual_var_name, ";"); }); break; case StorageClassOutput: - entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() { - statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx), ";"); + flatten_stage_out = true; + entry_func.fixup_hooks_out.push_back([=]() { + statement(qual_var_name, " = ", var_chain, ";"); }); break; @@ -1688,41 +3110,63 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor } } - // Copy the variable location from the original variable to the member - if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + // Once we determine the location of the first member within nested structures, + // from a var of the topmost structure, the remaining flattened members of + // the nested structures will have consecutive location values. At this point, + // we've recursively tunnelled into structs, arrays, and matrices, and are + // down to a single location for each member now. 
+ if (!is_builtin && location != UINT32_MAX) { - uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); - if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) - { - mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn); - var_type.member_types[mbr_idx] = mbr_type_id; - ib_type.member_types[ib_mbr_idx] = mbr_type_id; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); + } + else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + { + location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); + uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent); + if (storage == StorageClassInput) + { + mbr_type_id = ensure_correct_input_type(mbr_type_id, location, comp, 0, meta.strip_array); + var_type.member_types[mbr_idx] = mbr_type_id; + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; } - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); } else if (has_decoration(var.self, DecorationLocation)) { - // The block itself might have a location and in this case, all members of the block - // receive incrementing locations. 
- uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array); - if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + location = get_accumulated_member_location(var, mbr_idx, meta.strip_array); + if (storage == StorageClassInput) { - mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn); + mbr_type_id = ensure_correct_input_type(mbr_type_id, location, 0, 0, meta.strip_array); var_type.member_types[mbr_idx] = mbr_type_id; - ib_type.member_types[ib_mbr_idx] = mbr_type_id; + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; } - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); } - else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) { - uint32_t locn = 0; - auto builtin_itr = vtx_attrs_by_builtin.find(builtin); - if (builtin_itr != end(vtx_attrs_by_builtin)) - locn = builtin_itr->second.location; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, storage); + location = inputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); + } + else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && 
outputs_by_builtin.count(builtin)) + { + location = outputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(mbr_type_id), storage); + location += type_to_location_count(get(mbr_type_id)); } // Copy the component location, if present. @@ -1740,18 +3184,48 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor qual_pos_var_name = qual_var_name; } - // Copy interpolation decorations if needed - if (is_flat) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); - if (is_noperspective) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); - if (is_centroid) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); - if (is_sample) - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + const SPIRConstant *c = nullptr; + if (!flatten_stage_out && var.storage == StorageClassOutput && + var.initializer != ID(0) && (c = maybe_get(var.initializer))) + { + if (meta.strip_array) + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + auto &type = this->get(var.basetype); + uint32_t index = get_extended_member_decoration(var.self, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex); + + auto invocation = to_tesc_invocation_id(); + auto constant_chain = join(to_expression(var.initializer), "[", invocation, "]"); + statement(to_expression(stage_out_ptr_var_id), "[", + invocation, "].", + to_member_name(ib_type, index), " = ", + constant_chain, ".", to_member_name(type, mbr_idx), ";"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(qual_var_name, " = ", constant_expression( + this->get(c->subconstants[mbr_idx])), ";"); + }); + } + } + + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if 
(is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); - set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, var_mbr_idx); } // In Metal, the tessellation levels are stored as tightly packed half-precision floating point values. @@ -1762,30 +3236,38 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var) { - auto &entry_func = get(ir.default_entry_point); auto &var_type = get_variable_element_type(var); BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + bool triangles = is_tessellating_triangles(); + string mbr_name; - // Force the variable to have the proper name. - set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); + // Add a reference to the variable type to the interface struct. 
+ uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + + const auto mark_locations = [&](const SPIRType &new_var_type) { + if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput); + } + else if (inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput); + } + }; - if (get_entry_point().flags.get(ExecutionModeTriangles)) + if (triangles) { // Triangles are tricky, because we want only one member in the struct. - - // We need to declare the variable early and at entry-point scope. - entry_func.add_local_variable(var.self); - vars_needing_early_declaration.push_back(var.self); - - string mbr_name = "gl_TessLevel"; + mbr_name = "gl_TessLevel"; // If we already added the other one, we can skip this step. if (!added_builtin_tess_level) { - // Add a reference to the variable type to the interface struct. - uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); - uint32_t type_id = build_extended_vector_type(var_type.self, 4); ib_type.member_types.push_back(type_id); @@ -1793,97 +3275,220 @@ void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_ // Give the member a name set_member_name(ib_type.self, ib_mbr_idx, mbr_name); - // There is no qualified alias since we need to flatten the internal array on return. 
- if (get_decoration_bitset(var.self).get(DecorationLocation)) - { - uint32_t locn = get_decoration(var.self, DecorationLocation); - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, StorageClassInput); - } - else if (vtx_attrs_by_builtin.count(builtin)) - { - uint32_t locn = vtx_attrs_by_builtin[builtin].location; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, StorageClassInput); - } + // We cannot decorate both, but the important part is that + // it's marked as builtin so we can get automatic attribute assignment if needed. + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + mark_locations(var_type); added_builtin_tess_level = true; } - - switch (builtin) - { - case BuiltInTessLevelOuter: - entry_func.fixup_hooks_in.push_back([=, &var]() { - statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".x;"); - statement(to_name(var.self), "[1] = ", ib_var_ref, ".", mbr_name, ".y;"); - statement(to_name(var.self), "[2] = ", ib_var_ref, ".", mbr_name, ".z;"); - }); - break; - - case BuiltInTessLevelInner: - entry_func.fixup_hooks_in.push_back( - [=, &var]() { statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".w;"); }); - break; - - default: - assert(false); - break; - } } else { - // Add a reference to the variable type to the interface struct. - uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + mbr_name = builtin_to_glsl(builtin, StorageClassFunction); uint32_t type_id = build_extended_vector_type(var_type.self, builtin == BuiltInTessLevelOuter ? 4 : 2); - // Change the type of the variable, too. 
+ uint32_t ptr_type_id = ir.increase_bound_by(1); auto &new_var_type = set(ptr_type_id, get(type_id)); new_var_type.pointer = true; + new_var_type.pointer_depth++; new_var_type.storage = StorageClassInput; new_var_type.parent_type = type_id; - var.basetype = ptr_type_id; ib_type.member_types.push_back(type_id); // Give the member a name - string mbr_name = to_expression(var.self); set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); - // Since vectors can be indexed like arrays, there is no need to unpack this. We can - // just refer to the vector directly. So give it a qualified alias. - string qual_var_name = ib_var_ref + "." + mbr_name; - ir.meta[var.self].decoration.qualified_alias = qual_var_name; + mark_locations(new_var_type); + } - if (get_decoration_bitset(var.self).get(DecorationLocation)) + add_tess_level_input(ib_var_ref, mbr_name, var); +} + +void CompilerMSL::add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var) +{ + auto &entry_func = get(ir.default_entry_point); + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + + // Force the variable to have the proper name. + string var_name = builtin_to_glsl(builtin, StorageClassFunction); + set_name(var.self, var_name); + + // We need to declare the variable early and at entry-point scope. 
+ entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + bool triangles = is_tessellating_triangles(); + + if (builtin == BuiltInTessLevelOuter) + { + entry_func.fixup_hooks_in.push_back( + [=]() + { + statement(var_name, "[0] = ", base_ref, ".", mbr_name, "[0];"); + statement(var_name, "[1] = ", base_ref, ".", mbr_name, "[1];"); + statement(var_name, "[2] = ", base_ref, ".", mbr_name, "[2];"); + if (!triangles) + statement(var_name, "[3] = ", base_ref, ".", mbr_name, "[3];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + if (triangles) + { + if (msl_options.raw_buffer_tese_input) + statement(var_name, "[0] = ", base_ref, ".", mbr_name, ";"); + else + statement(var_name, "[0] = ", base_ref, ".", mbr_name, "[3];"); + } + else + { + statement(var_name, "[0] = ", base_ref, ".", mbr_name, "[0];"); + statement(var_name, "[1] = ", base_ref, ".", mbr_name, "[1];"); + } + }); + } +} + +bool CompilerMSL::variable_storage_requires_stage_io(spv::StorageClass storage) const +{ + if (storage == StorageClassOutput) + return !capture_output_to_buffer; + else if (storage == StorageClassInput) + return !(is_tesc_shader() && msl_options.multi_patch_workgroup) && + !(is_tese_shader() && msl_options.raw_buffer_tese_input); + else + return false; +} + +string CompilerMSL::to_tesc_invocation_id() +{ + if (msl_options.multi_patch_workgroup) + { + // n.b. builtin_invocation_id_id here is the dispatch global invocation ID, + // not the TC invocation ID. 
+ return join(to_expression(builtin_invocation_id_id), ".x % ", get_entry_point().output_vertices); + } + else + return builtin_to_glsl(BuiltInInvocationId, StorageClassInput); +} + +void CompilerMSL::emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array) +{ + auto &entry_func = get(ir.default_entry_point); + bool threadgroup_storage = variable_decl_is_remapped_storage(masked_var, StorageClassWorkgroup); + + if (threadgroup_storage && msl_options.multi_patch_workgroup) + { + // We need one threadgroup block per patch, so fake this. + entry_func.fixup_hooks_in.push_back([this, &masked_var]() { + auto &type = get_variable_data_type(masked_var); + add_local_variable_name(masked_var.self); + + bool old_is_builtin = is_using_builtin_array; + is_using_builtin_array = true; + + const uint32_t max_control_points_per_patch = 32u; + uint32_t max_num_instances = + (max_control_points_per_patch + get_entry_point().output_vertices - 1u) / + get_entry_point().output_vertices; + statement("threadgroup ", type_to_glsl(type), " ", + "spvStorage", to_name(masked_var.self), "[", max_num_instances, "]", + type_to_array_glsl(type), ";"); + + // Assign a threadgroup slice to each PrimitiveID. + // We assume here that workgroup size is rounded to 32, + // since that's the maximum number of control points per patch. + // We cannot size the array based on fixed dispatch parameters, + // since Metal does not allow that. :( + // FIXME: We will likely need an option to support passing down target workgroup size, + // so we can emit appropriate size here. 
+ statement("threadgroup ", type_to_glsl(type), " ", + "(&", to_name(masked_var.self), ")", + type_to_array_glsl(type), " = spvStorage", to_name(masked_var.self), "[", + "(", to_expression(builtin_invocation_id_id), ".x / ", + get_entry_point().output_vertices, ") % ", + max_num_instances, "];"); + + is_using_builtin_array = old_is_builtin; + }); + } + else + { + entry_func.add_local_variable(masked_var.self); + } + + if (!threadgroup_storage) + { + vars_needing_early_declaration.push_back(masked_var.self); + } + else if (masked_var.initializer) + { + // Cannot directly initialize threadgroup variables. Need fixup hooks. + ID initializer = masked_var.initializer; + if (strip_array) { - uint32_t locn = get_decoration(var.self, DecorationLocation); - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, StorageClassInput); + entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() { + auto invocation = to_tesc_invocation_id(); + statement(to_expression(masked_var.self), "[", + invocation, "] = ", + to_expression(initializer), "[", + invocation, "];"); + }); } - else if (vtx_attrs_by_builtin.count(builtin)) + else { - uint32_t locn = vtx_attrs_by_builtin[builtin].location; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); - mark_location_as_used_by_shader(locn, StorageClassInput); + entry_func.fixup_hooks_in.push_back([this, &masked_var, initializer]() { + statement(to_expression(masked_var.self), " = ", to_expression(initializer), ";"); + }); } } } void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type, - SPIRVariable &var, bool strip_array) + SPIRVariable &var, InterfaceBlockMeta &meta) { auto &entry_func = get(ir.default_entry_point); // Tessellation control I/O variables and tessellation evaluation per-point inputs are // usually declared as arrays. 
In these cases, we want to add the element type to the // interface block, since in Metal it's the interface block itself which is arrayed. - auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var); bool is_builtin = is_builtin_variable(var); auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + bool is_block = has_decoration(var_type.self, DecorationBlock); + + // If stage variables are masked out, emit them as plain variables instead. + // For builtins, we query them one by one later. + // IO blocks are not masked here, we need to mask them per-member instead. + if (storage == StorageClassOutput && is_stage_output_variable_masked(var)) + { + // If we ignore an output, we must still emit it, since it might be used by app. + // Instead, just emit it as early declaration. + emit_local_masked_variable(var, meta.strip_array); + return; + } + + if (storage == StorageClassInput && has_decoration(var.self, DecorationPerVertexKHR)) + SPIRV_CROSS_THROW("PerVertexKHR decoration is not supported in MSL."); + + // If variable names alias, they will end up with wrong names in the interface struct, because + // there might be aliases in the member name cache and there would be a mismatch in fixup_in code. + // Make sure to register the variables as unique resource names ahead of time. + // This would normally conflict with the name cache when emitting local variables, + // but this happens in the setup stage, before we hit compilation loops. + // The name cache is cleared before we actually emit code, so this is safe. 
+ add_resource_name(var.self); if (var_type.basetype == SPIRType::Struct) { - if (!is_builtin_type(var_type) && (!capture_output_to_buffer || storage == StorageClassInput) && !strip_array) + bool block_requires_flattening = + variable_storage_requires_stage_io(storage) || (is_block && var_type.array.empty()); + bool needs_local_declaration = !is_builtin && block_requires_flattening && meta.allow_local_declaration; + + if (needs_local_declaration) { // For I/O blocks or structs, we will need to pass the block itself around // to functions if they are used globally in leaf functions. @@ -1891,67 +3496,171 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st // we unflatten I/O blocks while running the shader, // and pass the actual struct type down to leaf functions. // We then unflatten inputs, and flatten outputs in the "fixup" stages. - entry_func.add_local_variable(var.self); - vars_needing_early_declaration.push_back(var.self); + emit_local_masked_variable(var, meta.strip_array); } - if (capture_output_to_buffer && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock)) + if (!block_requires_flattening) { // In Metal tessellation shaders, the interface block itself is arrayed. This makes things // very complicated, since stage-in structures in MSL don't support nested structures. // Luckily, for stage-out when capturing output, we can avoid this and just add // composite members directly, because the stage-out structure is stored to a buffer, // not returned. 
- add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta); } else { - // Flatten the struct members into the interface struct - for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) + bool masked_block = false; + uint32_t location = UINT32_MAX; + uint32_t var_mbr_idx = 0; + uint32_t elem_cnt = 1; + if (is_matrix(var_type)) { - builtin = BuiltInMax; - is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); - auto &mbr_type = get(var_type.member_types[mbr_idx]); + if (is_array(var_type)) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables."); + + elem_cnt = var_type.columns; + } + else if (is_array(var_type)) + { + if (var_type.array.size() != 1) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables."); + + elem_cnt = to_array_size_literal(var_type); + } - if (!is_builtin || has_active_builtin(builtin, storage)) + for (uint32_t elem_idx = 0; elem_idx < elem_cnt; elem_idx++) + { + // Flatten the struct members into the interface struct + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) { - if ((!is_builtin || - (storage == StorageClassInput && get_execution_model() != ExecutionModelFragment)) && - (storage == StorageClassInput || storage == StorageClassOutput) && - (is_matrix(mbr_type) || is_array(mbr_type))) + builtin = BuiltInMax; + is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + auto &mbr_type = get(var_type.member_types[mbr_idx]); + + if (storage == StorageClassOutput && is_stage_output_block_member_masked(var, mbr_idx, meta.strip_array)) { - add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, - strip_array); + location = UINT32_MAX; // Skip this member and resolve location again on next var member + + if (is_block) + masked_block = true; + + // Non-builtin 
block output variables are just ignored, since they will still access + // the block variable as-is. They're just not flattened. + if (is_builtin && !meta.strip_array) + { + // Emit a fake variable instead. + uint32_t ids = ir.increase_bound_by(2); + uint32_t ptr_type_id = ids + 0; + uint32_t var_id = ids + 1; + + auto ptr_type = mbr_type; + ptr_type.pointer = true; + ptr_type.pointer_depth++; + ptr_type.parent_type = var_type.member_types[mbr_idx]; + ptr_type.storage = StorageClassOutput; + + uint32_t initializer = 0; + if (var.initializer) + if (auto *c = maybe_get(var.initializer)) + initializer = c->subconstants[mbr_idx]; + + set(ptr_type_id, ptr_type); + set(var_id, ptr_type_id, StorageClassOutput, initializer); + entry_func.add_local_variable(var_id); + vars_needing_early_declaration.push_back(var_id); + set_name(var_id, builtin_to_glsl(builtin, StorageClassOutput)); + set_decoration(var_id, DecorationBuiltIn, builtin); + } } - else + else if (!is_builtin || has_active_builtin(builtin, storage)) { - add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, - strip_array); + bool is_composite_type = is_matrix(mbr_type) || is_array(mbr_type) || mbr_type.basetype == SPIRType::Struct; + bool attribute_load_store = + storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; + bool storage_is_stage_io = variable_storage_requires_stage_io(storage); + + // Clip/CullDistance always need to be declared as user attributes. 
+ if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance) + is_builtin = false; + + const string var_name = to_name(var.self); + string mbr_name_qual = var_name; + string var_chain_qual = var_name; + if (elem_cnt > 1) + { + mbr_name_qual += join("_", elem_idx); + var_chain_qual += join("[", elem_idx, "]"); + } + + if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type) + { + add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, + var, var_type, mbr_idx, meta, + mbr_name_qual, var_chain_qual, + location, var_mbr_idx); + } + else + { + add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, + var, var_type, mbr_idx, meta, + mbr_name_qual, var_chain_qual, + location, var_mbr_idx); + } } + var_mbr_idx++; + } + } + + // If we're redirecting a block, we might still need to access the original block + // variable if we're masking some members. + if (masked_block && !needs_local_declaration && (!is_builtin_variable(var) || is_tesc_shader())) + { + if (is_builtin_variable(var)) + { + // Ensure correct names for the block members if we're actually going to + // declare gl_PerVertex. 
+ for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) + { + set_member_name(var_type.self, mbr_idx, builtin_to_glsl( + BuiltIn(get_member_decoration(var_type.self, mbr_idx, DecorationBuiltIn)), + StorageClassOutput)); + } + + set_name(var_type.self, "gl_PerVertex"); + set_name(var.self, "gl_out_masked"); + stage_out_masked_builtin_type_id = var_type.self; } + emit_local_masked_variable(var, meta.strip_array); } } } - else if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && - !strip_array && is_builtin && (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner)) + else if (is_tese_shader() && storage == StorageClassInput && !meta.strip_array && is_builtin && + (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner)) { add_tess_level_input_to_interface_block(ib_var_ref, ib_type, var); } else if (var_type.basetype == SPIRType::Boolean || var_type.basetype == SPIRType::Char || - type_is_integral(var_type) || type_is_floating_point(var_type) || var_type.basetype == SPIRType::Boolean) + type_is_integral(var_type) || type_is_floating_point(var_type)) { if (!is_builtin || has_active_builtin(builtin, storage)) { + bool is_composite_type = is_matrix(var_type) || is_array(var_type); + bool storage_is_stage_io = variable_storage_requires_stage_io(storage); + bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; + + // Clip/CullDistance always needs to be declared as user attributes. + if (builtin == BuiltInClipDistance || builtin == BuiltInCullDistance) + is_builtin = false; + // MSL does not allow matrices or arrays in input or output variables, so need to handle it specially. 
- if ((!is_builtin || (storage == StorageClassInput && get_execution_model() != ExecutionModelFragment)) && - (storage == StorageClassInput || (storage == StorageClassOutput && !capture_output_to_buffer)) && - (is_matrix(var_type) || is_array(var_type))) + if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type) { - add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta); } else { - add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta); } } } @@ -1961,62 +3670,45 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st // for per-vertex variables in a tessellation control shader. void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id) { - // Only needed for tessellation shaders. - if (get_execution_model() != ExecutionModelTessellationControl && - !(get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) + // Only needed for tessellation shaders and pull-model interpolants. + // Need to redirect interface indices back to variables themselves. + // For structs, each member of the struct need a separate instance. 
+ if (!is_tesc_shader() && !(is_tese_shader() && storage == StorageClassInput) && + !(get_execution_model() == ExecutionModelFragment && storage == StorageClassInput && + !pull_model_inputs.empty())) return; - bool in_array = false; - for (uint32_t i = 0; i < ir.meta[ib_type_id].members.size(); i++) + auto mbr_cnt = uint32_t(ir.meta[ib_type_id].members.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) { - auto &mbr_dec = ir.meta[ib_type_id].members[i]; - uint32_t var_id = mbr_dec.extended.ib_orig_id; + uint32_t var_id = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceOrigID); if (!var_id) continue; auto &var = get(var_id); - // Unfortunately, all this complexity is needed to handle flattened structs and/or - // arrays. - if (storage == StorageClassInput) - { - auto &type = get_variable_element_type(var); - if (is_array(type) || is_matrix(type)) - { - if (in_array) - continue; - in_array = true; - set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - else - { - if (type.basetype == SPIRType::Struct) - { - uint32_t mbr_idx = - get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); - auto &mbr_type = get(type.member_types[mbr_idx]); + auto &type = get_variable_element_type(var); - if (is_array(mbr_type) || is_matrix(mbr_type)) - { - if (in_array) - continue; - in_array = true; - set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - else - { - in_array = false; - set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - } - else - { - in_array = false; - set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - } + bool flatten_composites = variable_storage_requires_stage_io(var.storage); + bool is_block = has_decoration(type.self, DecorationBlock); + + uint32_t mbr_idx = uint32_t(-1); + if (type.basetype == SPIRType::Struct && (flatten_composites || is_block)) + 
mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); + + if (mbr_idx != uint32_t(-1)) + { + // Only set the lowest InterfaceMemberIndex for each variable member. + // IB struct members will be emitted in-order w.r.t. interface member index. + if (!has_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex)) + set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); } else - set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + { + // Only set the lowest InterfaceMemberIndex for each variable. + // IB struct members will be emitted in-order w.r.t. interface member index. + if (!has_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex)) + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } } } @@ -2029,6 +3721,16 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) bool incl_builtins = storage == StorageClassOutput || is_tessellation_shader(); bool has_seen_barycentric = false; + InterfaceBlockMeta meta; + + // Varying interfaces between stages which use "user()" attribute can be dealt with + // without explicit packing and unpacking of components. For any variables which link against the runtime + // in some way (vertex attributes, fragment output, etc), we'll need to deal with it somehow. 
+ bool pack_components = + (storage == StorageClassInput && get_execution_model() == ExecutionModelVertex) || + (storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment) || + (storage == StorageClassOutput && get_execution_model() == ExecutionModelVertex && capture_output_to_buffer); + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { if (var.storage != storage) return; @@ -2036,29 +3738,81 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) auto &type = this->get(var.basetype); bool is_builtin = is_builtin_variable(var); - auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + bool is_block = has_decoration(type.self, DecorationBlock); + + auto bi_type = BuiltInMax; + bool builtin_is_gl_in_out = false; + if (is_builtin && !is_block) + { + bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + builtin_is_gl_in_out = bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance; + } + + if (is_builtin && is_block) + builtin_is_gl_in_out = true; + + uint32_t location = get_decoration(var_id, DecorationLocation); + + bool builtin_is_stage_in_out = builtin_is_gl_in_out || + bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || + bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR || + bi_type == BuiltInFragDepth || + bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask; // These builtins are part of the stage in/out structs. 
bool is_interface_block_builtin = - (bi_type == BuiltInPosition || bi_type == BuiltInPointSize || bi_type == BuiltInClipDistance || - bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || - bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || bi_type == BuiltInFragDepth || - bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask) || - (get_execution_model() == ExecutionModelTessellationEvaluation && - (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)); + builtin_is_stage_in_out || (is_tese_shader() && !msl_options.raw_buffer_tese_input && + (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)); bool is_active = interface_variable_exists_in_entry_point(var.self); if (is_builtin && is_active) { // Only emit the builtin if it's active in this entry point. Interface variable list might lie. - is_active = has_active_builtin(bi_type, storage); + if (is_block) + { + // If any builtin is active, the block is active. + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; !is_active && i < mbr_cnt; i++) + is_active = has_active_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)), storage); + } + else + { + is_active = has_active_builtin(bi_type, storage); + } } bool filter_patch_decoration = (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch; bool hidden = is_hidden_variable(var, incl_builtins); + + // ClipDistance is never hidden, we need to emulate it when used as an input. + if (bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance) + hidden = false; + + // It's not enough to simply avoid marking fragment outputs if the pipeline won't + // accept them. We can't put them in the struct at all, or otherwise the compiler + // complains that the outputs weren't explicitly marked. + // Frag depth and stencil outputs are incompatible with explicit early fragment tests. 
+ // In GLSL, depth and stencil outputs are just ignored when explicit early fragment tests are required. + // In Metal, it's a compilation error, so we need to exclude them from the output struct. + if (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput && !patch && + ((is_builtin && ((bi_type == BuiltInFragDepth && (!msl_options.enable_frag_depth_builtin || uses_explicit_early_fragment_test())) || + (bi_type == BuiltInFragStencilRefEXT && (!msl_options.enable_frag_stencil_ref_builtin || uses_explicit_early_fragment_test())))) || + (!is_builtin && !(msl_options.enable_frag_output_mask & (1 << location))))) + { + hidden = true; + disabled_frag_outputs.push_back(var_id); + // If a builtin, force it to have the proper name, and mark it as not part of the output struct. + if (is_builtin) + { + set_name(var_id, builtin_to_glsl(bi_type, StorageClassFunction)); + mask_stage_output_by_builtin(bi_type); + } + } + // Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments. - if (is_active && (bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV)) + if (is_active && (bi_type == BuiltInBaryCoordKHR || bi_type == BuiltInBaryCoordNoPerspKHR)) { if (has_seen_barycentric) SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL."); @@ -2070,13 +3824,64 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) (!is_builtin || is_interface_block_builtin)) { vars.push_back(&var); + + if (!is_builtin) + { + // Need to deal specially with DecorationComponent. + // Multiple variables can alias the same Location, and try to make sure each location is declared only once. + // We will swizzle data in and out to make this work. + // This is only relevant for vertex inputs and fragment outputs. + // Technically tessellation as well, but it is too complicated to support. 
+ uint32_t component = get_decoration(var_id, DecorationComponent); + if (component != 0) + { + if (is_tessellation_shader()) + SPIRV_CROSS_THROW("Component decoration is not supported in tessellation shaders."); + else if (pack_components) + { + uint32_t array_size = 1; + if (!type.array.empty()) + array_size = to_array_size_literal(type); + + for (uint32_t location_offset = 0; location_offset < array_size; location_offset++) + { + auto &location_meta = meta.location_meta[location + location_offset]; + location_meta.num_components = std::max(location_meta.num_components, component + type.vecsize); + + // For variables sharing location, decorations and base type must match. + location_meta.base_type_id = type.self; + location_meta.flat = has_decoration(var.self, DecorationFlat); + location_meta.noperspective = has_decoration(var.self, DecorationNoPerspective); + location_meta.centroid = has_decoration(var.self, DecorationCentroid); + location_meta.sample = has_decoration(var.self, DecorationSample); + } + } + } + } + } + + if (is_tese_shader() && msl_options.raw_buffer_tese_input && patch && storage == StorageClassInput && + (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)) + { + // In this case, we won't add the builtin to the interface struct, + // but we still need the hook to run to populate the arrays. + string base_ref = join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "]"); + const char *mbr_name = + bi_type == BuiltInTessLevelOuter ? "edgeTessellationFactor" : "insideTessellationFactor"; + add_tess_level_input(base_ref, mbr_name, var); + if (inputs_by_builtin.count(bi_type)) + { + uint32_t locn = inputs_by_builtin[bi_type].location; + mark_location_as_used_by_shader(locn, type, StorageClassInput); + } } }); // If no variables qualify, leave. // For patch input in a tessellation evaluation shader, the per-vertex stage inputs // are included in a special patch control point array. 
- if (vars.empty() && !(storage == StorageClassInput && patch && stage_in_var_id)) + if (vars.empty() && + !(!msl_options.raw_buffer_tese_input && storage == StorageClassInput && patch && stage_in_var_id)) return 0; // Add a new typed variable for this interface structure. @@ -2099,30 +3904,74 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) { case StorageClassInput: ib_var_ref = patch ? patch_stage_in_var_name : stage_in_var_name; - if (get_execution_model() == ExecutionModelTessellationControl) + switch (get_execution_model()) { - // Add a hook to populate the shared workgroup memory containing - // the gl_in array. + case ExecutionModelTessellationControl: + // Add a hook to populate the shared workgroup memory containing the gl_in array. entry_func.fixup_hooks_in.push_back([=]() { - // Can't use PatchVertices yet; the hook for that may not have run yet. - statement("if (", to_expression(builtin_invocation_id_id), " < ", "spvIndirectParams[0])"); - statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), "] = ", ib_var_ref, - ";"); - statement("threadgroup_barrier(mem_flags::mem_threadgroup);"); - statement("if (", to_expression(builtin_invocation_id_id), " >= ", get_entry_point().output_vertices, - ")"); - statement(" return;"); + // Can't use PatchVertices, PrimitiveId, or InvocationId yet; the hooks for those may not have run yet. + if (msl_options.multi_patch_workgroup) + { + // n.b. builtin_invocation_id_id here is the dispatch global invocation ID, + // not the TC invocation ID. + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_in = &", + input_buffer_var_name, "[min(", to_expression(builtin_invocation_id_id), ".x / ", + get_entry_point().output_vertices, + ", spvIndirectParams[1] - 1) * spvIndirectParams[0]];"); + } + else + { + // It's safe to use InvocationId here because it's directly mapped to a + // Metal builtin, and therefore doesn't need a hook. 
+ statement("if (", to_expression(builtin_invocation_id_id), " < spvIndirectParams[0])"); + statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), + "] = ", ib_var_ref, ";"); + statement("threadgroup_barrier(mem_flags::mem_threadgroup);"); + statement("if (", to_expression(builtin_invocation_id_id), + " >= ", get_entry_point().output_vertices, ")"); + statement(" return;"); + } }); - } - break; - - case StorageClassOutput: - { - ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name; - - // Add the output interface struct as a local variable to the entry function. - // If the entry point should return the output struct, set the entry function - // to return the output interface struct, otherwise to return nothing. + break; + case ExecutionModelTessellationEvaluation: + if (!msl_options.raw_buffer_tese_input) + break; + if (patch) + { + entry_func.fixup_hooks_in.push_back( + [=]() + { + statement("const device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", patch_input_buffer_var_name, "[", to_expression(builtin_primitive_id_id), + "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back( + [=]() + { + statement("const device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_in = &", + input_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ", + get_entry_point().output_vertices, "];"); + }); + } + break; + default: + break; + } + break; + + case StorageClassOutput: + { + ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name; + + // Add the output interface struct as a local variable to the entry function. + // If the entry point should return the output struct, set the entry function + // to return the output interface struct, otherwise to return nothing. + // Watch out for the rare case where the terminator of the last entry point block is a + // Kill, instead of a Return. 
Based on SPIR-V's block-domination rules, we assume that + // any block that has a Kill will also have a terminating Return, except the last block. // Indicate the output var requires early initialization. bool ep_should_return_output = !get_is_rasterization_disabled(); uint32_t rtn_id = ep_should_return_output ? ib_var_id : 0; @@ -2132,7 +3981,7 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) for (auto &blk_id : entry_func.blocks) { auto &blk = get(blk_id); - if (blk.terminator == SPIRBlock::Return) + if (blk.terminator == SPIRBlock::Return || (blk.terminator == SPIRBlock::Kill && blk_id == entry_func.blocks.back())) blk.return_value = rtn_id; } vars_needing_early_declaration.push_back(ib_var_id); @@ -2152,26 +4001,72 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) { // The first member of the indirect buffer is always the number of vertices // to draw. - statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", - output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), " - ", - to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", - to_expression(builtin_vertex_idx_id), " - ", to_expression(builtin_base_vertex_id), - "];"); + // We zero-base the InstanceID & VertexID variables for HLSL emulation elsewhere, so don't do it twice + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation) + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), + ".y * ", to_expression(builtin_stage_input_size_id), ".x + ", + to_expression(builtin_invocation_id_id), ".x];"); + } + else if (msl_options.enable_base_index_zero) + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[", to_expression(builtin_instance_idx_id), + " * 
spvIndirectParams[0] + ", to_expression(builtin_vertex_idx_id), "];"); + } + else + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), + " - ", to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", + to_expression(builtin_vertex_idx_id), " - ", + to_expression(builtin_base_vertex_id), "];"); + } } }); break; case ExecutionModelTessellationControl: - if (patch) - entry_func.fixup_hooks_in.push_back([=]() { - statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", - patch_output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "];"); - }); + if (msl_options.multi_patch_workgroup) + { + // We cannot use PrimitiveId here, because the hook may not have run yet. + if (patch) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", patch_output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), + ".x / ", get_entry_point().output_vertices, "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", + output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), ".x - ", + to_expression(builtin_invocation_id_id), ".x % ", + get_entry_point().output_vertices, "];"); + }); + } + } else - entry_func.fixup_hooks_in.push_back([=]() { - statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", - output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ", - get_entry_point().output_vertices, "];"); - }); + { + if (patch) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", patch_output_buffer_var_name, "[", 
to_expression(builtin_primitive_id_id), + "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", + output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ", + get_entry_point().output_vertices, "];"); + }); + } + } break; default: break; @@ -2189,24 +4084,176 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) for (auto *p_var : vars) { - bool strip_array = - (get_execution_model() == ExecutionModelTessellationControl || - (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) && - !patch; - add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, strip_array); + bool strip_array = (is_tesc_shader() || (is_tese_shader() && storage == StorageClassInput)) && !patch; + + // Fixing up flattened stores in TESC is impossible since the memory is group shared either via + // device (not masked) or threadgroup (masked) storage classes and it's race condition city. + meta.strip_array = strip_array; + meta.allow_local_declaration = !strip_array && !(is_tesc_shader() && storage == StorageClassOutput); + add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, meta); + } + + if (((is_tesc_shader() && msl_options.multi_patch_workgroup) || + (is_tese_shader() && msl_options.raw_buffer_tese_input)) && + storage == StorageClassInput) + { + // For tessellation inputs, add all outputs from the previous stage to ensure + // the struct containing them is the correct size and layout. + for (auto &input : inputs_by_location) + { + if (location_inputs_in_use.count(input.first.location) != 0) + continue; + + if (patch != (input.second.rate == MSL_SHADER_VARIABLE_RATE_PER_PATCH)) + continue; + + // Tessellation levels have their own struct, so there's no need to add them here. 
+ if (input.second.builtin == BuiltInTessLevelOuter || input.second.builtin == BuiltInTessLevelInner) + continue; + + // Create a fake variable to put at the location. + uint32_t offset = ir.increase_bound_by(4); + uint32_t type_id = offset; + uint32_t array_type_id = offset + 1; + uint32_t ptr_type_id = offset + 2; + uint32_t var_id = offset + 3; + + SPIRType type; + switch (input.second.format) + { + case MSL_SHADER_VARIABLE_FORMAT_UINT16: + case MSL_SHADER_VARIABLE_FORMAT_ANY16: + type.basetype = SPIRType::UShort; + type.width = 16; + break; + case MSL_SHADER_VARIABLE_FORMAT_ANY32: + default: + type.basetype = SPIRType::UInt; + type.width = 32; + break; + } + type.vecsize = input.second.vecsize; + set(type_id, type); + + type.array.push_back(0); + type.array_size_literal.push_back(true); + type.parent_type = type_id; + set(array_type_id, type); + + type.pointer = true; + type.pointer_depth++; + type.parent_type = array_type_id; + type.storage = storage; + auto &ptr_type = set(ptr_type_id, type); + ptr_type.self = array_type_id; + + auto &fake_var = set(var_id, ptr_type_id, storage); + set_decoration(var_id, DecorationLocation, input.first.location); + if (input.first.component) + set_decoration(var_id, DecorationComponent, input.first.component); + + meta.strip_array = true; + meta.allow_local_declaration = false; + add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta); + } + } + + if (capture_output_to_buffer && storage == StorageClassOutput) + { + // For captured output, add all inputs from the next stage to ensure + // the struct containing them is the correct size and layout. This is + // necessary for certain implicit builtins that may nonetheless be read, + // even when they aren't written. + for (auto &output : outputs_by_location) + { + if (location_outputs_in_use.count(output.first.location) != 0) + continue; + + // Create a fake variable to put at the location. 
+ uint32_t offset = ir.increase_bound_by(4); + uint32_t type_id = offset; + uint32_t array_type_id = offset + 1; + uint32_t ptr_type_id = offset + 2; + uint32_t var_id = offset + 3; + + SPIRType type; + switch (output.second.format) + { + case MSL_SHADER_VARIABLE_FORMAT_UINT16: + case MSL_SHADER_VARIABLE_FORMAT_ANY16: + type.basetype = SPIRType::UShort; + type.width = 16; + break; + case MSL_SHADER_VARIABLE_FORMAT_ANY32: + default: + type.basetype = SPIRType::UInt; + type.width = 32; + break; + } + type.vecsize = output.second.vecsize; + set(type_id, type); + + if (is_tesc_shader()) + { + type.array.push_back(0); + type.array_size_literal.push_back(true); + type.parent_type = type_id; + set(array_type_id, type); + } + + type.pointer = true; + type.pointer_depth++; + type.parent_type = is_tesc_shader() ? array_type_id : type_id; + type.storage = storage; + auto &ptr_type = set(ptr_type_id, type); + ptr_type.self = type.parent_type; + + auto &fake_var = set(var_id, ptr_type_id, storage); + set_decoration(var_id, DecorationLocation, output.first.location); + if (output.first.component) + set_decoration(var_id, DecorationComponent, output.first.component); + + meta.strip_array = true; + meta.allow_local_declaration = false; + add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta); + } + } + + // When multiple variables need to access same location, + // unroll locations one by one and we will flatten output or input as necessary. 
+ for (auto &loc : meta.location_meta) + { + uint32_t location = loc.first; + auto &location_meta = loc.second; + + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + uint32_t type_id = build_extended_vector_type(location_meta.base_type_id, location_meta.num_components); + ib_type.member_types.push_back(type_id); + + set_member_name(ib_type.self, ib_mbr_idx, join("m_location_", location)); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); + mark_location_as_used_by_shader(location, get(type_id), storage); + + if (location_meta.flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (location_meta.noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (location_meta.centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (location_meta.sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); } // Sort the members of the structure by their locations. - MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Location); + MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::LocationThenBuiltInType); member_sorter.sort(); // The member indices were saved to the original variables, but after the members // were sorted, those indices are now likely incorrect. Fix those up now. - if (!patch) - fix_up_interface_member_indices(storage, ib_type_id); + fix_up_interface_member_indices(storage, ib_type_id); // For patch inputs, add one more member, holding the array of control point data. 
- if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && patch && + if (is_tese_shader() && !msl_options.raw_buffer_tese_input && storage == StorageClassInput && patch && stage_in_var_id) { uint32_t pcp_type_id = ir.increase_bound_by(1); @@ -2220,6 +4267,9 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) set_member_name(ib_type.self, mbr_idx, "gl_in"); } + if (storage == StorageClassInput) + set_decoration(ib_var_id, DecorationNonWritable); + return ib_var_id; } @@ -2231,7 +4281,7 @@ uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageCla uint32_t ib_ptr_var_id; uint32_t next_id = ir.increase_bound_by(3); auto &ib_type = expression_type(ib_var_id); - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader() || (is_tese_shader() && msl_options.raw_buffer_tese_input)) { // Tessellation control per-vertex I/O is presented as an array, so we must // do the same with our struct here. @@ -2239,7 +4289,13 @@ uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageCla auto &ib_ptr_type = set(ib_ptr_type_id, ib_type); ib_ptr_type.parent_type = ib_ptr_type.type_alias = ib_type.self; ib_ptr_type.pointer = true; - ib_ptr_type.storage = storage == StorageClassInput ? StorageClassWorkgroup : StorageClassStorageBuffer; + ib_ptr_type.pointer_depth++; + ib_ptr_type.storage = storage == StorageClassInput ? + ((is_tesc_shader() && msl_options.multi_patch_workgroup) || + (is_tese_shader() && msl_options.raw_buffer_tese_input) ? + StorageClassStorageBuffer : + StorageClassWorkgroup) : + StorageClassStorageBuffer; ir.meta[ib_ptr_type_id] = ir.meta[ib_type.self]; // To ensure that get_variable_data_type() doesn't strip off the pointer, // which we need, use another pointer. 
@@ -2252,7 +4308,9 @@ uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageCla ib_ptr_var_id = next_id; set(ib_ptr_var_id, ib_ptr_ptr_type_id, StorageClassFunction, 0); - set_name(ib_ptr_var_id, storage == StorageClassInput ? input_wg_var_name : "gl_out"); + set_name(ib_ptr_var_id, storage == StorageClassInput ? "gl_in" : "gl_out"); + if (storage == StorageClassInput) + set_decoration(ib_ptr_var_id, DecorationNonWritable); } else { @@ -2301,6 +4359,7 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil auto &ptr_type = set(ptr_type_id); ptr_type = base_type; ptr_type.pointer = true; + ptr_type.pointer_depth++; ptr_type.storage = type.storage; ptr_type.parent_type = base_type_id; return ptr_type_id; @@ -2309,395 +4368,821 @@ uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn buil return type_id; } -// Ensure that the type is compatible with the vertex attribute. +// Ensure that the type is compatible with the shader input. // If it is, simply return the given type ID. // Otherwise, create a new type, and return its ID. -uint32_t CompilerMSL::ensure_correct_attribute_type(uint32_t type_id, uint32_t location) +uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, uint32_t num_components, bool strip_array) { auto &type = get(type_id); - auto p_va = vtx_attrs_by_location.find(location); - if (p_va == end(vtx_attrs_by_location)) + uint32_t max_array_dimensions = strip_array ? 1 : 0; + + // Struct and array types must match exactly. 
+ if (type.basetype == SPIRType::Struct || type.array.size() > max_array_dimensions) return type_id; + auto p_va = inputs_by_location.find({location, component}); + if (p_va == end(inputs_by_location)) + { + if (num_components > type.vecsize) + return build_extended_vector_type(type_id, num_components); + else + return type_id; + } + + if (num_components == 0) + num_components = p_va->second.vecsize; + switch (p_va->second.format) { - case MSL_VERTEX_FORMAT_UINT8: + case MSL_SHADER_VARIABLE_FORMAT_UINT8: { switch (type.basetype) { case SPIRType::UByte: case SPIRType::UShort: case SPIRType::UInt: - return type_id; + if (num_components > type.vecsize) + return build_extended_vector_type(type_id, num_components); + else + return type_id; + case SPIRType::Short: + return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize, + SPIRType::UShort); case SPIRType::Int: - break; + return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize, + SPIRType::UInt); + default: SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader"); } - uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1); - uint32_t base_type_id = next_id++; - auto &base_type = set(base_type_id); - base_type = type; - base_type.basetype = type.basetype == SPIRType::Short ? 
SPIRType::UShort : SPIRType::UInt; - base_type.pointer = false; - - if (!type.pointer) - return base_type_id; - - uint32_t ptr_type_id = next_id++; - auto &ptr_type = set(ptr_type_id); - ptr_type = base_type; - ptr_type.pointer = true; - ptr_type.storage = type.storage; - ptr_type.parent_type = base_type_id; - return ptr_type_id; } - case MSL_VERTEX_FORMAT_UINT16: + case MSL_SHADER_VARIABLE_FORMAT_UINT16: { switch (type.basetype) { case SPIRType::UShort: case SPIRType::UInt: - return type_id; + if (num_components > type.vecsize) + return build_extended_vector_type(type_id, num_components); + else + return type_id; + case SPIRType::Int: - break; + return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize, + SPIRType::UInt); + default: SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader"); } - uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1); - uint32_t base_type_id = next_id++; - auto &base_type = set(base_type_id); - base_type = type; - base_type.basetype = SPIRType::UInt; - base_type.pointer = false; - - if (!type.pointer) - return base_type_id; - - uint32_t ptr_type_id = next_id++; - auto &ptr_type = set(ptr_type_id); - ptr_type = base_type; - ptr_type.pointer = true; - ptr_type.storage = type.storage; - ptr_type.parent_type = base_type_id; - return ptr_type_id; } default: - case MSL_VERTEX_FORMAT_OTHER: + if (num_components > type.vecsize) + type_id = build_extended_vector_type(type_id, num_components); break; } return type_id; } +void CompilerMSL::mark_struct_members_packed(const SPIRType &type) +{ + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + if (has_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked)) + return; + + set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked); + + // Problem case! Struct needs to be placed at an awkward alignment. 
+ // Mark every member of the child struct as packed. + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) + { + auto &mbr_type = get(type.member_types[i]); + if (mbr_type.basetype == SPIRType::Struct) + { + // Recursively mark structs as packed. + auto *struct_type = &mbr_type; + while (!struct_type->array.empty()) + struct_type = &get(struct_type->parent_type); + mark_struct_members_packed(*struct_type); + } + else if (!is_scalar(mbr_type)) + set_extended_member_decoration(type.self, i, SPIRVCrossDecorationPhysicalTypePacked); + } +} + +void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type) +{ + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) + { + // Handle possible recursion when a struct contains a pointer to its own type nested somewhere. + auto &mbr_type = get(type.member_types[i]); + if (mbr_type.basetype == SPIRType::Struct && !(mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer)) + { + auto *struct_type = &mbr_type; + while (!struct_type->array.empty()) + struct_type = &get(struct_type->parent_type); + + if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPhysicalTypePacked)) + continue; + + uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, i); + uint32_t msl_size = get_declared_struct_member_size_msl(type, i); + uint32_t spirv_offset = type_struct_member_offset(type, i); + uint32_t spirv_offset_next; + if (i + 1 < mbr_cnt) + spirv_offset_next = type_struct_member_offset(type, i + 1); + else + spirv_offset_next = spirv_offset + msl_size; + + // Both are complicated cases. In scalar layout, a struct of float3 might just consume 12 bytes, + // and the next member will be placed at offset 12. 
+ bool struct_is_misaligned = (spirv_offset % msl_alignment) != 0; + bool struct_is_too_large = spirv_offset + msl_size > spirv_offset_next; + uint32_t array_stride = 0; + bool struct_needs_explicit_padding = false; + + // Verify that if a struct is used as an array that ArrayStride matches the effective size of the struct. + if (!mbr_type.array.empty()) + { + array_stride = type_struct_member_array_stride(type, i); + uint32_t dimensions = uint32_t(mbr_type.array.size() - 1); + for (uint32_t dim = 0; dim < dimensions; dim++) + { + uint32_t array_size = to_array_size_literal(mbr_type, dim); + array_stride /= max(array_size, 1u); + } + + // Set expected struct size based on ArrayStride. + struct_needs_explicit_padding = true; + + // If struct size is larger than array stride, we might be able to fit, if we tightly pack. + if (get_declared_struct_size_msl(*struct_type) > array_stride) + struct_is_too_large = true; + } + + if (struct_is_misaligned || struct_is_too_large) + mark_struct_members_packed(*struct_type); + mark_scalar_layout_structs(*struct_type); + + if (struct_needs_explicit_padding) + { + msl_size = get_declared_struct_size_msl(*struct_type, true, true); + if (array_stride < msl_size) + { + SPIRV_CROSS_THROW("Cannot express an array stride smaller than size of struct type."); + } + else + { + if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget)) + { + if (array_stride != + get_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget)) + SPIRV_CROSS_THROW( + "A struct is used with different array strides. Cannot express this in MSL."); + } + else + set_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget, array_stride); + } + } + } + } +} + // Sort the members of the struct type by offset, and pack and then pad members where needed // to align MSL members with SPIR-V offsets. The struct members are iterated twice. 
Packing // occurs first, followed by padding, because packing a member reduces both its size and its // natural alignment, possibly requiring a padding member to be added ahead of it. -void CompilerMSL::align_struct(SPIRType &ib_type) +void CompilerMSL::align_struct(SPIRType &ib_type, unordered_set &aligned_structs) { - uint32_t &ib_type_id = ib_type.self; + // We align structs recursively, so stop any redundant work. + ID &ib_type_id = ib_type.self; + if (aligned_structs.count(ib_type_id)) + return; + aligned_structs.insert(ib_type_id); // Sort the members of the interface structure by their offset. // They should already be sorted per SPIR-V spec anyway. MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset); member_sorter.sort(); - uint32_t mbr_cnt = uint32_t(ib_type.member_types.size()); + auto mbr_cnt = uint32_t(ib_type.member_types.size()); + + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + // Pack any dependent struct types before we pack a parent struct. + auto &mbr_type = get(ib_type.member_types[mbr_idx]); + if (mbr_type.basetype == SPIRType::Struct) + align_struct(mbr_type, aligned_structs); + } // Test the alignment of each member, and if a member should be closer to the previous // member than the default spacing expects, it is likely that the previous member is in // a packed format. If so, and the previous member is packable, pack it. - // For example...this applies to any 3-element vector that is followed by a scalar. - uint32_t curr_offset = 0; + // For example ... this applies to any 3-element vector that is followed by a scalar. 
+ uint32_t msl_offset = 0; for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) { - if (is_member_packable(ib_type, mbr_idx)) - { - set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPacked); - set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPackedType, - get_member_packed_type(ib_type, mbr_idx)); - } + // This checks the member in isolation, if the member needs some kind of type remapping to conform to SPIR-V + // offsets, array strides and matrix strides. + ensure_member_packing_rules_msl(ib_type, mbr_idx); - // Align current offset to the current member's default alignment. - size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1; - uint32_t aligned_curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask); + // Align current offset to the current member's default alignment. If the member was packed, it will observe + // the updated alignment here. + uint32_t msl_align_mask = get_declared_struct_member_alignment_msl(ib_type, mbr_idx) - 1; + uint32_t aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask; // Fetch the member offset as declared in the SPIRV. - uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset); - if (mbr_offset > aligned_curr_offset) + uint32_t spirv_mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset); + if (spirv_mbr_offset > aligned_msl_offset) { // Since MSL and SPIR-V have slightly different struct member alignment and - // size rules, we'll pad to standard C-packing rules. If the member is farther + // size rules, we'll pad to standard C-packing rules with a char[] array. If the member is farther // away than C-packing, expects, add an inert padding member before the the member. 
- MSLStructMemberKey key = get_struct_member_key(ib_type_id, mbr_idx); - struct_member_padding[key] = mbr_offset - curr_offset; + uint32_t padding_bytes = spirv_mbr_offset - aligned_msl_offset; + set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPaddingTarget, padding_bytes); + + // Re-align as a sanity check that aligning post-padding matches up. + msl_offset += padding_bytes; + aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask; + } + else if (spirv_mbr_offset < aligned_msl_offset) + { + // This should not happen, but deal with unexpected scenarios. + // It *might* happen if a sub-struct has a larger alignment requirement in MSL than SPIR-V. + SPIRV_CROSS_THROW("Cannot represent buffer block correctly in MSL."); } + assert(aligned_msl_offset == spirv_mbr_offset); + // Increment the current offset to be positioned immediately after the current member. // Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here. if (mbr_idx + 1 < mbr_cnt) - curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size_msl(ib_type, mbr_idx)); + msl_offset = aligned_msl_offset + get_declared_struct_member_size_msl(ib_type, mbr_idx); } } -// Returns whether the specified struct member supports a packable type -// variation that is smaller than the unpacked variation of that type. 
-bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index, uint32_t base_offset) +bool CompilerMSL::validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const { - // We've already marked it as packable - if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked)) - return true; - - auto &mbr_type = get(ib_type.member_types[index]); - - uint32_t component_size = mbr_type.width / 8; - uint32_t unpacked_mbr_size; - if (mbr_type.vecsize == 3) - unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns; - else - unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns; + auto &mbr_type = get(type.member_types[index]); + uint32_t spirv_offset = get_member_decoration(type.self, index, DecorationOffset); - // Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead, - // but we will use the same mechanism. - if (is_array(mbr_type) && (is_scalar(mbr_type) || is_vector(mbr_type)) && mbr_type.vecsize <= 2 && - type_struct_member_array_stride(ib_type, index) == 4 * component_size) + if (index + 1 < type.member_types.size()) { - return true; + // First, we will check offsets. If SPIR-V offset + MSL size > SPIR-V offset of next member, + // we *must* perform some kind of remapping, no way getting around it. + // We can always pad after this member if necessary, so that case is fine. 
+ uint32_t spirv_offset_next = get_member_decoration(type.self, index + 1, DecorationOffset); + assert(spirv_offset_next >= spirv_offset); + uint32_t maximum_size = spirv_offset_next - spirv_offset; + uint32_t msl_mbr_size = get_declared_struct_member_size_msl(type, index); + if (msl_mbr_size > maximum_size) + return false; } - uint32_t mbr_offset_curr = base_offset + get_member_decoration(ib_type.self, index, DecorationOffset); - if (mbr_type.basetype == SPIRType::Struct) + if (!mbr_type.array.empty()) { - // If this is a struct type, check if any of its members need packing. - for (uint32_t i = 0; i < mbr_type.member_types.size(); i++) - { - if (is_member_packable(mbr_type, i, mbr_offset_curr)) - { - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked); - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType, - get_member_packed_type(mbr_type, i)); - } - } - size_t declared_struct_size = get_declared_struct_size(mbr_type); - size_t alignment = get_declared_struct_member_alignment(ib_type, index); - declared_struct_size = (declared_struct_size + alignment - 1) & ~(alignment - 1); - // Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself. - // This can happen for struct A { float a }; A a[]; in std140 layout. - // TODO: Emit a padded struct which can be used for this purpose. - if (is_array(mbr_type)) - { - size_t array_stride = type_struct_member_array_stride(ib_type, index); - if (array_stride > declared_struct_size) - return true; - if (array_stride < declared_struct_size) - { - // If the stride is *less* (i.e. more tightly packed), then - // we need to pack the members of the struct itself. 
- for (uint32_t i = 0; i < mbr_type.member_types.size(); i++) - { - if (is_member_packable(mbr_type, i, mbr_offset_curr + array_stride)) - { - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked); - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType, - get_member_packed_type(mbr_type, i)); - } - } - } - } - else - { - // Pack if there is not enough space between this member and next. - if (index < ib_type.member_types.size() - 1) - { - uint32_t mbr_offset_next = - base_offset + get_member_decoration(ib_type.self, index + 1, DecorationOffset); - if (declared_struct_size > mbr_offset_next - mbr_offset_curr) - { - for (uint32_t i = 0; i < mbr_type.member_types.size(); i++) - { - if (is_member_packable(mbr_type, i, mbr_offset_next)) - { - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked); - set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType, - get_member_packed_type(mbr_type, i)); - } - } - } - } - } - } - - // TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column. - //if (is_matrix(mbr_type) && mbr_type.vecsize <= 2 && type_struct_member_matrix_stride(ib_type, index) == 16) - // SPIRV_CROSS_THROW("Currently cannot support matrices with small vector size in std140 layout."); - - // Pack if the member's offset doesn't conform to the type's usual - // alignment. For example, a float3 at offset 4. - if (mbr_offset_curr % get_declared_struct_member_alignment(ib_type, index)) - return true; - - // Only vectors or 3-row matrices need to be packed. - if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3)) - return false; + // If we have an array type, array stride must match exactly with SPIR-V. - if (is_array(mbr_type)) - { - // If member is an array, and the array stride is larger than the type needs, don't pack it. - // Take into consideration multi-dimentional arrays. 
- uint32_t md_elem_cnt = 1; - size_t last_elem_idx = mbr_type.array.size() - 1; - for (uint32_t i = 0; i < last_elem_idx; i++) - md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1u); + // An exception to this requirement is if we have one array element. + // This comes from DX scalar layout workaround. + // If app tries to be cheeky and access the member out of bounds, this will not work, but this is the best we can do. + // In OpAccessChain with logical memory models, access chains must be in-bounds in SPIR-V specification. + bool relax_array_stride = mbr_type.array.back() == 1 && mbr_type.array_size_literal.back(); - uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt; - uint32_t array_stride = type_struct_member_array_stride(ib_type, index); - return unpacked_array_stride > array_stride; - } - else - { - // Pack if there is not enough space between this member and next. - // If last member, only pack if it's a row-major matrix. - if (index < ib_type.member_types.size() - 1) + if (!relax_array_stride) { - uint32_t mbr_offset_next = base_offset + get_member_decoration(ib_type.self, index + 1, DecorationOffset); - return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr; + uint32_t spirv_array_stride = type_struct_member_array_stride(type, index); + uint32_t msl_array_stride = get_declared_struct_member_array_stride_msl(type, index); + if (spirv_array_stride != msl_array_stride) + return false; } - else - return is_matrix(mbr_type); } -} -uint32_t CompilerMSL::get_member_packed_type(SPIRType &type, uint32_t index) -{ - auto &mbr_type = get(type.member_types[index]); - if (is_matrix(mbr_type) && has_member_decoration(type.self, index, DecorationRowMajor)) + if (is_matrix(mbr_type)) { - // Packed row-major matrices are stored transposed. But, we don't know if - // we're dealing with a row-major matrix at the time we need to load it. 
- // So, we'll set a packed type with the columns and rows transposed, so we'll - // know to use the correct constructor. - uint32_t new_type_id = ir.increase_bound_by(1); - auto &transpose_type = set(new_type_id); - transpose_type = mbr_type; - transpose_type.vecsize = mbr_type.columns; - transpose_type.columns = mbr_type.vecsize; - return new_type_id; + // Need to check MatrixStride as well. + uint32_t spirv_matrix_stride = type_struct_member_matrix_stride(type, index); + uint32_t msl_matrix_stride = get_declared_struct_member_matrix_stride_msl(type, index); + if (spirv_matrix_stride != msl_matrix_stride) + return false; } - return type.member_types[index]; -} -// Returns a combination of type ID and member index for use as hash key -MSLStructMemberKey CompilerMSL::get_struct_member_key(uint32_t type_id, uint32_t index) -{ - MSLStructMemberKey k = type_id; - k <<= 32; - k += index; - return k; + // Now, we check alignment. + uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, index); + if ((spirv_offset % msl_alignment) != 0) + return false; + + // We're in the clear. + return true; +} + +// Here we need to verify that the member type we declare conforms to Offset, ArrayStride or MatrixStride restrictions. +// If there is a mismatch, we need to emit remapped types, either normal types, or "packed_X" types. +// In odd cases we need to emit packed and remapped types, for e.g. weird matrices or arrays with weird array strides. +void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index) +{ + if (validate_member_packing_rules_msl(ib_type, index)) + return; + + // We failed validation. + // This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite + // match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule + // that struct alignment == max alignment of all members and struct size depends on this alignment. 
+ // Can't repack structs, but can repack pointers to structs. + auto &mbr_type = get(ib_type.member_types[index]); + bool is_buff_ptr = mbr_type.pointer && mbr_type.storage == StorageClassPhysicalStorageBuffer; + if (mbr_type.basetype == SPIRType::Struct && !is_buff_ptr) + SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct."); + + // Perform remapping here. + // There is nothing to be gained by using packed scalars, so don't attempt it. + if (!is_scalar(ib_type)) + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); + + // Try validating again, now with packed. + if (validate_member_packing_rules_msl(ib_type, index)) + return; + + // We're in deep trouble, and we need to create a new PhysicalType which matches up with what we expect. + // A lot of work goes here ... + // We will need remapping on Load and Store to translate the types between Logical and Physical. + + // First, we check if we have small vector std140 array. + // We detect this if we have an array of vectors, and array stride is greater than number of elements. + if (!mbr_type.array.empty() && !is_matrix(mbr_type)) + { + uint32_t array_stride = type_struct_member_array_stride(ib_type, index); + + // Hack off array-of-arrays until we find the array stride per element we must have to make it work. + uint32_t dimensions = uint32_t(mbr_type.array.size() - 1); + for (uint32_t dim = 0; dim < dimensions; dim++) + array_stride /= max(to_array_size_literal(mbr_type, dim), 1u); + + // Pointers are 8 bytes + uint32_t mbr_width_in_bytes = is_buff_ptr ? 
8 : (mbr_type.width / 8); + uint32_t elems_per_stride = array_stride / mbr_width_in_bytes; + + if (elems_per_stride == 3) + SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios."); + else if (elems_per_stride > 4) + SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL."); + + auto physical_type = mbr_type; + physical_type.vecsize = elems_per_stride; + physical_type.parent_type = 0; + + // If this is a physical buffer pointer, replace type with a ulongn vector. + if (is_buff_ptr) + { + physical_type.width = 64; + physical_type.basetype = to_unsigned_basetype(physical_type.width); + physical_type.pointer = false; + physical_type.pointer_depth = false; + physical_type.forward_pointer = false; + } + + uint32_t type_id = ir.increase_bound_by(1); + set(type_id, physical_type); + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id); + set_decoration(type_id, DecorationArrayStride, array_stride); + + // Remove packed_ for vectors of size 1, 2 and 4. + unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); + } + else if (is_matrix(mbr_type)) + { + // MatrixStride might be std140-esque. 
+ uint32_t matrix_stride = type_struct_member_matrix_stride(ib_type, index); + + uint32_t elems_per_stride = matrix_stride / (mbr_type.width / 8); + + if (elems_per_stride == 3) + SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios."); + else if (elems_per_stride > 4) + SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL."); + + bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor); + + auto physical_type = mbr_type; + physical_type.parent_type = 0; + if (row_major) + physical_type.columns = elems_per_stride; + else + physical_type.vecsize = elems_per_stride; + uint32_t type_id = ir.increase_bound_by(1); + set(type_id, physical_type); + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id); + + // Remove packed_ for vectors of size 1, 2 and 4. + unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); + } + else + SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL."); + + // Try validating again, now with physical type remapping. + if (validate_member_packing_rules_msl(ib_type, index)) + return; + + // We might have a particular odd scalar layout case where the last element of an array + // does not take up as much space as the ArrayStride or MatrixStride. This can happen with DX cbuffers. + // The "proper" workaround for this is extremely painful and essentially impossible in the edge case of float3[], + // so we hack around it by declaring the offending array or matrix with one less array size/col/row, + // and rely on padding to get the correct value. We will technically access arrays out of bounds into the padding region, + // but it should spill over gracefully without too much trouble. We rely on behavior like this for unsized arrays anyways. + + // E.g. 
we might observe a physical layout of: + // { float2 a[2]; float b; } in cbuffer layout where ArrayStride of a is 16, but offset of b is 24, packed right after a[1] ... + uint32_t type_id = get_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID); + auto &type = get(type_id); + + // Modify the physical type in-place. This is safe since each physical type workaround is a copy. + if (is_array(type)) + { + if (type.array.back() > 1) + { + if (!type.array_size_literal.back()) + SPIRV_CROSS_THROW("Cannot apply scalar layout workaround with spec constant array size."); + type.array.back() -= 1; + } + else + { + // We have an array of size 1, so we cannot decrement that. Our only option now is to + // force a packed layout instead, and drop the physical type remap since ArrayStride is meaningless now. + unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID); + set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked); + } + } + else if (is_matrix(type)) + { + bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor); + if (!row_major) + { + // Slice off one column. If we only have 2 columns, this might turn the matrix into a vector with one array element instead. + if (type.columns > 2) + { + type.columns--; + } + else if (type.columns == 2) + { + type.columns = 1; + assert(type.array.empty()); + type.array.push_back(1); + type.array_size_literal.push_back(true); + } + } + else + { + // Slice off one row. If we only have 2 rows, this might turn the matrix into a vector with one array element instead. + if (type.vecsize > 2) + { + type.vecsize--; + } + else if (type.vecsize == 2) + { + type.vecsize = type.columns; + type.columns = 1; + assert(type.array.empty()); + type.array.push_back(1); + type.array_size_literal.push_back(true); + } + } + } + + // This better validate now, or we must fail gracefully. 
+ if (!validate_member_packing_rules_msl(ib_type, index)) + SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL."); } void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) { - if (!has_extended_decoration(lhs_expression, SPIRVCrossDecorationPacked) || - get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType) == 0) + auto &type = expression_type(rhs_expression); + + bool lhs_remapped_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID); + bool lhs_packed_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypePacked); + auto *lhs_e = maybe_get(lhs_expression); + auto *rhs_e = maybe_get(rhs_expression); + + bool transpose = lhs_e && lhs_e->need_transpose; + + // No physical type remapping, and no packed type, so can just emit a store directly. + if (!lhs_remapped_type && !lhs_packed_type) + { + // We might not be dealing with remapped physical types or packed types, + // but we might be doing a clean store to a row-major matrix. + // In this case, we just flip transpose states, and emit the store, a transpose must be in the RHS expression, if any. + if (is_matrix(type) && lhs_e && lhs_e->need_transpose) + { + lhs_e->need_transpose = false; + + if (rhs_e && rhs_e->need_transpose) + { + // Direct copy, but might need to unpack RHS. + // Skip the transpose, as we will transpose when writing to LHS and transpose(transpose(T)) == T. + rhs_e->need_transpose = false; + statement(to_expression(lhs_expression), " = ", to_unpacked_row_major_matrix_expression(rhs_expression), + ";"); + rhs_e->need_transpose = true; + } + else + statement(to_expression(lhs_expression), " = transpose(", to_unpacked_expression(rhs_expression), ");"); + + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else if (lhs_e && lhs_e->need_transpose) + { + lhs_e->need_transpose = false; + + // Storing a column to a row-major matrix. Unroll the write. 
+ for (uint32_t c = 0; c < type.vecsize; c++) + { + auto lhs_expr = to_dereferenced_expression(lhs_expression); + auto column_index = lhs_expr.find_last_of('['); + if (column_index != string::npos) + { + statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ", + to_extract_component_expression(rhs_expression, c), ";"); + } + } + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else + CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); + } + else if (!lhs_remapped_type && !is_matrix(type) && !transpose) { + // Even if the target type is packed, we can directly store to it. We cannot store to packed matrices directly, + // since they are declared as array of vectors instead, and we need the fallback path below. CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); } else { - // Special handling when storing to a float[] or float2[] in std140 layout. + // Special handling when storing to a remapped physical type. + // This is mostly to deal with std140 padded matrices or vectors. + + TypeID physical_type_id = lhs_remapped_type ? + ID(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID)) : + type.self; - uint32_t type_id = get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType); - auto &type = get(type_id); - string lhs = to_dereferenced_expression(lhs_expression); - string rhs = to_pointer_expression(rhs_expression); - uint32_t stride = get_decoration(type_id, DecorationArrayStride); + auto &physical_type = get(physical_type_id); + + string cast_addr_space = "thread"; + auto *p_var_lhs = maybe_get_backing_variable(lhs_expression); + if (p_var_lhs) + cast_addr_space = get_type_address_space(get(p_var_lhs->basetype), lhs_expression); if (is_matrix(type)) { + const char *packed_pfx = lhs_packed_type ? "packed_" : ""; + // Packed matrices are stored as arrays of packed vectors, so we need // to assign the vectors one at a time. 
// For row-major matrices, we need to transpose the *right-hand* side, - // not the left-hand side. Otherwise, the changes will be lost. - auto *lhs_e = maybe_get(lhs_expression); - auto *rhs_e = maybe_get(rhs_expression); - bool transpose = lhs_e && lhs_e->need_transpose; + // not the left-hand side. + + // Lots of cases to cover here ... + + bool rhs_transpose = rhs_e && rhs_e->need_transpose; + SPIRType write_type = type; + string cast_expr; + + // We're dealing with transpose manually. + if (rhs_transpose) + rhs_e->need_transpose = false; + if (transpose) { + // We're dealing with transpose manually. lhs_e->need_transpose = false; - if (rhs_e) rhs_e->need_transpose = !rhs_e->need_transpose; - lhs = to_dereferenced_expression(lhs_expression); - rhs = to_pointer_expression(rhs_expression); + write_type.vecsize = type.columns; + write_type.columns = 1; + + if (physical_type.columns != type.columns) + cast_expr = join("(", cast_addr_space, " ", packed_pfx, type_to_glsl(write_type), "&)"); + + if (rhs_transpose) + { + // If RHS is also transposed, we can just copy row by row. + for (uint32_t i = 0; i < type.vecsize; i++) + { + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", + to_unpacked_row_major_matrix_expression(rhs_expression), "[", i, "];"); + } + } + else + { + auto vector_type = expression_type(rhs_expression); + vector_type.vecsize = vector_type.columns; + vector_type.columns = 1; + + // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad, + // so pick out individual components instead. 
+ for (uint32_t i = 0; i < type.vecsize; i++) + { + string rhs_row = type_to_glsl_constructor(vector_type) + "("; + for (uint32_t j = 0; j < vector_type.vecsize; j++) + { + rhs_row += join(to_enclosed_unpacked_expression(rhs_expression), "[", j, "][", i, "]"); + if (j + 1 < vector_type.vecsize) + rhs_row += ", "; + } + rhs_row += ")"; + + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";"); + } + } + + // We're dealing with transpose manually. + lhs_e->need_transpose = true; } - for (uint32_t i = 0; i < type.columns; i++) - statement(enclose_expression(lhs), "[", i, "] = ", enclose_expression(rhs), "[", i, "];"); - if (transpose) + else { - lhs_e->need_transpose = true; - if (rhs_e) rhs_e->need_transpose = !rhs_e->need_transpose; + write_type.columns = 1; + + if (physical_type.vecsize != type.vecsize) + cast_expr = join("(", cast_addr_space, " ", packed_pfx, type_to_glsl(write_type), "&)"); + + if (rhs_transpose) + { + auto vector_type = expression_type(rhs_expression); + vector_type.columns = 1; + + // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad, + // so pick out individual components instead. + for (uint32_t i = 0; i < type.columns; i++) + { + string rhs_row = type_to_glsl_constructor(vector_type) + "("; + for (uint32_t j = 0; j < vector_type.vecsize; j++) + { + // Need to explicitly unpack expression since we've mucked with transpose state. + auto unpacked_expr = to_unpacked_row_major_matrix_expression(rhs_expression); + rhs_row += join(unpacked_expr, "[", j, "][", i, "]"); + if (j + 1 < vector_type.vecsize) + rhs_row += ", "; + } + rhs_row += ")"; + + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";"); + } + } + else + { + // Copy column-by-column. 
+ for (uint32_t i = 0; i < type.columns; i++) + { + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", + to_enclosed_unpacked_expression(rhs_expression), "[", i, "];"); + } + } + } + + // We're dealing with transpose manually. + if (rhs_transpose) + rhs_e->need_transpose = true; + } + else if (transpose) + { + lhs_e->need_transpose = false; + + SPIRType write_type = type; + write_type.vecsize = 1; + write_type.columns = 1; + + // Storing a column to a row-major matrix. Unroll the write. + for (uint32_t c = 0; c < type.vecsize; c++) + { + auto lhs_expr = to_enclosed_expression(lhs_expression); + auto column_index = lhs_expr.find_last_of('['); + if (column_index != string::npos) + { + statement("((", cast_addr_space, " ", type_to_glsl(write_type), "*)&", + lhs_expr.insert(column_index, join('[', c, ']', ")")), " = ", + to_extract_component_expression(rhs_expression, c), ";"); + } } + + lhs_e->need_transpose = true; } - else if (is_array(type) && stride == 4 * type.width / 8) + else if ((is_matrix(physical_type) || is_array(physical_type)) && physical_type.vecsize > type.vecsize) { + assert(type.vecsize >= 1 && type.vecsize <= 3); + + // If we have packed types, we cannot use swizzled stores. + // We could technically unroll the store for each element if needed. + // When remapping to a std140 physical type, we always get float4, + // and the packed decoration should always be removed. + assert(!lhs_packed_type); + + string lhs = to_dereferenced_expression(lhs_expression); + string rhs = to_pointer_expression(rhs_expression); + // Unpack the expression so we can store to it with a float or float2. // It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead. 
- if (is_scalar(type)) - lhs = enclose_expression(lhs) + ".x"; - else if (is_vector(type) && type.vecsize == 2) - lhs = enclose_expression(lhs) + ".xy"; + lhs = join("(", cast_addr_space, " ", type_to_glsl(type), "&)", enclose_expression(lhs)); + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); } - - if (!is_matrix(type)) + else if (!is_matrix(type)) { + string lhs = to_dereferenced_expression(lhs_expression); + string rhs = to_pointer_expression(rhs_expression); if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) statement(lhs, " = ", rhs, ";"); } + register_write(lhs_expression); } } +static bool expression_ends_with(const string &expr_str, const std::string &ending) +{ + if (expr_str.length() >= ending.length()) + return (expr_str.compare(expr_str.length() - ending.length(), ending.length(), ending) == 0); + else + return false; +} + // Converts the format of the current expression from packed to unpacked, // by wrapping the expression in a constructor of the appropriate type. -string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id) +// Also, handle special physical ID remapping scenarios, similar to emit_store_statement(). +string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool packed, bool row_major) { - const SPIRType *packed_type = nullptr; - uint32_t stride = 0; - if (packed_type_id) + // Trivial case, nothing to do. 
+ if (physical_type_id == 0 && !packed) + return expr_str; + + const SPIRType *physical_type = nullptr; + if (physical_type_id) + physical_type = &get(physical_type_id); + + static const char *swizzle_lut[] = { + ".x", + ".xy", + ".xyz", + }; + + if (physical_type && is_vector(*physical_type) && is_array(*physical_type) && + physical_type->vecsize > type.vecsize && !expression_ends_with(expr_str, swizzle_lut[type.vecsize - 1])) { - packed_type = &get(packed_type_id); - stride = get_decoration(packed_type_id, DecorationArrayStride); + // std140 array cases for vectors. + assert(type.vecsize >= 1 && type.vecsize <= 3); + return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1]; + } + else if (physical_type && is_matrix(*physical_type) && is_vector(type) && physical_type->vecsize > type.vecsize) + { + // Extract column from padded matrix. + assert(type.vecsize >= 1 && type.vecsize <= 3); + return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1]; } - - // float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead. - if (packed_type && is_array(*packed_type) && is_scalar(*packed_type) && stride == 4 * packed_type->width / 8) - return enclose_expression(expr_str) + ".x"; - else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2 && - stride == 4 * packed_type->width / 8) - return enclose_expression(expr_str) + ".xy"; else if (is_matrix(type)) { // Packed matrices are stored as arrays of packed vectors. Unfortunately, // we can't just pass the array straight to the matrix constructor. We have to // pass each vector individually, so that they can be unpacked to normal vectors. - if (!packed_type) - packed_type = &type; - const char *base_type = packed_type->width == 16 ? 
"half" : "float"; - string unpack_expr = join(type_to_glsl(*packed_type), "("); - for (uint32_t i = 0; i < packed_type->columns; i++) + if (!physical_type) + physical_type = &type; + + uint32_t vecsize = type.vecsize; + uint32_t columns = type.columns; + if (row_major) + swap(vecsize, columns); + + uint32_t physical_vecsize = row_major ? physical_type->columns : physical_type->vecsize; + + const char *base_type = type.width == 16 ? "half" : "float"; + string unpack_expr = join(base_type, columns, "x", vecsize, "("); + + const char *load_swiz = ""; + + if (physical_vecsize != vecsize) + load_swiz = swizzle_lut[vecsize - 1]; + + for (uint32_t i = 0; i < columns; i++) { if (i > 0) unpack_expr += ", "; - unpack_expr += join(base_type, packed_type->vecsize, "(", expr_str, "[", i, "])"); + + if (packed) + unpack_expr += join(base_type, physical_vecsize, "(", expr_str, "[", i, "]", ")", load_swiz); + else + unpack_expr += join(expr_str, "[", i, "]", load_swiz); } + unpack_expr += ")"; return unpack_expr; } else + { return join(type_to_glsl(type), "(", expr_str, ")"); + } } // Emits the file header info @@ -2706,6 +5191,11 @@ void CompilerMSL::emit_header() // This particular line can be overridden during compilation, so make it a flag and not a pragma line. 
if (suppress_missing_prototypes) statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); + + // Disable warning about missing braces for array template to make arrays a value type + if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0) + statement("#pragma clang diagnostic ignored \"-Wmissing-braces\""); + for (auto &pragma : pragma_lines) statement(pragma); @@ -2743,21 +5233,230 @@ void CompilerMSL::add_typedef_line(const string &line) force_recompile(); } +// Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared +void CompilerMSL::emit_custom_templates() +{ + static const char * const address_spaces[] = { + "thread", "constant", "device", "threadgroup", "threadgroup_imageblock", "ray_data", "object_data" + }; + + for (const auto &spv_func : spv_function_implementations) + { + switch (spv_func) + { + case SPVFuncImplUnsafeArray: + statement("template"); + statement("struct spvUnsafeArray"); + begin_scope(); + statement("T elements[Num ? 
Num : 1];"); + statement(""); + statement("thread T& operator [] (size_t pos) thread"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const thread T& operator [] (size_t pos) const thread"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("device T& operator [] (size_t pos) device"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const device T& operator [] (size_t pos) const device"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("constexpr const constant T& operator [] (size_t pos) const constant"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("threadgroup T& operator [] (size_t pos) threadgroup"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const threadgroup T& operator [] (size_t pos) const threadgroup"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + end_scope_decl(); + statement(""); + break; + + case SPVFuncImplStorageMatrix: + statement("template"); + statement("struct spvStorageMatrix"); + begin_scope(); + statement("vec columns[Cols];"); + statement(""); + for (size_t method_idx = 0; method_idx < sizeof(address_spaces) / sizeof(address_spaces[0]); ++method_idx) + { + // Some address spaces require particular features. 
+ if (method_idx == 4) // threadgroup_imageblock + statement("#ifdef __HAVE_IMAGEBLOCKS__"); + else if (method_idx == 5) // ray_data + statement("#ifdef __HAVE_RAYTRACING__"); + else if (method_idx == 6) // object_data + statement("#ifdef __HAVE_MESH__"); + const string &method_as = address_spaces[method_idx]; + statement("spvStorageMatrix() ", method_as, " = default;"); + if (method_idx != 1) // constant + { + statement(method_as, " spvStorageMatrix& operator=(initializer_list> cols) ", + method_as); + begin_scope(); + statement("size_t i;"); + statement("thread vec* col;"); + statement("for (i = 0, col = cols.begin(); i < Cols; ++i, ++col)"); + statement(" columns[i] = *col;"); + statement("return *this;"); + end_scope(); + } + statement(""); + for (size_t param_idx = 0; param_idx < sizeof(address_spaces) / sizeof(address_spaces[0]); ++param_idx) + { + if (param_idx != method_idx) + { + if (param_idx == 4) // threadgroup_imageblock + statement("#ifdef __HAVE_IMAGEBLOCKS__"); + else if (param_idx == 5) // ray_data + statement("#ifdef __HAVE_RAYTRACING__"); + else if (param_idx == 6) // object_data + statement("#ifdef __HAVE_MESH__"); + } + const string ¶m_as = address_spaces[param_idx]; + statement("spvStorageMatrix(const ", param_as, " matrix& m) ", method_as); + begin_scope(); + statement("for (size_t i = 0; i < Cols; ++i)"); + statement(" columns[i] = m.columns[i];"); + end_scope(); + statement("spvStorageMatrix(const ", param_as, " spvStorageMatrix& m) ", method_as, " = default;"); + if (method_idx != 1) // constant + { + statement(method_as, " spvStorageMatrix& operator=(const ", param_as, + " matrix& m) ", method_as); + begin_scope(); + statement("for (size_t i = 0; i < Cols; ++i)"); + statement(" columns[i] = m.columns[i];"); + statement("return *this;"); + end_scope(); + statement(method_as, " spvStorageMatrix& operator=(const ", param_as, " spvStorageMatrix& m) ", + method_as, " = default;"); + } + if (param_idx != method_idx && param_idx >= 4) + 
statement("#endif"); + statement(""); + } + statement("operator matrix() const ", method_as); + begin_scope(); + statement("matrix m;"); + statement("for (int i = 0; i < Cols; ++i)"); + statement(" m.columns[i] = columns[i];"); + statement("return m;"); + end_scope(); + statement(""); + statement("vec operator[](size_t idx) const ", method_as); + begin_scope(); + statement("return columns[idx];"); + end_scope(); + if (method_idx != 1) // constant + { + statement(method_as, " vec& operator[](size_t idx) ", method_as); + begin_scope(); + statement("return columns[idx];"); + end_scope(); + } + if (method_idx >= 4) + statement("#endif"); + statement(""); + } + end_scope_decl(); + statement(""); + statement("template"); + statement("matrix transpose(spvStorageMatrix m)"); + begin_scope(); + statement("return transpose(matrix(m));"); + end_scope(); + statement(""); + statement("typedef spvStorageMatrix spvStorage_half2x2;"); + statement("typedef spvStorageMatrix spvStorage_half2x3;"); + statement("typedef spvStorageMatrix spvStorage_half2x4;"); + statement("typedef spvStorageMatrix spvStorage_half3x2;"); + statement("typedef spvStorageMatrix spvStorage_half3x3;"); + statement("typedef spvStorageMatrix spvStorage_half3x4;"); + statement("typedef spvStorageMatrix spvStorage_half4x2;"); + statement("typedef spvStorageMatrix spvStorage_half4x3;"); + statement("typedef spvStorageMatrix spvStorage_half4x4;"); + statement("typedef spvStorageMatrix spvStorage_float2x2;"); + statement("typedef spvStorageMatrix spvStorage_float2x3;"); + statement("typedef spvStorageMatrix spvStorage_float2x4;"); + statement("typedef spvStorageMatrix spvStorage_float3x2;"); + statement("typedef spvStorageMatrix spvStorage_float3x3;"); + statement("typedef spvStorageMatrix spvStorage_float3x4;"); + statement("typedef spvStorageMatrix spvStorage_float4x2;"); + statement("typedef spvStorageMatrix spvStorage_float4x3;"); + statement("typedef spvStorageMatrix spvStorage_float4x4;"); + statement(""); + 
break; + + default: + break; + } + } +} + // Emits any needed custom function bodies. +// Metal helper functions must be static force-inline, i.e. static inline __attribute__((always_inline)) +// otherwise they will cause problems when linked together in a single Metallib. void CompilerMSL::emit_custom_functions() { - for (uint32_t i = SPVFuncImplArrayCopyMultidimMax; i >= 2; i--) + for (uint32_t i = kArrayCopyMultidimMax; i >= 2; i--) if (spv_function_implementations.count(static_cast(SPVFuncImplArrayCopyMultidimBase + i))) spv_function_implementations.insert(static_cast(SPVFuncImplArrayCopyMultidimBase + i - 1)); - for (auto &spv_func : spv_function_implementations) + if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler)) + { + // Unfortunately, this one needs a lot of the other functions to compile OK. + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW( + "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0."); + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplTextureSwizzle); + if (msl_options.swizzle_texture_samples) + spv_function_implementations.insert(SPVFuncImplGatherSwizzle); + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + spv_function_implementations.insert(static_cast(i)); + spv_function_implementations.insert(SPVFuncImplExpandITUFullRange); + spv_function_implementations.insert(SPVFuncImplExpandITUNarrowRange); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT709); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT601); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020); + } + + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + if (spv_function_implementations.count(static_cast(i))) + 
spv_function_implementations.insert(SPVFuncImplForwardArgs); + + if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle)) + { + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplGetSwizzle); + } + + for (const auto &spv_func : spv_function_implementations) { switch (spv_func) { case SPVFuncImplMod: statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()"); statement("template"); - statement("Tx mod(Tx x, Ty y)"); + statement("inline Tx mod(Tx x, Ty y)"); begin_scope(); statement("return x - y * floor(x / y);"); end_scope(); @@ -2767,7 +5466,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplRadians: statement("// Implementation of the GLSL radians() function"); statement("template"); - statement("T radians(T d)"); + statement("inline T radians(T d)"); begin_scope(); statement("return d * T(0.01745329251);"); end_scope(); @@ -2777,7 +5476,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplDegrees: statement("// Implementation of the GLSL degrees() function"); statement("template"); - statement("T degrees(T r)"); + statement("inline T degrees(T r)"); begin_scope(); statement("return r * T(57.2957795131);"); end_scope(); @@ -2787,7 +5486,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplFindILsb: statement("// Implementation of the GLSL findLSB() function"); statement("template"); - statement("T findLSB(T x)"); + statement("inline T spvFindLSB(T x)"); begin_scope(); statement("return select(ctz(x), T(-1), x == T(0));"); end_scope(); @@ -2797,7 +5496,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplFindUMsb: statement("// Implementation of the unsigned GLSL findMSB() function"); statement("template"); - statement("T findUMSB(T x)"); + statement("inline T spvFindUMSB(T 
x)"); begin_scope(); statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));"); end_scope(); @@ -2807,7 +5506,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplFindSMsb: statement("// Implementation of the signed GLSL findMSB() function"); statement("template"); - statement("T findSMSB(T x)"); + statement("inline T spvFindSMSB(T x)"); begin_scope(); statement("T v = select(x, T(-1) - x, x < T(0));"); statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));"); @@ -2818,7 +5517,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplSSign: statement("// Implementation of the GLSL sign() function for integer types"); statement("template::value>::type>"); - statement("T sign(T x)"); + statement("inline T sign(T x)"); begin_scope(); statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));"); end_scope(); @@ -2826,42 +5525,34 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplArrayCopy: - statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via " - "assignment."); - statement("template"); - statement("void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])"); - begin_scope(); - statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); - end_scope(); - statement(""); - - statement("template"); - statement("void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])"); - begin_scope(); - statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); - end_scope(); - statement(""); - break; - case SPVFuncImplArrayOfArrayCopy2Dim: case SPVFuncImplArrayOfArrayCopy3Dim: case SPVFuncImplArrayOfArrayCopy4Dim: case SPVFuncImplArrayOfArrayCopy5Dim: case SPVFuncImplArrayOfArrayCopy6Dim: { + // Unfortunately we cannot template on the address space, so combinatorial explosion it is. 
static const char *function_name_tags[] = { - "FromStack", - "FromConstant", + "FromConstantToStack", "FromConstantToThreadGroup", "FromStackToStack", + "FromStackToThreadGroup", "FromThreadGroupToStack", "FromThreadGroupToThreadGroup", + "FromDeviceToDevice", "FromConstantToDevice", "FromStackToDevice", + "FromThreadGroupToDevice", "FromDeviceToStack", "FromDeviceToThreadGroup", }; static const char *src_address_space[] = { - "thread const", - "constant", + "constant", "constant", "thread const", "thread const", + "threadgroup const", "threadgroup const", "device const", "constant", + "thread const", "threadgroup const", "device const", "device const", + }; + + static const char *dst_address_space[] = { + "thread", "threadgroup", "thread", "threadgroup", "thread", "threadgroup", + "device", "device", "device", "device", "thread", "threadgroup", }; - for (uint32_t variant = 0; variant < 2; variant++) + for (uint32_t variant = 0; variant < 12; variant++) { - uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase; + uint8_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase; string tmp = "template 0) + { + string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width); + statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); + statement(force_inline); + statement("uint2 spvTexelBufferCoord(uint tc)"); + begin_scope(); + statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");")); + end_scope(); + statement(""); + } + else + { + statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); + statement( + "#define spvTexelBufferCoord(tc, tex) uint2((tc) % (tex).get_width(), (tc) / (tex).get_width())"); + statement(""); + } + break; + } + + // Emulate texture2D atomic operations + case SPVFuncImplImage2DAtomicCoords: + { + if (msl_options.supports_msl_version(1, 2)) + { + statement("// The required alignment of a linear texture of R32Uint format."); + 
statement("constant uint spvLinearTextureAlignmentOverride [[function_constant(", + msl_options.r32ui_alignment_constant_id, ")]];"); + statement("constant uint spvLinearTextureAlignment = ", + "is_function_constant_defined(spvLinearTextureAlignmentOverride) ? ", + "spvLinearTextureAlignmentOverride : ", msl_options.r32ui_linear_texture_alignment, ";"); + } + else + { + statement("// The required alignment of a linear texture of R32Uint format."); + statement("constant uint spvLinearTextureAlignment = ", msl_options.r32ui_linear_texture_alignment, + ";"); + } + statement("// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics"); + statement("#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + ", + " spvLinearTextureAlignment / 4 - 1) & ~(", + " spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)"); statement(""); break; } + // "fadd" intrinsic support + case SPVFuncImplFAdd: + statement("template"); + statement("[[clang::optnone]] T spvFAdd(T l, T r)"); + begin_scope(); + statement("return fma(T(1), l, r);"); + end_scope(); + statement(""); + break; + + // "fsub" intrinsic support + case SPVFuncImplFSub: + statement("template"); + statement("[[clang::optnone]] T spvFSub(T l, T r)"); + begin_scope(); + statement("return fma(T(-1), r, l);"); + end_scope(); + statement(""); + break; + + // "fmul' intrinsic support + case SPVFuncImplFMul: + statement("template"); + statement("[[clang::optnone]] T spvFMul(T l, T r)"); + begin_scope(); + statement("return fma(l, r, T(0));"); + end_scope(); + statement(""); + + statement("template"); + statement("[[clang::optnone]] vec spvFMulVectorMatrix(vec v, matrix m)"); + begin_scope(); + statement("vec res = vec(0);"); + statement("for (uint i = Rows; i > 0; --i)"); + begin_scope(); + statement("vec tmp(0);"); + statement("for (uint j = 0; j < Cols; ++j)"); + begin_scope(); + statement("tmp[j] = m[j][i - 1];"); + end_scope(); + statement("res = fma(tmp, vec(v[i - 1]), res);"); + 
end_scope(); + statement("return res;"); + end_scope(); + statement(""); + + statement("template"); + statement("[[clang::optnone]] vec spvFMulMatrixVector(matrix m, vec v)"); + begin_scope(); + statement("vec res = vec(0);"); + statement("for (uint i = Cols; i > 0; --i)"); + begin_scope(); + statement("res = fma(m[i - 1], vec(v[i - 1]), res);"); + end_scope(); + statement("return res;"); + end_scope(); + statement(""); + + statement("template"); + statement("[[clang::optnone]] matrix spvFMulMatrixMatrix(matrix l, matrix r)"); + begin_scope(); + statement("matrix res;"); + statement("for (uint i = 0; i < RCols; i++)"); + begin_scope(); + statement("vec tmp(0);"); + statement("for (uint j = 0; j < LCols; j++)"); + begin_scope(); + statement("tmp = fma(vec(r[i][j]), l[j], tmp);"); + end_scope(); + statement("res[i] = tmp;"); + end_scope(); + statement("return res;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplQuantizeToF16: + // Ensure fast-math is disabled to match Vulkan results. + // SpvHalfTypeSelector is used to match the half* template type to the float* template type. + // Depending on GPU, MSL does not always flush converted subnormal halfs to zero, + // as required by OpQuantizeToF16, so check for subnormals and flush them to zero. 
+ statement("template struct SpvHalfTypeSelector;"); + statement("template <> struct SpvHalfTypeSelector { public: using H = half; };"); + statement("template struct SpvHalfTypeSelector> { using H = vec; };"); + statement("template::H>"); + statement("[[clang::optnone]] F spvQuantizeToF16(F fval)"); + begin_scope(); + statement("H hval = H(fval);"); + statement("hval = select(copysign(H(0), hval), hval, isnormal(hval) || isinf(hval) || isnan(hval));"); + statement("return F(hval);"); + end_scope(); + statement(""); + break; + + // Emulate texturecube_array with texture2d_array for iOS where this type is not available + case SPVFuncImplCubemapTo2DArrayFace: + statement(force_inline); + statement("float3 spvCubemapTo2DArrayFace(float3 P)"); + begin_scope(); + statement("float3 Coords = abs(P.xyz);"); + statement("float CubeFace = 0;"); + statement("float ProjectionAxis = 0;"); + statement("float u = 0;"); + statement("float v = 0;"); + statement("if (Coords.x >= Coords.y && Coords.x >= Coords.z)"); + begin_scope(); + statement("CubeFace = P.x >= 0 ? 0 : 1;"); + statement("ProjectionAxis = Coords.x;"); + statement("u = P.x >= 0 ? -P.z : P.z;"); + statement("v = -P.y;"); + end_scope(); + statement("else if (Coords.y >= Coords.x && Coords.y >= Coords.z)"); + begin_scope(); + statement("CubeFace = P.y >= 0 ? 2 : 3;"); + statement("ProjectionAxis = Coords.y;"); + statement("u = P.x;"); + statement("v = P.y >= 0 ? P.z : -P.z;"); + end_scope(); + statement("else"); + begin_scope(); + statement("CubeFace = P.z >= 0 ? 4 : 5;"); + statement("ProjectionAxis = Coords.z;"); + statement("u = P.z >= 0 ? 
P.x : -P.x;"); + statement("v = -P.y;"); + end_scope(); + statement("u = 0.5 * (u/ProjectionAxis + 1);"); + statement("v = 0.5 * (v/ProjectionAxis + 1);"); + statement("return float3(u, v, CubeFace);"); + end_scope(); + statement(""); + break; + case SPVFuncImplInverse4x4: statement("// Returns the determinant of a 2x2 matrix."); - statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)"); + statement(force_inline); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); statement(""); statement("// Returns the determinant of a 3x3 matrix."); - statement("inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + statement(force_inline); + statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " "float c2, float c3)"); begin_scope(); statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, " @@ -2923,6 +5791,7 @@ void CompilerMSL::emit_custom_functions() statement(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); + statement(force_inline); statement("float4x4 spvInverse4x4(float4x4 m)"); begin_scope(); statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); @@ -2979,7 +5848,8 @@ void CompilerMSL::emit_custom_functions() if (spv_function_implementations.count(SPVFuncImplInverse4x4) == 0) { statement("// Returns the determinant of a 2x2 matrix."); - statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)"); + statement(force_inline); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); @@ -2988,6 +5858,7 @@ void CompilerMSL::emit_custom_functions() statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement(force_inline); statement("float3x3 spvInverse3x3(float3x3 m)"); begin_scope(); statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); @@ -3018,6 +5889,7 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplInverse2x2: statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); + statement(force_inline); statement("float2x2 spvInverse2x2(float2x2 m)"); begin_scope(); statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); @@ -3039,65 +5911,24 @@ void CompilerMSL::emit_custom_functions() statement(""); break; - case SPVFuncImplRowMajor2x3: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)"); - begin_scope(); - statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor2x4: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)"); - begin_scope(); - statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], " - "m[1][3]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor3x2: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)"); - begin_scope(); - statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor3x4: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)"); - begin_scope(); - statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], " - "m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));"); - end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor4x2: - statement("// Implementation of a conversion of matrix content from RowMajor to 
ColumnMajor organization."); - statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)"); + case SPVFuncImplForwardArgs: + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type& x)"); begin_scope(); - statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), " - "float2(m[1][1], m[3][1]));"); + statement("return static_cast(x);"); end_scope(); - statement(""); - break; - - case SPVFuncImplRowMajor4x3: - statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); - statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)"); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type&& x)"); begin_scope(); - statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), " - "float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));"); + statement("return static_cast(x);"); end_scope(); statement(""); break; - case SPVFuncImplTextureSwizzle: + case SPVFuncImplGetSwizzle: statement("enum class spvSwizzle : uint"); begin_scope(); statement("none = 0,"); @@ -3109,20 +5940,6 @@ void CompilerMSL::emit_custom_functions() statement("alpha"); end_scope_decl(); statement(""); - statement("template struct spvRemoveReference { typedef T type; };"); - statement("template struct spvRemoveReference { typedef T type; };"); - statement("template struct spvRemoveReference { typedef T type; };"); - statement("template inline constexpr thread T&& spvForward(thread typename " - "spvRemoveReference::type& x)"); - begin_scope(); - statement("return static_cast(x);"); - end_scope(); - statement("template inline constexpr thread T&& 
spvForward(thread typename " - "spvRemoveReference::type&& x)"); - begin_scope(); - statement("return static_cast(x);"); - end_scope(); - statement(""); statement("template"); statement("inline T spvGetSwizzle(vec x, T c, spvSwizzle s)"); begin_scope(); @@ -3145,6 +5962,9 @@ void CompilerMSL::emit_custom_functions() end_scope(); end_scope(); statement(""); + break; + + case SPVFuncImplTextureSwizzle: statement("// Wrapper function that swizzles texture samples and fetches."); statement("template"); statement("inline vec spvTextureSwizzle(vec x, uint s)"); @@ -3163,11 +5983,14 @@ void CompilerMSL::emit_custom_functions() statement("return spvTextureSwizzle(vec(x, 0, 0, 1), s).x;"); end_scope(); statement(""); + break; + + case SPVFuncImplGatherSwizzle: statement("// Wrapper function that swizzles texture gathers."); - statement("template"); - statement( - "inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) " - "METAL_CONST_ARG(c)"); + statement("template class Tex, " + "typename... Ts>"); + statement("inline vec spvGatherSwizzle(const thread Tex& t, sampler s, " + "uint sw, component c, Ts... params) METAL_CONST_ARG(c)"); begin_scope(); statement("if (sw)"); begin_scope(); @@ -3204,10 +6027,14 @@ void CompilerMSL::emit_custom_functions() end_scope(); end_scope(); statement(""); + break; + + case SPVFuncImplGatherCompareSwizzle: statement("// Wrapper function that swizzles depth texture gathers."); - statement("template"); - statement( - "inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) "); + statement("template class Tex, " + "typename... Ts>"); + statement("inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler " + "s, uint sw, Ts... 
params) "); begin_scope(); statement("if (sw)"); begin_scope(); @@ -3230,16 +6057,89 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplSubgroupBroadcast: + // Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting + // them as integers. + statement("template"); + statement("inline T spvSubgroupBroadcast(T value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_broadcast(value, lane);"); + else + statement("return simd_broadcast(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_broadcast((ushort)value, lane);"); + else + statement("return !!simd_broadcast((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupBroadcast(vec value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_broadcast((vec)value, lane);"); + else + statement("return (vec)simd_broadcast((vec)value, lane);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBroadcastFirst: + statement("template"); + statement("inline T spvSubgroupBroadcastFirst(T value)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_broadcast_first(value);"); + else + statement("return simd_broadcast_first(value);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupBroadcastFirst(bool value)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_broadcast_first((ushort)value);"); + else + statement("return !!simd_broadcast_first((ushort)value);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec 
spvSubgroupBroadcastFirst(vec value)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_broadcast_first((vec)value);"); + else + statement("return (vec)simd_broadcast_first((vec)value);"); + end_scope(); + statement(""); + break; + case SPVFuncImplSubgroupBallot: statement("inline uint4 spvSubgroupBallot(bool value)"); begin_scope(); - statement("simd_vote vote = simd_ballot(value);"); - statement("// simd_ballot() returns a 64-bit integer-like object, but"); - statement("// SPIR-V callers expect a uint4. We must convert."); - statement("// FIXME: This won't include higher bits if Apple ever supports"); - statement("// 128 lanes in an SIMD-group."); - statement("return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> " - "32) & 0xFFFFFFFF), 0, 0);"); + if (msl_options.use_quadgroup_operation()) + { + statement("return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0);"); + } + else if (msl_options.is_ios()) + { + // The current simd_vote on iOS uses a 32-bit integer-like object. + statement("return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0);"); + } + else + { + statement("simd_vote vote = simd_ballot(value);"); + statement("// simd_ballot() returns a 64-bit integer-like object, but"); + statement("// SPIR-V callers expect a uint4. 
We must convert."); + statement("// FIXME: This won't include higher bits if Apple ever supports"); + statement("// 128 lanes in an SIMD-group."); + statement("return uint4(as_type((simd_vote::vote_t)vote), 0, 0);"); + } end_scope(); statement(""); break; @@ -3253,8 +6153,18 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplSubgroupBallotFindLSB: - statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot)"); + statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)"); begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } + statement("ballot &= mask;"); statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + " "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);"); end_scope(); @@ -3262,8 +6172,18 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplSubgroupBallotFindMSB: - statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot)"); + statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)"); begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } + statement("ballot &= mask;"); statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - " "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), " "ballot.z == 0), ballot.w == 0);"); @@ -3272,24 +6192,52 @@ 
void CompilerMSL::emit_custom_functions() break; case SPVFuncImplSubgroupBallotBitCount: - statement("inline uint spvSubgroupBallotBitCount(uint4 ballot)"); + statement("inline uint spvPopCount4(uint4 ballot)"); begin_scope(); statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);"); end_scope(); statement(""); + statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)"); + begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } + statement("return spvPopCount4(ballot & mask);"); + end_scope(); + statement(""); statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); begin_scope(); - statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), " - "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), " - "uint2(0));"); - statement("return spvSubgroupBallotBitCount(ballot & mask);"); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), " + "uint2(0));"); + } + statement("return spvPopCount4(ballot & mask);"); end_scope(); statement(""); statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); begin_scope(); - statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), " - "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 
0)), uint2(0));"); - statement("return spvSubgroupBallotBitCount(ballot & mask);"); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));"); + } + statement("return spvPopCount4(ballot & mask);"); end_scope(); statement(""); break; @@ -3302,70 +6250,1013 @@ void CompilerMSL::emit_custom_functions() statement("template"); statement("inline bool spvSubgroupAllEqual(T value)"); begin_scope(); - statement("return simd_all(value == simd_broadcast_first(value));"); + if (msl_options.use_quadgroup_operation()) + statement("return quad_all(all(value == quad_broadcast_first(value)));"); + else + statement("return simd_all(all(value == simd_broadcast_first(value)));"); end_scope(); statement(""); statement("template<>"); statement("inline bool spvSubgroupAllEqual(bool value)"); begin_scope(); - statement("return simd_all(value) || !simd_any(value);"); + if (msl_options.use_quadgroup_operation()) + statement("return quad_all(value) || !quad_any(value);"); + else + statement("return simd_all(value) || !simd_any(value);"); + end_scope(); + statement(""); + statement("template"); + statement("inline bool spvSubgroupAllEqual(vec value)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_all(all(value == (vec)quad_broadcast_first((vec)value)));"); + else + statement("return simd_all(all(value == (vec)simd_broadcast_first((vec)value)));"); end_scope(); statement(""); break; - case SPVFuncImplReflectScalar: - // Metal does not support scalar versions of these functions. 
+ case SPVFuncImplSubgroupShuffle: statement("template"); - statement("inline T spvReflect(T i, T n)"); + statement("inline T spvSubgroupShuffle(T value, ushort lane)"); begin_scope(); - statement("return i - T(2) * i * n * n;"); + if (msl_options.use_quadgroup_operation()) + statement("return quad_shuffle(value, lane);"); + else + statement("return simd_shuffle(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffle(bool value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_shuffle((ushort)value, lane);"); + else + statement("return !!simd_shuffle((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffle(vec value, ushort lane)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_shuffle((vec)value, lane);"); + else + statement("return (vec)simd_shuffle((vec)value, lane);"); end_scope(); statement(""); break; - case SPVFuncImplRefractScalar: - // Metal does not support scalar versions of these functions. 
+ case SPVFuncImplSubgroupShuffleXor: statement("template"); - statement("inline T spvRefract(T i, T n, T eta)"); - begin_scope(); - statement("T NoI = n * i;"); - statement("T NoI2 = NoI * NoI;"); - statement("T k = T(1) - eta * eta * (T(1) - NoI2);"); - statement("if (k < T(0))"); + statement("inline T spvSubgroupShuffleXor(T value, ushort mask)"); begin_scope(); - statement("return T(0);"); + if (msl_options.use_quadgroup_operation()) + statement("return quad_shuffle_xor(value, mask);"); + else + statement("return simd_shuffle_xor(value, mask);"); end_scope(); - statement("else"); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)"); begin_scope(); - statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_shuffle_xor((ushort)value, mask);"); + else + statement("return !!simd_shuffle_xor((ushort)value, mask);"); end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleXor(vec value, ushort mask)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_shuffle_xor((vec)value, mask);"); + else + statement("return (vec)simd_shuffle_xor((vec)value, mask);"); end_scope(); statement(""); break; - default: + case SPVFuncImplSubgroupShuffleUp: + statement("template"); + statement("inline T spvSubgroupShuffleUp(T value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_shuffle_up(value, delta);"); + else + statement("return simd_shuffle_up(value, delta);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_shuffle_up((ushort)value, delta);"); + else + statement("return !!simd_shuffle_up((ushort)value, delta);"); + end_scope(); + 
statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleUp(vec value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_shuffle_up((vec)value, delta);"); + else + statement("return (vec)simd_shuffle_up((vec)value, delta);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffleDown: + statement("template"); + statement("inline T spvSubgroupShuffleDown(T value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_shuffle_down(value, delta);"); + else + statement("return simd_shuffle_down(value, delta);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_shuffle_down((ushort)value, delta);"); + else + statement("return !!simd_shuffle_down((ushort)value, delta);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleDown(vec value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_shuffle_down((vec)value, delta);"); + else + statement("return (vec)simd_shuffle_down((vec)value, delta);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplQuadBroadcast: + statement("template"); + statement("inline T spvQuadBroadcast(T value, uint lane)"); + begin_scope(); + statement("return quad_broadcast(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvQuadBroadcast(bool value, uint lane)"); + begin_scope(); + statement("return !!quad_broadcast((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvQuadBroadcast(vec value, uint lane)"); + begin_scope(); + statement("return (vec)quad_broadcast((vec)value, lane);"); + end_scope(); + 
statement(""); + break; + + case SPVFuncImplQuadSwap: + // We can implement this easily based on the following table giving + // the target lane ID from the direction and current lane ID: + // Direction + // | 0 | 1 | 2 | + // ---+---+---+---+ + // L 0 | 1 2 3 + // a 1 | 0 3 2 + // n 2 | 3 0 1 + // e 3 | 2 1 0 + // Notice that target = source ^ (direction + 1). + statement("template"); + statement("inline T spvQuadSwap(T value, uint dir)"); + begin_scope(); + statement("return quad_shuffle_xor(value, dir + 1);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvQuadSwap(bool value, uint dir)"); + begin_scope(); + statement("return !!quad_shuffle_xor((ushort)value, dir + 1);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvQuadSwap(vec value, uint dir)"); + begin_scope(); + statement("return (vec)quad_shuffle_xor((vec)value, dir + 1);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplReflectScalar: + // Metal does not support scalar versions of these functions. + // Ensure fast-math is disabled to match Vulkan results. + statement("template"); + statement("[[clang::optnone]] T spvReflect(T i, T n)"); + begin_scope(); + statement("return i - T(2) * i * n * n;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRefractScalar: + // Metal does not support scalar versions of these functions. + statement("template"); + statement("inline T spvRefract(T i, T n, T eta)"); + begin_scope(); + statement("T NoI = n * i;"); + statement("T NoI2 = NoI * NoI;"); + statement("T k = T(1) - eta * eta * (T(1) - NoI2);"); + statement("if (k < T(0))"); + begin_scope(); + statement("return T(0);"); + end_scope(); + statement("else"); + begin_scope(); + statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + end_scope(); + end_scope(); + statement(""); + break; + + case SPVFuncImplFaceForwardScalar: + // Metal does not support scalar versions of these functions. 
+ statement("template"); + statement("inline T spvFaceForward(T n, T i, T nref)"); + begin_scope(); + statement("return i * nref < T(0) ? n : -n;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructNearest2Plane: + statement("template"); + statement("inline vec spvChromaReconstructNearest(texture2d plane0, texture2d plane1, sampler " + "samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructNearest3Plane: + statement("template"); + statement("inline vec spvChromaReconstructNearest(texture2d plane0, texture2d plane1, " + "texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422CositedEven(texture2d plane0, texture2d " + "plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).rg);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422CositedEven(texture2d plane0, texture2d " + "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422Midpoint(texture2d plane0, texture2d " + "plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422Midpoint(texture2d plane0, texture2d " + "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane: + 
statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYCositedEven(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYCositedEven(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case 
SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYMidpoint(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYMidpoint(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUFullRange: + statement("template"); + statement("inline vec spvExpandITUFullRange(vec ycbcr, int n)"); + begin_scope(); + statement("ycbcr.br -= exp2(T(n-1))/(exp2(T(n))-1);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUNarrowRange: + statement("template"); + statement("inline vec spvExpandITUNarrowRange(vec ycbcr, int n)"); + begin_scope(); + statement("ycbcr.g = (ycbcr.g * (exp2(T(n)) - 1) - ldexp(T(16), n - 8))/ldexp(T(219), n - 8);"); + statement("ycbcr.br = (ycbcr.br * (exp2(T(n)) - 1) - ldexp(T(128), n - 8))/ldexp(T(224), n - 8);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT709: + statement("// cf. 
Khronos Data Format Specification, section 15.1.1"); + statement("constant float3x3 spvBT709Factors = {{1, 1, 1}, {0, -0.13397432/0.7152, 1.8556}, {1.5748, " + "-0.33480248/0.7152, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT709(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT709Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT601: + statement("// cf. Khronos Data Format Specification, section 15.1.2"); + statement("constant float3x3 spvBT601Factors = {{1, 1, 1}, {0, -0.202008/0.587, 1.772}, {1.402, " + "-0.419198/0.587, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT601(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT601Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT2020: + statement("// cf. 
Khronos Data Format Specification, section 15.1.3"); + statement("constant float3x3 spvBT2020Factors = {{1, 1, 1}, {0, -0.11156702/0.6780, 1.8814}, {1.4746, " + "-0.38737742/0.6780, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT2020(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT2020Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplDynamicImageSampler: + statement("enum class spvFormatResolution"); + begin_scope(); + statement("_444 = 0,"); + statement("_422,"); + statement("_420"); + end_scope_decl(); + statement(""); + statement("enum class spvChromaFilter"); + begin_scope(); + statement("nearest = 0,"); + statement("linear"); + end_scope_decl(); + statement(""); + statement("enum class spvXChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrModelConversion"); + begin_scope(); + statement("rgb_identity = 0,"); + statement("ycbcr_identity,"); + statement("ycbcr_bt_709,"); + statement("ycbcr_bt_601,"); + statement("ycbcr_bt_2020"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrRange"); + begin_scope(); + statement("itu_full = 0,"); + statement("itu_narrow"); + end_scope_decl(); + statement(""); + statement("struct spvComponentBits"); + begin_scope(); + statement("constexpr explicit spvComponentBits(int v) thread : value(v) {}"); + statement("uchar value : 6;"); + end_scope_decl(); + statement("// A class corresponding to metal::sampler which holds sampler"); + statement("// Y'CbCr conversion info."); + statement("struct spvYCbCrSampler"); + begin_scope(); + statement("constexpr 
spvYCbCrSampler() thread : val(build()) {}"); + statement("template"); + statement("constexpr spvYCbCrSampler(Ts... t) thread : val(build(t...)) {}"); + statement("constexpr spvYCbCrSampler(const thread spvYCbCrSampler& s) thread = default;"); + statement(""); + statement("spvFormatResolution get_resolution() const thread"); + begin_scope(); + statement("return spvFormatResolution((val & resolution_mask) >> resolution_base);"); + end_scope(); + statement("spvChromaFilter get_chroma_filter() const thread"); + begin_scope(); + statement("return spvChromaFilter((val & chroma_filter_mask) >> chroma_filter_base);"); + end_scope(); + statement("spvXChromaLocation get_x_chroma_offset() const thread"); + begin_scope(); + statement("return spvXChromaLocation((val & x_chroma_off_mask) >> x_chroma_off_base);"); + end_scope(); + statement("spvYChromaLocation get_y_chroma_offset() const thread"); + begin_scope(); + statement("return spvYChromaLocation((val & y_chroma_off_mask) >> y_chroma_off_base);"); + end_scope(); + statement("spvYCbCrModelConversion get_ycbcr_model() const thread"); + begin_scope(); + statement("return spvYCbCrModelConversion((val & ycbcr_model_mask) >> ycbcr_model_base);"); + end_scope(); + statement("spvYCbCrRange get_ycbcr_range() const thread"); + begin_scope(); + statement("return spvYCbCrRange((val & ycbcr_range_mask) >> ycbcr_range_base);"); + end_scope(); + statement("int get_bpc() const thread { return (val & bpc_mask) >> bpc_base; }"); + statement(""); + statement("private:"); + statement("ushort val;"); + statement(""); + statement("constexpr static constant ushort resolution_bits = 2;"); + statement("constexpr static constant ushort chroma_filter_bits = 2;"); + statement("constexpr static constant ushort x_chroma_off_bit = 1;"); + statement("constexpr static constant ushort y_chroma_off_bit = 1;"); + statement("constexpr static constant ushort ycbcr_model_bits = 3;"); + statement("constexpr static constant ushort ycbcr_range_bit = 1;"); + 
statement("constexpr static constant ushort bpc_bits = 6;"); + statement(""); + statement("constexpr static constant ushort resolution_base = 0;"); + statement("constexpr static constant ushort chroma_filter_base = 2;"); + statement("constexpr static constant ushort x_chroma_off_base = 4;"); + statement("constexpr static constant ushort y_chroma_off_base = 5;"); + statement("constexpr static constant ushort ycbcr_model_base = 6;"); + statement("constexpr static constant ushort ycbcr_range_base = 9;"); + statement("constexpr static constant ushort bpc_base = 10;"); + statement(""); + statement( + "constexpr static constant ushort resolution_mask = ((1 << resolution_bits) - 1) << resolution_base;"); + statement("constexpr static constant ushort chroma_filter_mask = ((1 << chroma_filter_bits) - 1) << " + "chroma_filter_base;"); + statement("constexpr static constant ushort x_chroma_off_mask = ((1 << x_chroma_off_bit) - 1) << " + "x_chroma_off_base;"); + statement("constexpr static constant ushort y_chroma_off_mask = ((1 << y_chroma_off_bit) - 1) << " + "y_chroma_off_base;"); + statement("constexpr static constant ushort ycbcr_model_mask = ((1 << ycbcr_model_bits) - 1) << " + "ycbcr_model_base;"); + statement("constexpr static constant ushort ycbcr_range_mask = ((1 << ycbcr_range_bit) - 1) << " + "ycbcr_range_base;"); + statement("constexpr static constant ushort bpc_mask = ((1 << bpc_bits) - 1) << bpc_base;"); + statement(""); + statement("static constexpr ushort build()"); + begin_scope(); + statement("return 0;"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvFormatResolution res, Ts... t)"); + begin_scope(); + statement("return (ushort(res) << resolution_base) | (build(t...) & ~resolution_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvChromaFilter filt, Ts... 
t)"); + begin_scope(); + statement("return (ushort(filt) << chroma_filter_base) | (build(t...) & ~chroma_filter_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvXChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << x_chroma_off_base) | (build(t...) & ~x_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << y_chroma_off_base) | (build(t...) & ~y_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYCbCrModelConversion model, Ts... t)"); + begin_scope(); + statement("return (ushort(model) << ycbcr_model_base) | (build(t...) & ~ycbcr_model_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYCbCrRange range, Ts... t)"); + begin_scope(); + statement("return (ushort(range) << ycbcr_range_base) | (build(t...) & ~ycbcr_range_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvComponentBits bpc, Ts... t)"); + begin_scope(); + statement("return (ushort(bpc.value) << bpc_base) | (build(t...) 
& ~bpc_mask);"); + end_scope(); + end_scope_decl(); + statement(""); + statement("// A class which can hold up to three textures and a sampler, including"); + statement("// Y'CbCr conversion info, used to pass combined image-samplers"); + statement("// dynamically to functions."); + statement("template"); + statement("struct spvDynamicImageSampler"); + begin_scope(); + statement("texture2d plane0;"); + statement("texture2d plane1;"); + statement("texture2d plane2;"); + statement("sampler samp;"); + statement("spvYCbCrSampler ycbcr_samp;"); + statement("uint swizzle = 0;"); + statement(""); + if (msl_options.swizzle_texture_samples) + { + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp, uint sw) thread :"); + statement(" plane0(tex), samp(samp), swizzle(sw) {}"); + } + else + { + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp) thread :"); + statement(" plane0(tex), samp(samp) {}"); + } + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp, spvYCbCrSampler ycbcr_samp, " + "uint sw) thread :"); + statement(" plane0(tex), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement("constexpr spvDynamicImageSampler(texture2d plane0, texture2d plane1,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement( + "constexpr spvDynamicImageSampler(texture2d plane0, texture2d plane1, texture2d plane2,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), plane2(plane2), samp(samp), ycbcr_samp(ycbcr_samp), " + "swizzle(sw) {}"); + statement(""); + // XXX This is really hard to follow... I've left comments to make it a bit easier. + statement("template"); + statement("vec do_sample(float2 coord, LodOptions... 
options) const thread"); + begin_scope(); + statement("if (!is_null_texture(plane1))"); + begin_scope(); + statement("if (ycbcr_samp.get_resolution() == spvFormatResolution::_444 ||"); + statement(" ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)"); + begin_scope(); + statement("if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,"); + statement(" spvForward(options)...);"); + statement( + "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward(options)...);"); + end_scope(); // if (resolution == 422 || chroma_filter == nearest) + statement("switch (ycbcr_samp.get_resolution())"); + begin_scope(); + statement("case spvFormatResolution::_444: break;"); + statement("case spvFormatResolution::_422:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvXChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 422: + statement("case spvFormatResolution::_420:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + 
begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::cosited_even: + statement("case spvXChromaLocation::midpoint:"); + begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, 
plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::midpoint + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 420: + end_scope(); // switch (resolution) + end_scope(); // if (multiplanar) + statement("return plane0.sample(samp, coord, spvForward(options)...);"); + end_scope(); // do_sample() + statement("template "); + statement("vec sample(float2 coord, LodOptions... options) const thread"); + begin_scope(); + statement( + "vec s = spvTextureSwizzle(do_sample(coord, spvForward(options)...), swizzle);"); + statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)"); + statement(" return s;"); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_range())"); + begin_scope(); + statement("case spvYCbCrRange::itu_full:"); + statement(" s = spvExpandITUFullRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + statement("case spvYCbCrRange::itu_narrow:"); + statement(" s = spvExpandITUNarrowRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + end_scope(); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_model())"); + begin_scope(); + statement("case spvYCbCrModelConversion::rgb_identity:"); // Silence Clang warning + statement("case spvYCbCrModelConversion::ycbcr_identity:"); + statement(" return s;"); + statement("case spvYCbCrModelConversion::ycbcr_bt_709:"); + statement(" return spvConvertYCbCrBT709(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_601:"); + statement(" return spvConvertYCbCrBT601(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_2020:"); + statement(" return spvConvertYCbCrBT2020(s);"); + end_scope(); + end_scope(); + statement(""); + // Sampler Y'CbCr conversion forbids offsets. 
+ statement("vec sample(float2 coord, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, offset);"); + end_scope(); + statement("template"); + statement("vec sample(float2 coord, lod_options options, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, options, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, options, offset);"); + end_scope(); + statement("#if __HAVE_MIN_LOD_CLAMP__"); + statement("vec sample(float2 coord, bias b, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, b, min_lod, offset);"); + end_scope(); + statement( + "vec sample(float2 coord, gradient2d grad, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, grad, min_lod, offset);"); + end_scope(); + statement("#endif"); + statement(""); + // Y'CbCr conversion forbids all operations but sampling. + statement("vec read(uint2 coord, uint lod = 0) const thread"); + begin_scope(); + statement("return plane0.read(coord, lod);"); + end_scope(); + statement(""); + statement("vec gather(float2 coord, int2 offset = int2(0), component c = component::x) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvGatherSwizzle(plane0, samp, swizzle, c, coord, offset);"); + else + statement("return plane0.gather(samp, coord, offset, c);"); + end_scope(); + end_scope_decl(); + statement(""); + + default: break; } } } -// Undefined global memory is not allowed in MSL. -// Declare constant and init to zeros. Use {}, as global constructors can break Metal. 
-void CompilerMSL::declare_undefined_values() +static string inject_top_level_storage_qualifier(const string &expr, const string &qualifier) +{ + // Easier to do this through text munging since the qualifier does not exist in the type system at all, + // and plumbing in all that information is not very helpful. + size_t last_reference = expr.find_last_of('&'); + size_t last_pointer = expr.find_last_of('*'); + size_t last_significant = string::npos; + + if (last_reference == string::npos) + last_significant = last_pointer; + else if (last_pointer == string::npos) + last_significant = last_reference; + else + last_significant = std::max(last_reference, last_pointer); + + if (last_significant == string::npos) + return join(qualifier, " ", expr); + else + { + return join(expr.substr(0, last_significant + 1), " ", + qualifier, expr.substr(last_significant + 1, string::npos)); + } +} + +void CompilerMSL::declare_constant_arrays() { + bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1; + + // MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to + // global constants directly, so we are able to use constants as variable expressions. bool emitted = false; - ir.for_each_typed_id([&](uint32_t, SPIRUndef &undef) { - auto &type = this->get(undef.basetype); - statement("constant ", variable_decl(type, to_name(undef.self), undef.self), " = {};"); - emitted = true; + + ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { + if (c.specialization) + return; + + auto &type = this->get(c.constant_type); + // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries. + // FIXME: However, hoisting constants to main() means we need to pass down constant arrays to leaf functions if they are used there. + // If there are multiple functions in the module, drop this case to avoid breaking use cases which do not need to + // link into Metal libraries. This is hacky. 
+ if (!type.array.empty() && (!fully_inlined || is_scalar(type) || is_vector(type))) + { + add_resource_name(c.self); + auto name = to_name(c.self); + statement(inject_top_level_storage_qualifier(variable_decl(type, name), "constant"), + " = ", constant_expression(c), ";"); + emitted = true; + } }); if (emitted) statement(""); } -void CompilerMSL::declare_constant_arrays() +// Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries +void CompilerMSL::declare_complex_constant_arrays() { + // If we do not have a fully inlined module, we did not opt in to + // declaring constant arrays of complex types. See CompilerMSL::declare_constant_arrays(). + bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1; + if (!fully_inlined) + return; + // MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to // global constants directly, so we are able to use constants as variable expressions. bool emitted = false; @@ -3375,10 +7266,11 @@ void CompilerMSL::declare_constant_arrays() return; auto &type = this->get(c.constant_type); - if (!type.array.empty()) + if (!type.array.empty() && !(is_scalar(type) || is_vector(type))) { + add_resource_name(c.self); auto name = to_name(c.self); - statement("constant ", variable_decl(type, name), " = ", constant_expression(c), ";"); + statement("", variable_decl(type, name), " = ", constant_expression(c), ";"); emitted = true; } }); @@ -3390,7 +7282,6 @@ void CompilerMSL::declare_constant_arrays() void CompilerMSL::emit_resources() { declare_constant_arrays(); - declare_undefined_values(); // Emit the special [[stage_in]] and [[stage_out]] interface blocks which we created. 
emit_interface_block(stage_out_var_id); @@ -3403,12 +7294,57 @@ void CompilerMSL::emit_resources() void CompilerMSL::emit_specialization_constants_and_structs() { SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); bool emitted = false; unordered_set declared_structs; + unordered_set aligned_structs; + + // First, we need to deal with scalar block layout. + // It is possible that a struct may have to be placed at an alignment which does not match the innate alignment of the struct itself. + // In that case, if such a case exists for a struct, we must force that all elements of the struct become packed_ types. + // This makes the struct alignment as small as physically possible. + // When we actually align the struct later, we can insert padding as necessary to make the packed members behave like normally aligned types. + ir.for_each_typed_id([&](uint32_t type_id, const SPIRType &type) { + if (type.basetype == SPIRType::Struct && + has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked)) + mark_scalar_layout_structs(type); + }); + + bool builtin_block_type_is_required = false; + // Very special case. If gl_PerVertex is initialized as an array (tessellation) + // we have to potentially emit the gl_PerVertex struct type so that we can emit a constant LUT. + ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { + auto &type = this->get(c.constant_type); + if (is_array(type) && has_decoration(type.self, DecorationBlock) && is_builtin_type(type)) + builtin_block_type_is_required = true; + }); + + // Very particular use of the soft loop lock. + // align_struct may need to create custom types on the fly, but we don't care about + // these types for purpose of iterating over them in ir.ids_for_type and friends. 
+ auto loop_lock = ir.create_loop_soft_lock(); + + // Physical storage buffer pointers can have cyclical references, + // so emit forward declarations of them before other structs. + // Ignore type_id because we want the underlying struct type from the pointer. + ir.for_each_typed_id([&](uint32_t /* type_id */, const SPIRType &type) { + if (type.basetype == SPIRType::Struct && + type.pointer && type.storage == StorageClassPhysicalStorageBuffer && + declared_structs.count(type.self) == 0) + { + statement("struct ", to_name(type.self), ";"); + declared_structs.insert(type.self); + emitted = true; + } + }); + if (emitted) + statement(""); + + emitted = false; + declared_structs.clear(); - for (auto &id_ : ir.ids_for_constant_or_type) + for (auto &id_ : ir.ids_for_constant_undef_or_type) { auto &id = ir.ids[id_]; @@ -3429,6 +7365,7 @@ void CompilerMSL::emit_specialization_constants_and_structs() { auto &type = get(c.constant_type); string sc_type_name = type_to_glsl(type); + add_resource_name(c.self); string sc_name = to_name(c.self); string sc_tmp_name = sc_name + "_tmp"; @@ -3447,259 +7384,733 @@ void CompilerMSL::emit_specialization_constants_and_structs() statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name, ") ? ", sc_tmp_name, " : ", constant_expression(c), ";"); } - else if (has_decoration(c.self, DecorationSpecId)) + else if (has_decoration(c.self, DecorationSpecId)) + { + // Fallback to macro overrides. + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name, + ";"); + } + else + { + // Composite specialization constants must be built from other specialization constants. 
+ statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";"); + } + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + auto &c = id.get(); + auto &type = get(c.basetype); + add_resource_name(c.self); + auto name = to_name(c.self); + statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); + emitted = true; + } + else if (id.get_type() == TypeType) + { + // Output non-builtin interface structs. These include local function structs + // and structs nested within uniform and read-write buffers. + auto &type = id.get(); + TypeID type_id = type.self; + + bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer; + bool is_block = + has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + bool is_builtin_block = is_block && is_builtin_type(type); + bool is_declarable_struct = is_struct && (!is_builtin_block || builtin_block_type_is_required); + + // We'll declare this later. + if (stage_out_var_id && get_stage_out_struct_type().self == type_id) + is_declarable_struct = false; + if (patch_stage_out_var_id && get_patch_stage_out_struct_type().self == type_id) + is_declarable_struct = false; + if (stage_in_var_id && get_stage_in_struct_type().self == type_id) + is_declarable_struct = false; + if (patch_stage_in_var_id && get_patch_stage_in_struct_type().self == type_id) + is_declarable_struct = false; + + // Special case. Declare builtin struct anyways if we need to emit a threadgroup version of it. + if (stage_out_masked_builtin_type_id == type_id) + is_declarable_struct = true; + + // Align and emit declarable structs...but avoid declaring each more than once. 
+ if (is_declarable_struct && declared_structs.count(type_id) == 0) + { + if (emitted) + statement(""); + emitted = false; + + declared_structs.insert(type_id); + + if (has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked)) + align_struct(type, aligned_structs); + + // Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc. + emit_struct(get(type_id)); + } + } + else if (id.get_type() == TypeUndef) + { + auto &undef = id.get(); + auto &type = get(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; + + // Undefined global memory is not allowed in MSL. + // Declare constant and init to zeros. Use {}, as global constructors can break Metal. + statement( + inject_top_level_storage_qualifier(variable_decl(type, to_name(undef.self), undef.self), "constant"), + " = {};"); + emitted = true; + } + } + + if (emitted) + statement(""); +} + +void CompilerMSL::emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(to_ptr_expression(op0), " ", op, " ", to_ptr_expression(op1)), forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +string CompilerMSL::to_ptr_expression(uint32_t id, bool register_expression_read) +{ + auto *e = maybe_get(id); + auto expr = enclose_expression(e && e->need_transpose ? 
e->expression : to_expression(id, register_expression_read)); + if (!should_dereference(id)) + expr = address_of_expression(expr); + return expr; +} + +void CompilerMSL::emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, + join("(isunordered(", to_enclosed_unpacked_expression(op0), ", ", to_enclosed_unpacked_expression(op1), + ") || ", to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1), + ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id, uint32_t ptr) +{ + auto &ptr_type = expression_type(ptr); + auto &result_type = get(result_type_id); + if (ptr_type.storage != StorageClassInput && ptr_type.storage != StorageClassOutput) + return false; + if (ptr_type.storage == StorageClassOutput && is_tese_shader()) + return false; + + if (has_decoration(ptr, DecorationPatch)) + return false; + bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable; + + bool flattened_io = variable_storage_requires_stage_io(ptr_type.storage); + + bool flat_data_type = flattened_io && + (is_matrix(result_type) || is_array(result_type) || result_type.basetype == SPIRType::Struct); + + // Edge case, even with multi-patch workgroups, we still need to unroll load + // if we're loading control points directly. + if (ptr_is_io_variable && is_array(result_type)) + flat_data_type = true; + + if (!flat_data_type) + return false; + + // Now, we must unflatten a composite type and take care of interleaving array access with gl_in/gl_out. + // Lots of painful code duplication since we *really* should not unroll these kinds of loads in entry point fixup + // unless we're forced to do this when the code is emitting inoptimal OpLoads. 
+ string expr; + + uint32_t interface_index = get_extended_decoration(ptr, SPIRVCrossDecorationInterfaceMemberIndex); + auto *var = maybe_get_backing_variable(ptr); + auto &expr_type = get_pointee_type(ptr_type.self); + + const auto &iface_type = expression_type(stage_in_ptr_var_id); + + if (!flattened_io) + { + // Simplest case for multi-patch workgroups, just unroll array as-is. + if (interface_index == uint32_t(-1)) + return false; + + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, uint32_t(result_type.array.size()) - 1); + + for (uint32_t i = 0; i < num_control_points; i++) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else if (result_type.array.size() > 2) + { + SPIRV_CROSS_THROW("Cannot load tessellation IO variables with more than 2 dimensions."); + } + else if (result_type.array.size() == 2) + { + if (!ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading an array-of-array must be loaded directly from an IO variable."); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. 
Cannot continue."); + if (result_type.basetype == SPIRType::Struct || is_matrix(result_type)) + SPIRV_CROSS_THROW("Cannot load array-of-array of composite type in tessellation IO."); + + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, 1); + uint32_t base_interface_index = interface_index; + + auto &sub_type = get(result_type.parent_type); + + for (uint32_t i = 0; i < num_control_points; i++) + { + expr += type_to_glsl(sub_type) + "({ "; + interface_index = base_interface_index; + uint32_t array_size = to_array_size_literal(result_type, 0); + for (uint32_t j = 0; j < array_size; j++, interface_index++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && + expr_type.vecsize > sub_type.vecsize) + expr += vector_swizzle(sub_type.vecsize, 0); + + if (j + 1 < array_size) + expr += ", "; + } + expr += " })"; + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else if (result_type.basetype == SPIRType::Struct) + { + bool is_array_of_struct = is_array(result_type); + if (is_array_of_struct && !ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading array of struct from IO variable must come directly from IO variable."); + + uint32_t num_control_points = 1; + if (is_array_of_struct) + { + num_control_points = to_array_size_literal(result_type, 0); + expr += type_to_glsl(result_type) + "({ "; + } + + auto &struct_type = is_array_of_struct ? 
get(result_type.parent_type) : result_type; + assert(struct_type.array.empty()); + + for (uint32_t i = 0; i < num_control_points; i++) + { + expr += type_to_glsl(struct_type) + "{ "; + for (uint32_t j = 0; j < uint32_t(struct_type.member_types.size()); j++) + { + // The base interface index is stored per variable for structs. + if (var) + { + interface_index = + get_extended_member_decoration(var->self, j, SPIRVCrossDecorationInterfaceMemberIndex); + } + + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); + + const auto &mbr_type = get(struct_type.member_types[j]); + const auto &expr_mbr_type = get(expr_type.member_types[j]); + if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput) { - // Fallback to macro overrides. - c.specialization_constant_macro_name = - constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + expr += type_to_glsl(mbr_type) + "("; + for (uint32_t k = 0; k < mbr_type.columns; k++, interface_index++) + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + } + else + expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); - statement("#ifndef ", c.specialization_constant_macro_name); - statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); - statement("#endif"); - statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name, - ";"); + if (k + 1 < mbr_type.columns) + expr += ", "; + } + expr += ")"; + } + else if (is_array(mbr_type)) + { + expr += type_to_glsl(mbr_type) + "({ "; + uint32_t array_size = to_array_size_literal(mbr_type, 0); + for (uint32_t k = 0; k < array_size; k++, interface_index++) + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + } + else + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); + + if (k + 1 < array_size) + expr += ", "; + } + expr += " })"; } else { - // Composite specialization constants must be built from other specialization constants. - statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";"); + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, + &meta); + } + else + expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); } - emitted = true; + + if (j + 1 < struct_type.member_types.size()) + expr += ", "; } + expr += " }"; + if (i + 1 < num_control_points) + expr += ", "; } - else if (id.get_type() == TypeConstantOp) - { - auto &c = id.get(); - auto &type = get(c.basetype); - auto name = to_name(c.self); - statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); - emitted = true; - } - else if (id.get_type() == TypeType) - { - // Output non-builtin interface structs. These include local function structs - // and structs nested within uniform and read-write buffers. - auto &type = id.get(); - uint32_t type_id = type.self; - - bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty(); - bool is_block = - has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + if (is_array_of_struct) + expr += " })"; + } + else if (is_matrix(result_type)) + { + bool is_array_of_matrix = is_array(result_type); + if (is_array_of_matrix && !ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading array of matrix from IO variable must come directly from IO variable."); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); - bool is_builtin_block = is_block && is_builtin_type(type); - bool is_declarable_struct = is_struct && !is_builtin_block; + if (is_array_of_matrix) + { + // Loading a matrix from each control point. + uint32_t base_interface_index = interface_index; + uint32_t num_control_points = to_array_size_literal(result_type, 0); + expr += type_to_glsl(result_type) + "({ "; - // We'll declare this later. 
- if (stage_out_var_id && get_stage_out_struct_type().self == type_id) - is_declarable_struct = false; - if (patch_stage_out_var_id && get_patch_stage_out_struct_type().self == type_id) - is_declarable_struct = false; - if (stage_in_var_id && get_stage_in_struct_type().self == type_id) - is_declarable_struct = false; - if (patch_stage_in_var_id && get_patch_stage_in_struct_type().self == type_id) - is_declarable_struct = false; + auto &matrix_type = get_variable_element_type(get(ptr)); - // Align and emit declarable structs...but avoid declaring each more than once. - if (is_declarable_struct && declared_structs.count(type_id) == 0) + for (uint32_t i = 0; i < num_control_points; i++) { - if (emitted) - statement(""); - emitted = false; - - declared_structs.insert(type_id); - - if (has_extended_decoration(type_id, SPIRVCrossDecorationPacked)) - align_struct(type); + interface_index = base_interface_index; + expr += type_to_glsl(matrix_type) + "("; + for (uint32_t j = 0; j < result_type.columns; j++, interface_index++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + if (j + 1 < result_type.columns) + expr += ", "; + } + expr += ")"; + if (i + 1 < num_control_points) + expr += ", "; + } - // Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc. - emit_struct(get(type_id)); + expr += " })"; + } + else + { + expr += type_to_glsl(result_type) + "("; + for (uint32_t i = 0; i < result_type.columns; i++, interface_index++) + { + expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + if (i + 1 < result_type.columns) + expr += ", "; } + expr += ")"; } } + else if (ptr_is_io_variable) + { + assert(is_array(result_type)); + assert(result_type.array.size() == 1); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); - if (emitted) - statement(""); -} + // We're loading an array directly from a global variable. + // This means we're loading one member from each control point. + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, 0); -void CompilerMSL::emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op) -{ - bool forward = should_forward(op0) && should_forward(op1); - emit_op(result_type, result_id, - join("(isunordered(", to_enclosed_unpacked_expression(op0), ", ", to_enclosed_unpacked_expression(op1), - ") || ", to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1), - ")"), - forward); + for (uint32_t i = 0; i < num_control_points; i++) + { + const uint32_t indices[2] = { i, interface_index }; - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else + { + // We're loading an array from a concrete control point. + assert(is_array(result_type)); + assert(result_type.array.size() == 1); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. 
Cannot continue."); + + expr += type_to_glsl(result_type) + "({ "; + uint32_t array_size = to_array_size_literal(result_type, 0); + for (uint32_t i = 0; i < array_size; i++, interface_index++) + { + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + if (i + 1 < array_size) + expr += ", "; + } + expr += " })"; + } + + emit_op(result_type_id, id, expr, false); + register_read(id, ptr, false); + return true; } bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t length) { // If this is a per-vertex output, remap it to the I/O array buffer. - auto *var = maybe_get(ops[2]); - BuiltIn bi_type = BuiltIn(get_decoration(ops[2], DecorationBuiltIn)); - if (var && - (var->storage == StorageClassInput || - (get_execution_model() == ExecutionModelTessellationControl && var->storage == StorageClassOutput)) && - !(has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var))) && - (!is_builtin_variable(*var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || - bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || - get_variable_data_type(*var).basetype == SPIRType::Struct)) + + // Any object which did not go through IO flattening shenanigans will go there instead. + // We will unflatten on-demand instead as needed, but not all possible cases can be supported, especially with arrays. 
+ + auto *var = maybe_get_backing_variable(ops[2]); + bool patch = false; + bool flat_data = false; + bool ptr_is_chain = false; + bool flatten_composites = false; + + bool is_block = false; + bool is_arrayed = false; + + if (var) { + auto &type = get_variable_data_type(*var); + is_block = has_decoration(type.self, DecorationBlock); + is_arrayed = !type.array.empty(); + + flatten_composites = variable_storage_requires_stage_io(var->storage); + patch = has_decoration(ops[2], DecorationPatch) || is_patch_block(type); + + // Should match strip_array in add_interface_block. + flat_data = var->storage == StorageClassInput || (var->storage == StorageClassOutput && is_tesc_shader()); + + // Patch inputs are treated as normal block IO variables, so they don't deal with this path at all. + if (patch && (!is_block || is_arrayed || var->storage == StorageClassInput)) + flat_data = false; + + // We might have a chained access chain, where + // we first take the access chain to the control point, and then we chain into a member or something similar. + // In this case, we need to skip gl_in/gl_out remapping. + // Also, skip ptr chain for patches. + ptr_is_chain = var->self != ID(ops[2]); + } + + bool builtin_variable = false; + bool variable_is_flat = false; + + if (var && flat_data) + { + builtin_variable = is_builtin_variable(*var); + + BuiltIn bi_type = BuiltInMax; + if (builtin_variable && !is_block) + bi_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + + variable_is_flat = !builtin_variable || is_block || + bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance; + } + + if (variable_is_flat) + { + // If output is masked, it is emitted as a "normal" variable, just go through normal code paths. + // Only check this for the first level of access chain. + // Dealing with this for partial access chains should be possible, but awkward. 
+ if (var->storage == StorageClassOutput && !ptr_is_chain) + { + bool masked = false; + if (is_block) + { + uint32_t relevant_member_index = patch ? 3 : 4; + // FIXME: This won't work properly if the application first access chains into gl_out element, + // then access chains into the member. Super weird, but theoretically possible ... + if (length > relevant_member_index) + { + uint32_t mbr_idx = get(ops[relevant_member_index]).scalar(); + masked = is_stage_output_block_member_masked(*var, mbr_idx, true); + } + } + else if (var) + masked = is_stage_output_variable_masked(*var); + + if (masked) + return false; + } + AccessChainMeta meta; SmallVector indices; - uint32_t next_id = ir.increase_bound_by(2); + uint32_t next_id = ir.increase_bound_by(1); indices.reserve(length - 3 + 1); - uint32_t type_id = next_id++; - SPIRType new_uint_type; - new_uint_type.basetype = SPIRType::UInt; - new_uint_type.width = 32; - set(type_id, new_uint_type); - indices.push_back(ops[3]); + uint32_t first_non_array_index = (ptr_is_chain ? 3 : 4) - (patch ? 1 : 0); + + VariableID stage_var_id; + if (patch) + stage_var_id = var->storage == StorageClassInput ? patch_stage_in_var_id : patch_stage_out_var_id; + else + stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; + + VariableID ptr = ptr_is_chain ? VariableID(ops[2]) : stage_var_id; + if (!ptr_is_chain && !patch) + { + // Index into gl_in/gl_out with first array index. + indices.push_back(ops[first_non_array_index - 1]); + } + + auto &result_ptr_type = get(ops[0]); uint32_t const_mbr_id = next_id++; uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex); - uint32_t ptr = var->storage == StorageClassInput ? 
stage_in_ptr_var_id : stage_out_ptr_var_id; - if (var->storage == StorageClassInput || has_decoration(get_variable_element_type(*var).self, DecorationBlock)) + + // If we have a pointer chain expression, and we are no longer pointing to a composite + // object, we are in the clear. There is no longer a need to flatten anything. + bool further_access_chain_is_trivial = false; + if (ptr_is_chain && flatten_composites) { - uint32_t i = 4; + auto &ptr_type = expression_type(ptr); + if (!is_array(ptr_type) && !is_matrix(ptr_type) && ptr_type.basetype != SPIRType::Struct) + further_access_chain_is_trivial = true; + } + + if (!further_access_chain_is_trivial && (flatten_composites || is_block)) + { + uint32_t i = first_non_array_index; auto *type = &get_variable_element_type(*var); - if (index == uint32_t(-1) && length >= 5) + if (index == uint32_t(-1) && length >= (first_non_array_index + 1)) { // Maybe this is a struct type in the input class, in which case // we put it as a decoration on the corresponding member. - index = get_extended_member_decoration(ops[2], get_constant(ops[4]).scalar(), + uint32_t mbr_idx = get_constant(ops[first_non_array_index]).scalar(); + index = get_extended_member_decoration(var->self, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex); assert(index != uint32_t(-1)); i++; - type = &get(type->member_types[get_constant(ops[4]).scalar()]); + type = &get(type->member_types[mbr_idx]); } - // In this case, we flattened structures and arrays, so now we have to + + // In this case, we're poking into flattened structures and arrays, so now we have to // combine the following indices. If we encounter a non-constant index, // we're hosed. 
- for (; i < length; ++i) + for (; flatten_composites && i < length; ++i) { if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct) break; - auto &c = get_constant(ops[i]); - index += c.scalar(); + auto *c = maybe_get(ops[i]); + if (!c || c->specialization) + SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable in tessellation. " + "This is currently unsupported."); + + // We're in flattened space, so just increment the member index into IO block. + // We can only do this once in the current implementation, so either: + // Struct, Matrix or 1-dimensional array for a control point. + if (type->basetype == SPIRType::Struct && var->storage == StorageClassOutput) + { + // Need to consider holes, since individual block members might be masked away. + uint32_t mbr_idx = c->scalar(); + for (uint32_t j = 0; j < mbr_idx; j++) + if (!is_stage_output_block_member_masked(*var, j, true)) + index++; + } + else + index += c->scalar(); + if (type->parent_type) type = &get(type->parent_type); else if (type->basetype == SPIRType::Struct) - type = &get(type->member_types[c.scalar()]); + type = &get(type->member_types[c->scalar()]); } - // If the access chain terminates at a composite type, the composite - // itself might be copied. In that case, we must unflatten it. - if (is_matrix(*type) || is_array(*type) || type->basetype == SPIRType::Struct) - { - std::string temp_name = join(to_name(var->self), "_", ops[1]); - statement(variable_decl(*type, temp_name, var->self), ";"); - // Set up the initializer for this temporary variable. 
- indices.push_back(const_mbr_id); - if (type->basetype == SPIRType::Struct) - { - for (uint32_t j = 0; j < type->member_types.size(); j++) - { - index = get_extended_member_decoration(ops[2], j, SPIRVCrossDecorationInterfaceMemberIndex); - const auto &mbr_type = get(type->member_types[j]); - if (is_matrix(mbr_type)) - { - for (uint32_t k = 0; k < mbr_type.columns; k++, index++) - { - set(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, - true); - statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";"); - } - } - else if (is_array(mbr_type)) - { - for (uint32_t k = 0; k < mbr_type.array[0]; k++, index++) - { - set(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, - true); - statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";"); - } - } - else - { - set(const_mbr_id, type_id, index, false); - auto e = - access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, true); - statement(temp_name, ".", to_member_name(*type, j), " = ", e, ";"); - } - } - } - else if (is_matrix(*type)) - { - for (uint32_t j = 0; j < type->columns; j++, index++) - { - set(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); - statement(temp_name, "[", j, "] = ", e, ";"); - } - } - else // Must be an array - { - assert(is_array(*type)); - for (uint32_t j = 0; j < type->array[0]; j++, index++) - { - set(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); - statement(temp_name, "[", j, "] = ", e, ";"); - } - } - // This needs to be a variable instead of an expression so we don't - // try to dereference this as a variable pointer. 
- set(ops[1], ops[0], var->storage); - ir.meta[ops[1]] = ir.meta[ops[2]]; - set_name(ops[1], temp_name); - if (has_decoration(var->self, DecorationInvariant)) - set_decoration(ops[1], DecorationInvariant); - for (uint32_t j = 2; j < length; j++) - inherit_expression_dependencies(ops[1], ops[j]); - return true; - } - else + // We're not going to emit the actual member name, we let any further OpLoad take care of that. + // Tag the access chain with the member index we're referencing. + bool defer_access_chain = flatten_composites && (is_matrix(result_ptr_type) || is_array(result_ptr_type) || + result_ptr_type.basetype == SPIRType::Struct); + + if (!defer_access_chain) { - set(const_mbr_id, type_id, index, false); + // Access the appropriate member of gl_in/gl_out. + set(const_mbr_id, get_uint_type_id(), index, false); indices.push_back(const_mbr_id); + // Member index is now irrelevant. + index = uint32_t(-1); + + // Append any straggling access chain indices. if (i < length) indices.insert(indices.end(), ops + i, ops + length); } + else + { + // We must have consumed the entire access chain if we're deferring it. + assert(i == length); + } + + if (index != uint32_t(-1)) + set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index); + else + unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex); } else { - assert(index != uint32_t(-1)); - set(const_mbr_id, type_id, index, false); - indices.push_back(const_mbr_id); + if (index != uint32_t(-1)) + { + set(const_mbr_id, get_uint_type_id(), index, false); + indices.push_back(const_mbr_id); + } + + // Member index is now irrelevant. + index = uint32_t(-1); + unset_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex); - indices.insert(indices.end(), ops + 4, ops + length); + indices.insert(indices.end(), ops + first_non_array_index, ops + length); } // We use the pointer to the base of the input/output array here, // so this is always a pointer chain. 
- auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), get(ops[0]), &meta, true); - auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); + string e; + + if (!ptr_is_chain) + { + // This is the start of an access chain, use ptr_chain to index into control point array. + e = access_chain(ptr, indices.data(), uint32_t(indices.size()), result_ptr_type, &meta, !patch); + } + else + { + // If we're accessing a struct, we need to use member indices which are based on the IO block, + // not actual struct type, so we have to use a split access chain here where + // first path resolves the control point index, i.e. gl_in[index], and second half deals with + // looking up flattened member name. + + // However, it is possible that we partially accessed a struct, + // by taking pointer to member inside the control-point array. + // For this case, we fall back to a natural access chain since we have already dealt with remapping struct members. + // One way to check this here is if we have 2 implied read expressions. + // First one is the gl_in/gl_out struct itself, then an index into that array. + // If we have traversed further, we use a normal access chain formulation. + auto *ptr_expr = maybe_get(ptr); + bool split_access_chain_formulation = flatten_composites && ptr_expr && + ptr_expr->implied_read_expressions.size() == 2 && + !further_access_chain_is_trivial; + + if (split_access_chain_formulation) + { + e = join(to_expression(ptr), + access_chain_internal(stage_var_id, indices.data(), uint32_t(indices.size()), + ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta)); + } + else + { + e = access_chain_internal(ptr, indices.data(), uint32_t(indices.size()), 0, &meta); + } + } + + // Get the actual type of the object that was accessed. If it's a vector type and we changed it, + // then we'll need to add a swizzle. 
+ // For this, we can't necessarily rely on the type of the base expression, because it might be + // another access chain, and it will therefore already have the "correct" type. + auto *expr_type = &get_variable_data_type(*var); + if (has_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID)) + expr_type = &get(get_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID)); + for (uint32_t i = 3; i < length; i++) + { + if (!is_array(*expr_type) && expr_type->basetype == SPIRType::Struct) + expr_type = &get(expr_type->member_types[get(ops[i]).scalar()]); + else + expr_type = &get(expr_type->parent_type); + } + if (!is_array(*expr_type) && !is_matrix(*expr_type) && expr_type->basetype != SPIRType::Struct && + expr_type->vecsize > result_ptr_type.vecsize) + e += vector_swizzle(result_ptr_type.vecsize, 0); + + auto &expr = set(ops[1], std::move(e), ops[0], should_forward(ops[2])); expr.loaded_from = var->self; expr.need_transpose = meta.need_transpose; expr.access_chain = true; // Mark the result as being packed if necessary. if (meta.storage_is_packed) - set_extended_decoration(ops[1], SPIRVCrossDecorationPacked); - if (meta.storage_packed_type != 0) - set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type); + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); if (meta.storage_is_invariant) set_decoration(ops[1], DecorationInvariant); + // Save the type we found in case the result is used in another access chain. + set_extended_decoration(ops[1], SPIRVCrossDecorationTessIOOriginalInputTypeID, expr_type->self); + + // If we have some expression dependencies in our access chain, this access chain is technically a forwarded + // temporary which could be subject to invalidation. 
+ // Need to assume we're forwarded while calling inherit_expression_depdendencies. + forwarded_temporaries.insert(ops[1]); + // The access chain itself is never forced to a temporary, but its dependencies might. + suppressed_usage_tracking.insert(ops[1]); for (uint32_t i = 2; i < length; i++) { @@ -3707,6 +8118,11 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l add_implied_read_expression(expr, ops[i]); } + // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, + // we're not forwarded after all. + if (expr.expression_dependencies.empty()) + forwarded_temporaries.erase(ops[1]); + return true; } @@ -3716,9 +8132,9 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // expression so we don't try to dereference it as a variable pointer. // Don't do this if the index is a constant 1, though. We need to drop stores // to that one. - auto *m = ir.find_meta(var ? var->self : 0); - if (get_execution_model() == ExecutionModelTessellationControl && var && m && - m->decoration.builtin_type == BuiltInTessLevelInner && get_entry_point().flags.get(ExecutionModeTriangles)) + auto *m = ir.find_meta(var ? 
var->self : ID(0)); + if (is_tesc_shader() && var && m && m->decoration.builtin_type == BuiltInTessLevelInner && + is_tessellating_triangles()) { auto *c = maybe_get(ops[3]); if (c && c->scalar() == 1) @@ -3735,7 +8151,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs) { - if (!get_entry_point().flags.get(ExecutionModeTriangles)) + if (!is_tessellating_triangles()) return false; // In SPIR-V, TessLevelInner always has two elements and TessLevelOuter always has @@ -3745,7 +8161,7 @@ bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs) // In Metal, however, only the first element of TessLevelInner and the first three // of TessLevelOuter are accessible. This stems from how in Metal, the tessellation // levels must be stored to a dedicated buffer in a particular format that depends - // on the patch type. Therefore, in Triangles mode, any access to the second + // on the patch type. Therefore, in Triangles mode, any store to the second // inner level or the fourth outer level must be dropped. const auto *e = maybe_get(id_lhs); if (!e || !e->access_chain) @@ -3760,12 +8176,117 @@ bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs) (builtin == BuiltInTessLevelOuter && c->scalar() == 3); } +void CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, + spv::StorageClass storage, bool &is_packed) +{ + // If there is any risk of writes happening with the access chain in question, + // and there is a risk of concurrent write access to other components, + // we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect. + // The MSL compiler refuses to allow component-level access for any non-packed vector types. 
+ if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup)) + { + const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device"; + expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")"); + + // Further indexing should happen with packed rules (array index, not swizzle). + is_packed = true; + } +} + +bool CompilerMSL::access_chain_needs_stage_io_builtin_translation(uint32_t base) +{ + auto *var = maybe_get_backing_variable(base); + if (!var || !is_tessellation_shader()) + return true; + + // We only need to rewrite builtin access chains when accessing flattened builtins like gl_ClipDistance_N. + // Avoid overriding it back to just gl_ClipDistance. + // This can only happen in scenarios where we cannot flatten/unflatten access chains, so, the only case + // where this triggers is evaluation shader inputs. + bool redirect_builtin = is_tese_shader() ? var->storage == StorageClassOutput : false; + return redirect_builtin; +} + +// Sets the interface member index for an access chain to a pull-model interpolant. +void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length) +{ + auto *var = maybe_get_backing_variable(ops[2]); + if (!var || !pull_model_inputs.count(var->self)) + return; + // Get the base index. + uint32_t interface_index; + auto &var_type = get_variable_data_type(*var); + auto &result_type = get(ops[0]); + auto *type = &var_type; + if (has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex)) + { + interface_index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex); + } + else + { + // Assume an access chain into a struct variable. + assert(var_type.basetype == SPIRType::Struct); + auto &c = get(ops[3 + var_type.array.size()]); + interface_index = + get_extended_member_decoration(var->self, c.scalar(), SPIRVCrossDecorationInterfaceMemberIndex); + } + // Accumulate indices. 
We'll have to skip over the one for the struct, if present, because we already accounted + // for that getting the base index. + for (uint32_t i = 3; i < length; ++i) + { + if (is_vector(*type) && !is_array(*type) && is_scalar(result_type)) + { + // We don't want to combine the next index. Actually, we need to save it + // so we know to apply a swizzle to the result of the interpolation. + set_extended_decoration(ops[1], SPIRVCrossDecorationInterpolantComponentExpr, ops[i]); + break; + } + + auto *c = maybe_get(ops[i]); + if (!c || c->specialization) + SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable using pull-model " + "interpolation. This is currently unsupported."); + + if (type->parent_type) + type = &get(type->parent_type); + else if (type->basetype == SPIRType::Struct) + type = &get(type->member_types[c->scalar()]); + + if (!has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex) && + i - 3 == var_type.array.size()) + continue; + + interface_index += c->scalar(); + } + // Save this to the access chain itself so we can recover it later when calling an interpolation function. + set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, interface_index); +} + + +// If the physical type of a physical buffer pointer has been changed +// to a ulong or ulongn vector, add a cast back to the pointer type. 
+void CompilerMSL::check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) +{ + auto *p_physical_type = maybe_get(physical_type); + if (p_physical_type && + p_physical_type->storage == StorageClassPhysicalStorageBuffer && + p_physical_type->basetype == to_unsigned_basetype(64)) + { + if (p_physical_type->vecsize > 1) + expr += ".x"; + + expr = join("((", type_to_glsl(*type), ")", expr, ")"); + } +} + // Override for MSL-specific syntax instructions void CompilerMSL::emit_instruction(const Instruction &instruction) { #define MSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define MSL_PTR_BOP(op) emit_binary_ptr_op(ops[0], ops[1], ops[2], ops[3], #op) + // MSL does care about implicit integer promotion, but those cases are all handled in common code. #define MSL_BOP_CAST(op, type) \ - emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false) #define MSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) #define MSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) #define MSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) @@ -3778,6 +8299,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) auto ops = stream(instruction); auto opcode = static_cast(instruction.op); + opcode = get_remapped_spirv_op(opcode); + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
uint32_t integer_width = get_integer_width_for_instruction(instruction); auto int_type = to_signed_basetype(integer_width); @@ -3785,6 +8308,24 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) switch (opcode) { + case OpLoad: + { + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + if (is_tessellation_shader()) + { + if (!emit_tessellation_io_load(ops[0], id, ptr)) + CompilerGLSL::emit_instruction(instruction); + } + else + { + // Sample mask input for Metal is not an array + if (BuiltIn(get_decoration(ptr, DecorationBuiltIn)) == BuiltInSampleMask) + set_decoration(id, DecorationBuiltIn, BuiltInSampleMask); + CompilerGLSL::emit_instruction(instruction); + } + break; + } // Comparisons case OpIEqual: @@ -3802,6 +8343,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) case OpLogicalNotEqual: case OpFOrdNotEqual: + // TODO: Should probably negate the == result here. + // Typically OrdNotEqual comes from GLSL which itself does not really specify what + // happens with NaN. + // Consider fixing this if we run into real issues. MSL_BOP(!=); break; @@ -3858,7 +8403,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; case OpFUnordNotEqual: - MSL_UNORD_BOP(!=); + // not equal in MSL generates une opcodes to begin with. + // Since unordered not equal is how it works in C, just inherit that behavior. 
+ MSL_BOP(!=); break; case OpFUnordGreaterThan: @@ -3877,6 +8424,19 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) MSL_UNORD_BOP(<=); break; + // Pointer math + case OpPtrEqual: + MSL_PTR_BOP(==); + break; + + case OpPtrNotEqual: + MSL_PTR_BOP(!=); + break; + + case OpPtrDiff: + MSL_PTR_BOP(-); + break; + // Derivatives case OpDPdx: case OpDPdxFine: @@ -3901,26 +8461,62 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) // Bitfield case OpBitFieldInsert: - MSL_QFOP(insert_bits); + { + emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "insert_bits", SPIRType::UInt); break; + } case OpBitFieldSExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", int_type, int_type, + SPIRType::UInt, SPIRType::UInt); + break; + } + case OpBitFieldUExtract: - MSL_TFOP(extract_bits); + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", uint_type, uint_type, + SPIRType::UInt, SPIRType::UInt); break; + } case OpBitReverse: + // BitReverse does not have issues with sign since result type must match input type. 
MSL_UFOP(reverse_bits); break; case OpBitCount: - MSL_UFOP(popcount); + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "popcount", basetype, basetype); break; + } case OpFRem: MSL_BFOP(fmod); break; + case OpFMul: + if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction)) + MSL_BFOP(spvFMul); + else + MSL_BOP(*); + break; + + case OpFAdd: + if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction)) + MSL_BFOP(spvFAdd); + else + MSL_BOP(+); + break; + + case OpFSub: + if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction)) + MSL_BFOP(spvFSub); + else + MSL_BOP(-); + break; + // Atomics case OpAtomicExchange: { @@ -3929,7 +8525,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t ptr = ops[2]; uint32_t mem_sem = ops[4]; uint32_t val = ops[5]; - emit_atomic_func_op(result_type, id, "atomic_exchange_explicit", mem_sem, mem_sem, false, ptr, val); + emit_atomic_func_op(result_type, id, "atomic_exchange_explicit", opcode, mem_sem, mem_sem, false, ptr, val); break; } @@ -3942,7 +8538,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t mem_sem_fail = ops[5]; uint32_t val = ops[6]; uint32_t comp = ops[7]; - emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak_explicit", mem_sem_pass, mem_sem_fail, true, + emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak_explicit", opcode, + mem_sem_pass, mem_sem_fail, true, ptr, comp, true, false, val); break; } @@ -3956,7 +8553,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t id = ops[1]; uint32_t ptr = ops[2]; uint32_t mem_sem = ops[4]; - emit_atomic_func_op(result_type, id, "atomic_load_explicit", mem_sem, mem_sem, false, ptr, 0); + emit_atomic_func_op(result_type, id, "atomic_load_explicit", opcode, mem_sem, mem_sem, false, ptr, 0); break; } @@ -3967,7 +8564,7 @@ void 
CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t ptr = ops[0]; uint32_t mem_sem = ops[2]; uint32_t val = ops[3]; - emit_atomic_func_op(result_type, id, "atomic_store_explicit", mem_sem, mem_sem, false, ptr, val); + emit_atomic_func_op(result_type, id, "atomic_store_explicit", opcode, mem_sem, mem_sem, false, ptr, val); break; } @@ -3979,7 +8576,8 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t ptr = ops[2]; \ uint32_t mem_sem = ops[4]; \ uint32_t val = valsrc; \ - emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", mem_sem, mem_sem, false, ptr, val, \ + emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", opcode, \ + mem_sem, mem_sem, false, ptr, val, \ false, valconst); \ } while (false) @@ -4042,7 +8640,42 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } } - emit_texture_op(instruction); + emit_texture_op(instruction, false); + break; + } + + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + if (var && atomic_image_vars.count(var->self)) + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + std::string coord = to_expression(ops[3]); + auto &type = expression_type(ops[2]); + if (type.image.dim == Dim2D) + { + coord = join("spvImage2DAtomicCoord(", coord, ", ", to_expression(ops[2]), ")"); + } + + auto &e = set(id, join(to_expression(ops[2]), "_atomic[", coord, "]"), result_type, true); + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + } + else + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = + set(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + e.loaded_from = var ? 
var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + } break; } @@ -4093,11 +8726,22 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) auto store_type = texel_type; store_type.vecsize = 4; - statement(join(to_expression(img_id), ".write(", - remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ", - to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0, - 0, &forward), - ");")); + TextureFunctionArguments args = {}; + args.base.img = img_id; + args.base.imgtype = &img_type; + args.base.is_fetch = true; + args.coord = coord_id; + args.lod = lod; + + string expr; + if (needs_frag_discard_checks()) + expr = join("(", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " ? ((void)0) : "); + expr += join(to_expression(img_id), ".write(", + remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ", + CompilerMSL::to_function_args(args, &forward), ")"); + if (needs_frag_discard_checks()) + expr += ")"; + statement(expr, ";"); if (p_var && variable_storage_is_aliased(*p_var)) flush_all_aliased_variables(); @@ -4141,7 +8785,11 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) expr += ", " + img_exp + ".get_depth(" + lod + ")"; if (img_is_array) + { expr += ", " + img_exp + ".get_array_size()"; + if (img_dim == DimCube && msl_options.emulate_cube_array) + expr += " / 6"; + } expr += ")"; @@ -4211,45 +8859,25 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } else { - auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); auto *var = maybe_get_backing_variable(ops[2]); + SPIRExpression *e; + if (var && has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler)) + e = &emit_op(result_type, id, join(to_expression(ops[2]), ".plane0"), true, true); + else + e = &emit_op(result_type, id, to_expression(ops[2]), true, true); if (var) - e.loaded_from = var->self; + e->loaded_from = 
var->self; } break; } - case OpImageTexelPointer: - SPIRV_CROSS_THROW("MSL does not support atomic operations on images or texel buffers."); - // Casting case OpQuantizeToF16: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t arg = ops[2]; - - string exp; - auto &type = get(result_type); - - switch (type.vecsize) - { - case 1: - exp = join("float(half(", to_expression(arg), "))"); - break; - case 2: - exp = join("float2(half2(", to_expression(arg), "))"); - break; - case 3: - exp = join("float3(half3(", to_expression(arg), "))"); - break; - case 4: - exp = join("float4(half4(", to_expression(arg), "))"); - break; - default: - SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); - } - + string exp = join("spvQuantizeToF16(", to_expression(arg), ")"); emit_op(result_type, id, exp, should_forward(arg)); break; } @@ -4264,17 +8892,38 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } else CompilerGLSL::emit_instruction(instruction); + fix_up_interpolant_access_chain(ops, instruction.length); break; case OpStore: - if (is_out_of_bounds_tessellation_level(ops[0])) - break; + { + const auto &type = expression_type(ops[0]); - if (maybe_emit_array_assignment(ops[0], ops[1])) + if (is_out_of_bounds_tessellation_level(ops[0])) break; - CompilerGLSL::emit_instruction(instruction); + if (needs_frag_discard_checks() && + (type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform)) + { + // If we're in a continue block, this kludge will make the block too complex + // to emit normally. 
+ assert(current_emitting_block); + auto cont_type = continue_block_type(*current_emitting_block); + if (cont_type != SPIRBlock::ContinueNone && cont_type != SPIRBlock::ComplexLoop) + { + current_emitting_block->complex_continue = true; + force_recompile(); + } + statement("if (!", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), ")"); + begin_scope(); + } + if (!maybe_emit_array_assignment(ops[0], ops[1])) + CompilerGLSL::emit_instruction(instruction); + if (needs_frag_discard_checks() && + (type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform)) + end_scope(); break; + } // Compute barriers case OpMemoryBarrier: @@ -4289,25 +8938,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) emit_barrier(ops[0], ops[1], ops[2]); break; - case OpVectorTimesMatrix: - case OpMatrixTimesVector: - { - // If the matrix needs transpose and it is square or packed, just flip the multiply order. - uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 
2 : 3]; - auto *e = maybe_get(mtx_id); - auto &t = expression_type(mtx_id); - bool is_packed = has_extended_decoration(mtx_id, SPIRVCrossDecorationPacked); - if (e && e->need_transpose && (t.columns == t.vecsize || is_packed)) - { - e->need_transpose = false; - emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); - e->need_transpose = true; - } - else - MSL_BOP(*); - break; - } - case OpOuterProduct: { uint32_t result_type = ops[0]; @@ -4320,7 +8950,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) expr += "("; for (uint32_t col = 0; col < type.columns; col++) { - expr += to_enclosed_expression(a); + expr += to_enclosed_unpacked_expression(a); expr += " * "; expr += to_extract_component_expression(b, col); if (col + 1 < type.columns) @@ -4333,6 +8963,85 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + { + if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction)) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + // If the matrix needs transpose, just flip the multiply order. + auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 
2 : 3]); + if (e && e->need_transpose) + { + e->need_transpose = false; + string expr; + + if (opcode == OpMatrixTimesVector) + { + expr = join("spvFMulVectorMatrix(", to_enclosed_unpacked_expression(ops[3]), ", ", + to_unpacked_row_major_matrix_expression(ops[2]), ")"); + } + else + { + expr = join("spvFMulMatrixVector(", to_unpacked_row_major_matrix_expression(ops[3]), ", ", + to_enclosed_unpacked_expression(ops[2]), ")"); + } + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + emit_op(ops[0], ops[1], expr, forward); + e->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + { + if (opcode == OpMatrixTimesVector) + MSL_BFOP(spvFMulMatrixVector); + else + MSL_BFOP(spvFMulVectorMatrix); + } + break; + } + + case OpMatrixTimesMatrix: + { + if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction)) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + auto *a = maybe_get(ops[2]); + auto *b = maybe_get(ops[3]); + + // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. + // a^T * b^T = (b * a)^T. 
+ if (a && b && a->need_transpose && b->need_transpose) + { + a->need_transpose = false; + b->need_transpose = false; + + auto expr = + join("spvFMulMatrixMatrix(", enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), ", ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), ")"); + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + b->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + MSL_BFOP(spvFMulMatrixMatrix); + + break; + } + case OpIAddCarry: case OpISubBorrow: { @@ -4340,27 +9049,25 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); auto &type = get(result_type); - statement(variable_decl(type, to_name(result_id)), ";"); - set(result_id, to_name(result_id), result_type, true); + emit_uninitialized_temporary_expression(result_type, result_id); auto &res_type = get(type.member_types[1]); if (opcode == OpIAddCarry) { - statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " + ", - to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", + to_enclosed_unpacked_expression(op0), " + ", to_enclosed_unpacked_expression(op1), ";"); statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type), - "(1), ", type_to_glsl(res_type), "(0), ", to_expression(result_id), ".", to_member_name(type, 0), - " >= max(", to_expression(op0), ", ", to_expression(op1), "));"); + "(1), ", type_to_glsl(res_type), "(0), ", to_unpacked_expression(result_id), ".", to_member_name(type, 0), + " >= max(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "));"); } 
else { - statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " - ", - to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_unpacked_expression(op0), " - ", + to_enclosed_unpacked_expression(op1), ";"); statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type), - "(1), ", type_to_glsl(res_type), "(0), ", to_enclosed_expression(op0), - " >= ", to_enclosed_expression(op1), ");"); + "(1), ", type_to_glsl(res_type), "(0), ", to_enclosed_unpacked_expression(op0), + " >= ", to_enclosed_unpacked_expression(op1), ");"); } break; } @@ -4372,15 +9079,34 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; - forced_temporaries.insert(result_id); auto &type = get(result_type); - statement(variable_decl(type, to_name(result_id)), ";"); - set(result_id, to_name(result_id), result_type, true); + auto input_type = opcode == OpSMulExtended ? 
int_type : uint_type; + auto &output_type = get_type(result_type); + string cast_op0, cast_op1; + + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, false); + + emit_uninitialized_temporary_expression(result_type, result_id); + + string mullo_expr, mulhi_expr; + mullo_expr = join(cast_op0, " * ", cast_op1); + mulhi_expr = join("mulhi(", cast_op0, ", ", cast_op1, ")"); + + auto &low_type = get_type(output_type.member_types[0]); + auto &high_type = get_type(output_type.member_types[1]); + if (low_type.basetype != input_type) + { + expected_type.basetype = input_type; + mullo_expr = join(bitcast_glsl_op(low_type, expected_type), "(", mullo_expr, ")"); + } + if (high_type.basetype != input_type) + { + expected_type.basetype = input_type; + mulhi_expr = join(bitcast_glsl_op(high_type, expected_type), "(", mulhi_expr, ")"); + } - statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " * ", - to_enclosed_expression(op1), ";"); - statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", to_expression(op0), ", ", - to_expression(op1), ");"); + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", mullo_expr, ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = ", mulhi_expr, ";"); break; } @@ -4395,6 +9121,208 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + // SPV_INTEL_shader_integer_functions2 + case OpUCountLeadingZerosINTEL: + MSL_UFOP(clz); + break; + + case OpUCountTrailingZerosINTEL: + MSL_UFOP(ctz); + break; + + case OpAbsISubINTEL: + case OpAbsUSubINTEL: + MSL_BFOP(absdiff); + break; + + case OpIAddSatINTEL: + case OpUAddSatINTEL: + MSL_BFOP(addsat); + break; + + case OpIAverageINTEL: + case OpUAverageINTEL: + MSL_BFOP(hadd); + break; + + case OpIAverageRoundedINTEL: + case OpUAverageRoundedINTEL: + MSL_BFOP(rhadd); + break; + + case OpISubSatINTEL: + case OpUSubSatINTEL: + 
MSL_BFOP(subsat); + break; + + case OpIMul32x16INTEL: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2], b = ops[3]; + bool forward = should_forward(a) && should_forward(b); + emit_op(result_type, id, join("int(short(", to_unpacked_expression(a), ")) * int(short(", to_unpacked_expression(b), "))"), forward); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + case OpUMul32x16INTEL: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2], b = ops[3]; + bool forward = should_forward(a) && should_forward(b); + emit_op(result_type, id, join("uint(ushort(", to_unpacked_expression(a), ")) * uint(ushort(", to_unpacked_expression(b), "))"), forward); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + // SPV_EXT_demote_to_helper_invocation + case OpDemoteToHelperInvocationEXT: + if (!msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("discard_fragment() does not formally have demote semantics until MSL 2.3."); + CompilerGLSL::emit_instruction(instruction); + break; + + case OpIsHelperInvocationEXT: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.3 on iOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.1 on macOS."); + emit_op(ops[0], ops[1], + needs_manual_helper_invocation_updates() ? 
builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput) : + "simd_is_helper_thread()", + false); + break; + + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("Raster order groups require MSL 2.0."); + break; // Nothing to do in the body + + case OpConvertUToAccelerationStructureKHR: + SPIRV_CROSS_THROW("ConvertUToAccelerationStructure is not supported in MSL."); + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: + SPIRV_CROSS_THROW("BindingTableRecordOffset is not supported in MSL."); + + case OpRayQueryInitializeKHR: + { + flush_variable_declaration(ops[0]); + + statement(to_expression(ops[0]), ".reset(", "ray(", to_expression(ops[4]), ", ", to_expression(ops[6]), ", ", + to_expression(ops[5]), ", ", to_expression(ops[7]), "), ", to_expression(ops[1]), + ", intersection_params());"); + break; + } + case OpRayQueryProceedKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".next()"), false); + break; + } +#define MSL_RAY_QUERY_IS_CANDIDATE get(ops[3]).scalar_i32() == 0 + +#define MSL_RAY_QUERY_GET_OP(op, msl_op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_" #msl_op "()"), false); \ + break + +#define MSL_RAY_QUERY_OP_INNER2(op, msl_prefix, msl_op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + if (MSL_RAY_QUERY_IS_CANDIDATE) \ + emit_op(ops[0], ops[1], join(to_expression(ops[2]), #msl_prefix "_candidate_" #msl_op "()"), false); \ + else \ + emit_op(ops[0], ops[1], join(to_expression(ops[2]), #msl_prefix "_committed_" #msl_op "()"), false); \ + break + +#define MSL_RAY_QUERY_GET_OP2(op, msl_op) MSL_RAY_QUERY_OP_INNER2(op, .get, msl_op) +#define MSL_RAY_QUERY_IS_OP2(op, msl_op) MSL_RAY_QUERY_OP_INNER2(op, .is, msl_op) + + MSL_RAY_QUERY_GET_OP(RayTMin, ray_min_distance); + 
MSL_RAY_QUERY_GET_OP(WorldRayOrigin, world_space_ray_origin); + MSL_RAY_QUERY_GET_OP(WorldRayDirection, world_space_ray_direction); + MSL_RAY_QUERY_GET_OP2(IntersectionInstanceId, instance_id); + MSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex, user_instance_id); + MSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics, triangle_barycentric_coord); + MSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex, primitive_id); + MSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex, geometry_id); + MSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin, ray_origin); + MSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection, ray_direction); + MSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld, object_to_world_transform); + MSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject, world_to_object_transform); + MSL_RAY_QUERY_IS_OP2(IntersectionFrontFace, triangle_front_facing); + + case OpRayQueryGetIntersectionTypeKHR: + flush_variable_declaration(ops[2]); + if (MSL_RAY_QUERY_IS_CANDIDATE) + emit_op(ops[0], ops[1], join("uint(", to_expression(ops[2]), ".get_candidate_intersection_type()) - 1"), + false); + else + emit_op(ops[0], ops[1], join("uint(", to_expression(ops[2]), ".get_committed_intersection_type())"), false); + break; + case OpRayQueryGetIntersectionTKHR: + flush_variable_declaration(ops[2]); + if (MSL_RAY_QUERY_IS_CANDIDATE) + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_candidate_triangle_distance()"), false); + else + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".get_committed_distance()"), false); + break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".is_candidate_non_opaque_bounding_box()"), false); + break; + } + case OpRayQueryConfirmIntersectionKHR: + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".commit_triangle_intersection();"); + break; + case OpRayQueryGenerateIntersectionKHR: + flush_variable_declaration(ops[0]); + 
statement(to_expression(ops[0]), ".commit_bounding_box_intersection(", to_expression(ops[1]), ");"); + break; + case OpRayQueryTerminateKHR: + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".abort();"); + break; +#undef MSL_RAY_QUERY_GET_OP +#undef MSL_RAY_QUERY_IS_CANDIDATE +#undef MSL_RAY_QUERY_IS_OP2 +#undef MSL_RAY_QUERY_GET_OP2 +#undef MSL_RAY_QUERY_OP_INNER2 + + case OpConvertPtrToU: + case OpConvertUToPtr: + case OpBitcast: + { + auto &type = get(ops[0]); + auto &input_type = expression_type(ops[2]); + + if (opcode != OpBitcast || type.pointer || input_type.pointer) + { + string op; + + if (type.vecsize == 1 && input_type.vecsize == 1) + op = join("reinterpret_cast<", type_to_glsl(type), ">(", to_unpacked_expression(ops[2]), ")"); + else if (input_type.vecsize == 2) + op = join("reinterpret_cast<", type_to_glsl(type), ">(as_type(", to_unpacked_expression(ops[2]), "))"); + else + op = join("as_type<", type_to_glsl(type), ">(reinterpret_cast(", to_unpacked_expression(ops[2]), "))"); + + emit_op(ops[0], ops[1], op, should_forward(ops[2])); + inherit_expression_dependencies(ops[1], ops[2]); + } + else + CompilerGLSL::emit_instruction(instruction); + + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; @@ -4403,16 +9331,51 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) previous_instruction_opcode = opcode; } +void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse) +{ + if (sparse) + SPIRV_CROSS_THROW("Sparse feedback not yet supported in MSL."); + + if (msl_options.use_framebuffer_fetch_subpasses) + { + auto *ops = stream(i); + + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + + auto &type = expression_type(img); + auto &imgtype = get(type.self); + + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ if (imgtype.image.dim == DimSubpassData) + { + // Subpass inputs cannot be invalidated, + // so just forward the expression directly. + string expr = to_expression(img); + emit_op(result_type_id, id, expr, true); + return; + } + } + + // Fallback to default implementation + CompilerGLSL::emit_texture_op(i, sparse); +} + void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem) { - if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl) + if (get_execution_model() != ExecutionModelGLCompute && !is_tesc_shader()) return; - uint32_t exe_scope = id_exe_scope ? get(id_exe_scope).scalar() : uint32_t(ScopeInvocation); - uint32_t mem_scope = id_mem_scope ? get(id_mem_scope).scalar() : uint32_t(ScopeInvocation); + uint32_t exe_scope = id_exe_scope ? evaluate_constant_u32(id_exe_scope) : uint32_t(ScopeInvocation); + uint32_t mem_scope = id_mem_scope ? evaluate_constant_u32(id_mem_scope) : uint32_t(ScopeInvocation); // Use the wider of the two scopes (smaller value) exe_scope = min(exe_scope, mem_scope); + if (msl_options.emulate_subgroups && exe_scope >= ScopeSubgroup && !id_mem_sem) + // In this case, we assume a "subgroup" size of 1. The barrier, then, is a noop. + return; + string bar_stmt; if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2)) bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier"; @@ -4420,7 +9383,7 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin bar_stmt = "threadgroup_barrier"; bar_stmt += "("; - uint32_t mem_sem = id_mem_sem ? get(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone); + uint32_t mem_sem = id_mem_sem ? evaluate_constant_u32(id_mem_sem) : uint32_t(MemorySemanticsMaskNone); // Use the | operator to combine flags if we can. 
if (msl_options.supports_msl_version(1, 2)) @@ -4428,11 +9391,12 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin string mem_flags = ""; // For tesc shaders, this also affects objects in the Output storage class. // Since in Metal, these are placed in a device buffer, we have to sync device memory here. - if (get_execution_model() == ExecutionModelTessellationControl || + if (is_tesc_shader() || (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask))) mem_flags += "mem_flags::mem_device"; - if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsAtomicCounterMemoryMask)) + + // Fix tessellation patch function processing + if (is_tesc_shader() || (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask))) { if (!mem_flags.empty()) mem_flags += " | "; @@ -4453,13 +9417,11 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin else { if ((mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) && - (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsAtomicCounterMemoryMask))) + (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask))) bar_stmt += "mem_flags::mem_device_and_threadgroup"; else if (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) bar_stmt += "mem_flags::mem_device"; - else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsAtomicCounterMemoryMask)) + else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask)) bar_stmt += "mem_flags::mem_threadgroup"; else if (mem_sem & MemorySemanticsImageMemoryMask) bar_stmt += "mem_flags::mem_texture"; @@ -4467,29 +9429,6 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin bar_stmt += 
"mem_flags::mem_none"; } - if (msl_options.is_ios() && (msl_options.supports_msl_version(2) && !msl_options.supports_msl_version(2, 1))) - { - bar_stmt += ", "; - - switch (mem_scope) - { - case ScopeCrossDevice: - case ScopeDevice: - bar_stmt += "memory_scope_device"; - break; - - case ScopeSubgroup: - case ScopeInvocation: - bar_stmt += "memory_scope_simdgroup"; - break; - - case ScopeWorkgroup: - default: - bar_stmt += "memory_scope_threadgroup"; - break; - } - } - bar_stmt += ");"; statement(bar_stmt); @@ -4499,50 +9438,146 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin flush_all_active_variables(); } -void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id) +static bool storage_class_array_is_thread(StorageClass storage) { - // Assignment from an array initializer is fine. - auto &type = expression_type(rhs_id); - auto *var = maybe_get_backing_variable(rhs_id); - - // Unfortunately, we cannot template on address space in MSL, - // so explicit address space redirection it is ... - bool is_constant = false; - if (ir.ids[rhs_id].get_type() == TypeConstant) + switch (storage) { - is_constant = true; + case StorageClassInput: + case StorageClassOutput: + case StorageClassGeneric: + case StorageClassFunction: + case StorageClassPrivate: + return true; + + default: + return false; } - else if (var && var->remapped_variable && var->statically_assigned && - ir.ids[var->static_expression].get_type() == TypeConstant) +} + +void CompilerMSL::emit_array_copy(const string &lhs, uint32_t lhs_id, uint32_t rhs_id, + StorageClass lhs_storage, StorageClass rhs_storage) +{ + // Allow Metal to use the array template to make arrays a value type. + // This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback. 
+ bool lhs_is_thread_storage = storage_class_array_is_thread(lhs_storage); + bool rhs_is_thread_storage = storage_class_array_is_thread(rhs_storage); + + bool lhs_is_array_template = lhs_is_thread_storage; + bool rhs_is_array_template = rhs_is_thread_storage; + + // Special considerations for stage IO variables. + // If the variable is actually backed by non-user visible device storage, we use array templates for those. + // + // Another special consideration is given to thread local variables which happen to have Offset decorations + // applied to them. Block-like types do not use array templates, so we need to force POD path if we detect + // these scenarios. This check isn't perfect since it would be technically possible to mix and match these things, + // and for a fully correct solution we might have to track array template state through access chains as well, + // but for all reasonable use cases, this should suffice. + // This special case should also only apply to Function/Private storage classes. + // We should not check backing variable for temporaries. 
+ auto *lhs_var = maybe_get_backing_variable(lhs_id); + if (lhs_var && lhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(lhs_var->storage)) + lhs_is_array_template = true; + else if (lhs_var && (lhs_storage == StorageClassFunction || lhs_storage == StorageClassPrivate) && + type_is_block_like(get(lhs_var->basetype))) + lhs_is_array_template = false; + + auto *rhs_var = maybe_get_backing_variable(rhs_id); + if (rhs_var && rhs_storage == StorageClassStorageBuffer && storage_class_array_is_thread(rhs_var->storage)) + rhs_is_array_template = true; + else if (rhs_var && (rhs_storage == StorageClassFunction || rhs_storage == StorageClassPrivate) && + type_is_block_like(get(rhs_var->basetype))) + rhs_is_array_template = false; + + // If threadgroup storage qualifiers are *not* used: + // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier. + if (lhs_is_array_template && rhs_is_array_template && !using_builtin_array()) { - is_constant = true; + statement(lhs, " = ", to_expression(rhs_id), ";"); } - - // For the case where we have OpLoad triggering an array copy, - // we cannot easily detect this case ahead of time since it's - // context dependent. We might have to force a recompile here - // if this is the only use of array copies in our shader. - if (type.array.size() > 1) + else { - if (type.array.size() > SPVFuncImplArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - auto func = static_cast(SPVFuncImplArrayCopyMultidimBase + type.array.size()); - if (spv_function_implementations.count(func) == 0) + // Assignment from an array initializer is fine. + auto &type = expression_type(rhs_id); + auto *var = maybe_get_backing_variable(rhs_id); + + // Unfortunately, we cannot template on address space in MSL, + // so explicit address space redirection it is ... 
+ bool is_constant = false; + if (ir.ids[rhs_id].get_type() == TypeConstant) + { + is_constant = true; + } + else if (var && var->remapped_variable && var->statically_assigned && + ir.ids[var->static_expression].get_type() == TypeConstant) + { + is_constant = true; + } + else if (rhs_storage == StorageClassUniform || rhs_storage == StorageClassUniformConstant) + { + is_constant = true; + } + + // For the case where we have OpLoad triggering an array copy, + // we cannot easily detect this case ahead of time since it's + // context dependent. We might have to force a recompile here + // if this is the only use of array copies in our shader. + if (type.array.size() > 1) { - spv_function_implementations.insert(func); - suppress_missing_prototypes = true; - force_recompile(); + if (type.array.size() > kArrayCopyMultidimMax) + SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); + auto func = static_cast(SPVFuncImplArrayCopyMultidimBase + type.array.size()); + add_spv_func_and_recompile(func); } + else + add_spv_func_and_recompile(SPVFuncImplArrayCopy); + + const char *tag = nullptr; + if (lhs_is_thread_storage && is_constant) + tag = "FromConstantToStack"; + else if (lhs_storage == StorageClassWorkgroup && is_constant) + tag = "FromConstantToThreadGroup"; + else if (lhs_is_thread_storage && rhs_is_thread_storage) + tag = "FromStackToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_is_thread_storage) + tag = "FromStackToThreadGroup"; + else if (lhs_is_thread_storage && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToThreadGroup"; + else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassStorageBuffer) + tag = "FromDeviceToDevice"; + else if (lhs_storage == StorageClassStorageBuffer && is_constant) + tag = "FromConstantToDevice"; + else if (lhs_storage == 
StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToDevice"; + else if (lhs_storage == StorageClassStorageBuffer && rhs_is_thread_storage) + tag = "FromStackToDevice"; + else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer) + tag = "FromDeviceToThreadGroup"; + else if (lhs_is_thread_storage && rhs_storage == StorageClassStorageBuffer) + tag = "FromDeviceToStack"; + else + SPIRV_CROSS_THROW("Unknown storage class used for copying arrays."); + + // Pass internal array of spvUnsafeArray<> into wrapper functions + if (lhs_is_array_template && rhs_is_array_template && !msl_options.force_native_arrays) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ".elements);"); + if (lhs_is_array_template && !msl_options.force_native_arrays) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");"); + else if (rhs_is_array_template && !msl_options.force_native_arrays) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);"); + else + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); } - else if (spv_function_implementations.count(SPVFuncImplArrayCopy) == 0) - { - spv_function_implementations.insert(SPVFuncImplArrayCopy); - suppress_missing_prototypes = true; - force_recompile(); - } +} - const char *tag = is_constant ? "FromConstant" : "FromStack"; - statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); +uint32_t CompilerMSL::get_physical_tess_level_array_size(spv::BuiltIn builtin) const +{ + if (is_tessellating_triangles()) + return builtin == BuiltInTessLevelInner ? 1 : 3; + else + return builtin == BuiltInTessLevelInner ? 
2 : 4; } // Since MSL does not allow arrays to be copied via simple variable assignment, @@ -4573,41 +9608,102 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs) return true; } + if (is_tesc_shader() && has_decoration(id_lhs, DecorationBuiltIn)) + { + auto builtin = BuiltIn(get_decoration(id_lhs, DecorationBuiltIn)); + // Need to manually unroll the array store. + if (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter) + { + uint32_t array_size = get_physical_tess_level_array_size(builtin); + if (array_size == 1) + statement(to_expression(id_lhs), " = half(", to_expression(id_rhs), "[0]);"); + else + { + for (uint32_t i = 0; i < array_size; i++) + statement(to_expression(id_lhs), "[", i, "] = half(", to_expression(id_rhs), "[", i, "]);"); + } + return true; + } + } + // Ensure the LHS variable has been declared auto *p_v_lhs = maybe_get_backing_variable(id_lhs); if (p_v_lhs) flush_variable_declaration(p_v_lhs->self); - emit_array_copy(to_expression(id_lhs), id_rhs); + auto lhs_storage = get_expression_effective_storage_class(id_lhs); + auto rhs_storage = get_expression_effective_storage_class(id_rhs); + emit_array_copy(to_expression(id_lhs), id_lhs, id_rhs, lhs_storage, rhs_storage); register_write(id_lhs); return true; } // Emits one of the atomic functions. 
In MSL, the atomic functions operate on pointers -void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, - uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1, +void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, Op opcode, + uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1, bool op1_is_pointer, bool op1_is_literal, uint32_t op2) { - forced_temporaries.insert(result_id); - - string exp = string(op) + "("; + string exp; auto &type = get_pointee_type(expression_type(obj)); - exp += "(volatile "; + auto expected_type = type.basetype; + if (opcode == OpAtomicUMax || opcode == OpAtomicUMin) + expected_type = to_unsigned_basetype(type.width); + else if (opcode == OpAtomicSMax || opcode == OpAtomicSMin) + expected_type = to_signed_basetype(type.width); + + if (type.width == 64) + SPIRV_CROSS_THROW("MSL currently does not support 64-bit atomics."); + + auto remapped_type = type; + remapped_type.basetype = expected_type; + auto *var = maybe_get_backing_variable(obj); if (!var) SPIRV_CROSS_THROW("No backing variable for atomic operation."); - exp += get_argument_address_space(*var); + const auto &res_type = get(var->basetype); + + bool is_atomic_compare_exchange_strong = op1_is_pointer && op1; + + bool check_discard = opcode != OpAtomicLoad && needs_frag_discard_checks() && + ((res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) || + var->storage == StorageClassStorageBuffer || var->storage == StorageClassUniform); + + if (check_discard) + { + if (is_atomic_compare_exchange_strong) + { + // We're already emitting a CAS loop here; a conditional won't hurt. 
+ emit_uninitialized_temporary_expression(result_type, result_id); + statement("if (!", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), ")"); + begin_scope(); + } + else + exp = join("(!", builtin_to_glsl(BuiltInHelperInvocation, StorageClassInput), " ? "); + } + + exp += string(op) + "("; + exp += "("; + // Emulate texture2D atomic operations + if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) + { + exp += "device"; + } + else + { + exp += get_argument_address_space(*var); + } + exp += " atomic_"; - exp += type_to_glsl(type); + // For signed and unsigned min/max, we can signal this through the pointer type. + // There is no other way, since C++ does not have explicit signage for atomics. + exp += type_to_glsl(remapped_type); exp += "*)"; exp += "&"; exp += to_enclosed_expression(obj); - bool is_atomic_compare_exchange_strong = op1_is_pointer && op1; - if (is_atomic_compare_exchange_strong) { assert(strcmp(op, "atomic_compare_exchange_weak_explicit") == 0); @@ -4629,12 +9725,42 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, // the CAS loop, otherwise it will loop infinitely, with the comparison test always failing. // The function updates the comparitor value from the memory value, so the additional // comparison test evaluates the memory value against the expected value. 
- statement(variable_decl(type, to_name(result_id)), ";"); + if (!check_discard) + emit_uninitialized_temporary_expression(result_type, result_id); statement("do"); begin_scope(); statement(to_name(result_id), " = ", to_expression(op1), ";"); end_scope_decl(join("while (!", exp, " && ", to_name(result_id), " == ", to_enclosed_expression(op1), ")")); - set(result_id, to_name(result_id), result_type, true); + if (check_discard) + { + end_scope(); + statement("else"); + begin_scope(); + exp = "atomic_load_explicit("; + exp += "("; + // Emulate texture2D atomic operations + if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) + exp += "device"; + else + exp += get_argument_address_space(*var); + + exp += " atomic_"; + exp += type_to_glsl(remapped_type); + exp += "*)"; + + exp += "&"; + exp += to_enclosed_expression(obj); + + if (has_mem_order_2) + exp += string(", ") + get_memory_order(mem_order_2); + else + exp += string(", ") + get_memory_order(mem_order_1); + + exp += ")"; + + statement(to_name(result_id), " = ", exp, ";"); + end_scope(); + } } else { @@ -4644,7 +9770,7 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, if (op1_is_literal) exp += join(", ", op1); else - exp += ", " + to_expression(op1); + exp += ", " + bitcast_expression(expected_type, op1); } if (op2) exp += ", " + to_expression(op2); @@ -4654,7 +9780,46 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, exp += string(", ") + get_memory_order(mem_order_2); exp += ")"; - emit_op(result_type, result_id, exp, false); + + if (check_discard) + { + exp += " : "; + if (strcmp(op, "atomic_store_explicit") != 0) + { + exp += "atomic_load_explicit("; + exp += "("; + // Emulate texture2D atomic operations + if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) + exp += "device"; + else + exp += get_argument_address_space(*var); + + exp += " atomic_"; + exp += 
type_to_glsl(remapped_type); + exp += "*)"; + + exp += "&"; + exp += to_enclosed_expression(obj); + + if (has_mem_order_2) + exp += string(", ") + get_memory_order(mem_order_2); + else + exp += string(", ") + get_memory_order(mem_order_1); + + exp += ")"; + } + else + exp += "((void)0)"; + exp += ")"; + } + + if (expected_type != type.basetype) + exp = bitcast_expression(type, expected_type, exp); + + if (strcmp(op, "atomic_store_explicit") != 0) + emit_op(result_type, result_id, exp, false); + else + statement(exp, ";"); } flush_all_atomic_capable_variables(); @@ -4666,7 +9831,8 @@ const char *CompilerMSL::get_memory_order(uint32_t) return "memory_order_relaxed"; } -// Override for MSL-specific extension syntax instructions +// Override for MSL-specific extension syntax instructions. +// In some cases, deliberately select either the fast or precise versions of the MSL functions to match Vulkan math precision results. void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count) { auto op = static_cast(eop); @@ -4676,10 +9842,21 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); + op = get_remapped_glsl_op(op); + switch (op) { + case GLSLstd450Sinh: + emit_unary_func_op(result_type, id, args[0], "fast::sinh"); + break; + case GLSLstd450Cosh: + emit_unary_func_op(result_type, id, args[0], "fast::cosh"); + break; + case GLSLstd450Tanh: + emit_unary_func_op(result_type, id, args[0], "precise::tanh"); + break; case GLSLstd450Atan2: - emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); + emit_binary_func_op(result_type, id, args[0], args[1], "precise::atan2"); break; case GLSLstd450InverseSqrt: emit_unary_func_op(result_type, id, args[0], "rsqrt"); @@ -4688,12 +9865,20 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, 
emit_unary_func_op(result_type, id, args[0], "rint"); break; + case GLSLstd450FindILsb: + { + // In this template version of findLSB, we return T. + auto basetype = expression_type(args[0]).basetype; + emit_unary_func_op_cast(result_type, id, args[0], "spvFindLSB", basetype, basetype); + break; + } + case GLSLstd450FindSMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findSMSB", int_type, int_type); + emit_unary_func_op_cast(result_type, id, args[0], "spvFindSMSB", int_type, int_type); break; case GLSLstd450FindUMsb: - emit_unary_func_op_cast(result_type, id, args[0], "findUMSB", uint_type, uint_type); + emit_unary_func_op_cast(result_type, id, args[0], "spvFindUMSB", uint_type, uint_type); break; case GLSLstd450PackSnorm4x8: @@ -4812,10 +9997,71 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp"); break; - // TODO: - // GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier) - // GLSLstd450InterpolateAtSample (sample_no_perspective qualifier) - // GLSLstd450InterpolateAtOffset + case GLSLstd450InterpolateAtCentroid: + { + // We can't just emit the expression normally, because the qualified name contains a call to the default + // interpolate method, or refers to a local variable. We saved the interface index we need; use it to construct + // the base for the method call. 
+ uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex); + string component; + if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr)) + { + uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr); + auto *c = maybe_get(index_expr); + if (!c || c->specialization) + component = join("[", to_expression(index_expr), "]"); + else + component = join(".", index_to_swizzle(c->scalar())); + } + emit_op(result_type, id, + join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), + ".interpolate_at_centroid()", component), + should_forward(args[0])); + break; + } + + case GLSLstd450InterpolateAtSample: + { + uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex); + string component; + if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr)) + { + uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr); + auto *c = maybe_get(index_expr); + if (!c || c->specialization) + component = join("[", to_expression(index_expr), "]"); + else + component = join(".", index_to_swizzle(c->scalar())); + } + emit_op(result_type, id, + join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), + ".interpolate_at_sample(", to_expression(args[1]), ")", component), + should_forward(args[0]) && should_forward(args[1])); + break; + } + + case GLSLstd450InterpolateAtOffset: + { + uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex); + string component; + if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr)) + { + uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr); + auto *c = maybe_get(index_expr); + if (!c || c->specialization) + component = join("[", 
to_expression(index_expr), "]"); + else + component = join(".", index_to_swizzle(c->scalar())); + } + // Like Direct3D, Metal puts the (0, 0) at the upper-left corner, not the center as SPIR-V and GLSL do. + // Offset the offset by (1/2 - 1/16), or 0.4375, to compensate for this. + // It has to be (1/2 - 1/16) and not 1/2, or several CTS tests subtly break on Intel. + emit_op(result_type, id, + join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), + ".interpolate_at_offset(", to_expression(args[1]), " + 0.4375)", component), + should_forward(args[0]) && should_forward(args[1])); + break; + } case GLSLstd450Distance: // MSL does not support scalar versions here. @@ -4823,7 +10069,8 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, { // Equivalent to length(a - b) -> abs(a - b). emit_op(result_type, id, - join("abs(", to_unpacked_expression(args[0]), " - ", to_unpacked_expression(args[1]), ")"), + join("abs(", to_enclosed_unpacked_expression(args[0]), " - ", + to_enclosed_unpacked_expression(args[1]), ")"), should_forward(args[0]) && should_forward(args[1])); inherit_expression_dependencies(id, args[0]); inherit_expression_dependencies(id, args[1]); @@ -4833,27 +10080,27 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; case GLSLstd450Length: - // MSL does not support scalar versions here. + // MSL does not support scalar versions, so use abs(). if (expression_type(args[0]).vecsize == 1) - { - // Equivalent to abs(). emit_unary_func_op(result_type, id, args[0], "abs"); - } else CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; case GLSLstd450Normalize: + { + auto &exp_type = expression_type(args[0]); // MSL does not support scalar versions here. - if (expression_type(args[0]).vecsize == 1) - { - // Returns -1 or 1 for valid input, sign() does the job. 
+ // MSL has no implementation for normalize in the fast:: namespace for half2 and half3 + // Returns -1 or 1 for valid input, sign() does the job. + if (exp_type.vecsize == 1) emit_unary_func_op(result_type, id, args[0], "sign"); - } + else if (exp_type.vecsize <= 3 && exp_type.basetype == SPIRType::Half) + emit_unary_func_op(result_type, id, args[0], "normalize"); else - CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + emit_unary_func_op(result_type, id, args[0], "fast::normalize"); break; - + } case GLSLstd450Reflect: if (get(result_type).vecsize == 1) emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); @@ -4868,12 +10115,87 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; + case GLSLstd450FaceForward: + if (get(result_type).vecsize == 1) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + // Special case. If the variable is a scalar access chain, we cannot use it directly. We have to emit a temporary. + // Another special case is if the variable is in a storage class which is not thread. + auto *ptr = maybe_get(args[1]); + auto &type = expression_type(args[1]); + + bool is_thread_storage = storage_class_array_is_thread(type.storage); + if (type.storage == StorageClassOutput && capture_output_to_buffer) + is_thread_storage = false; + + if (!is_thread_storage || + (ptr && ptr->access_chain && is_scalar(expression_type(args[1])))) + { + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + + // Need to create temporaries and copy over to access chain after. + // We cannot directly take the reference of a vector swizzle in MSL, even if it's scalar ... 
+ uint32_t &tmp_id = extra_sub_expressions[id]; + if (!tmp_id) + tmp_id = ir.increase_bound_by(1); + + uint32_t tmp_type_id = get_pointee_type_id(expression_type_id(args[1])); + emit_uninitialized_temporary_expression(tmp_type_id, tmp_id); + emit_binary_func_op(result_type, id, args[0], tmp_id, eop == GLSLstd450Modf ? "modf" : "frexp"); + statement(to_expression(args[1]), " = ", to_expression(tmp_id), ";"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } + default: CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; } } +void CompilerMSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t count) +{ + enum AMDShaderTrinaryMinMax + { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 + }; + + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Trinary min/max functions require MSL 2.1."); + + auto op = static_cast(eop); + + switch (op) + { + case FMid3AMD: + case UMid3AMD: + case SMid3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "median3"); + break; + default: + CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(result_type, id, eop, args, count); + break; + } +} + // Emit a structure declaration for the specified interface variable. 
void CompilerMSL::emit_interface_block(uint32_t ib_var_id) { @@ -4881,7 +10203,8 @@ void CompilerMSL::emit_interface_block(uint32_t ib_var_id) { auto &ib_var = get(ib_var_id); auto &ib_type = get_variable_data_type(ib_var); - assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty()); + //assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty()); + assert(ib_type.basetype == SPIRType::Struct); emit_struct(ib_type); } } @@ -4896,30 +10219,34 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) local_variable_names = resource_names; string decl; - processing_entry_point = (func.self == ir.default_entry_point); + processing_entry_point = func.self == ir.default_entry_point; + + // Metal helper functions must be static force-inline otherwise they will cause problems when linked together in a single Metallib. + if (!processing_entry_point) + statement(force_inline); auto &type = get(func.return_type); - if (type.array.empty()) + if (!type.array.empty() && msl_options.force_native_arrays) { - decl += func_type_decl(type); + // We cannot return native arrays in MSL, so "return" through an out variable. + decl += "void"; } else { - // We cannot return arrays in MSL, so "return" through an out variable. - decl = "void"; + decl += func_type_decl(type); } decl += " "; decl += to_name(func.self); decl += "("; - if (!type.array.empty()) + if (!type.array.empty() && msl_options.force_native_arrays) { // Fake arrays returns by writing to an out array instead. decl += "thread "; decl += type_to_glsl(type); - decl += " (&SPIRV_Cross_return_value)"; + decl += " (&spvReturnValue)"; decl += type_to_array_glsl(type); if (!func.arguments.empty()) decl += ", "; @@ -4932,6 +10259,9 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) else decl += entry_point_args_classic(!func.arguments.empty()); + // append entry point args to avoid conflicts in local variable names. 
+ local_variable_names.insert(resource_names.begin(), resource_names.end()); + // If entry point function has variables that require early declaration, // ensure they each have an empty initializer, creating one if needed. // This is done at this late stage because the initialization expression @@ -4939,7 +10269,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) for (auto var_id : vars_needing_early_declaration) { auto &ed_var = get(var_id); - uint32_t &initializer = ed_var.initializer; + ID &initializer = ed_var.initializer; if (!initializer) initializer = ir.increase_bound_by(1); @@ -4968,19 +10298,46 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) decl += argument_decl(arg); - // Manufacture automatic sampler arg for SampledImage texture + bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + auto &arg_type = get(arg.type); - if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer) - decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id)); + if (arg_type.basetype == SPIRType::SampledImage && !is_dynamic_img_sampler) + { + // Manufacture automatic plane args for multiplanar texture + uint32_t planes = 1; + if (auto *constexpr_sampler = find_constexpr_sampler(name_id)) + if (constexpr_sampler->ycbcr_conversion_enable) + planes = constexpr_sampler->planes; + for (uint32_t i = 1; i < planes; i++) + decl += join(", ", argument_decl(arg), plane_name_suffix, i); + + // Manufacture automatic sampler arg for SampledImage texture + if (arg_type.image.dim != DimBuffer) + { + if (arg_type.array.empty()) + { + decl += join(", ", sampler_type(arg_type, arg.id), " ", to_sampler_expression(arg.id)); + } + else + { + const char *sampler_address_space = + descriptor_address_space(name_id, + StorageClassUniformConstant, + "thread const"); + decl += join(", ", sampler_address_space, " ", 
sampler_type(arg_type, arg.id), "& ", to_sampler_expression(arg.id)); + } + } + } // Manufacture automatic swizzle arg. - if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type)) + if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type) && + !is_dynamic_img_sampler) { bool arg_is_array = !arg_type.array.empty(); decl += join(", constant uint", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id)); } - if (buffers_requiring_array_length.count(name_id)) + if (buffer_requires_array_length(name_id)) { bool arg_is_array = !arg_type.array.empty(); decl += join(", constant uint", arg_is_array ? "* " : "& ", to_buffer_size_expression(name_id)); @@ -4994,60 +10351,158 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) statement(decl); } +static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sampler) +{ + // For now, only multiplanar images need explicit reconstruction. GBGR and BGRG images + // use implicit reconstruction. + return constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && constexpr_sampler->planes > 1; +} + // Returns the texture sampling function string for the specified image and sampling characteristics. -string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool, - bool has_offset, bool, bool has_dref, uint32_t, uint32_t) +string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args) { + VariableID img = args.base.img; + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + // Special-case gather. 
We have to alter the component being looked up // in the swizzle case. - if (msl_options.swizzle_texture_samples && is_gather) + if (msl_options.swizzle_texture_samples && args.base.is_gather && !is_dynamic_img_sampler && + (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable)) { - string fname = imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; - fname += "<" + type_to_glsl(get(imgtype.image.type)) + ", metal::" + type_to_glsl(imgtype); - // Add the arg types ourselves. Yes, this sucks, but Clang can't - // deduce template pack parameters in the middle of an argument list. - switch (imgtype.image.dim) - { - case Dim2D: - fname += ", float2"; - if (imgtype.image.arrayed) - fname += ", uint"; - if (imgtype.image.depth) - fname += ", float"; - if (!imgtype.image.depth || has_offset) - fname += ", int2"; - break; - case DimCube: - fname += ", float3"; - if (imgtype.image.arrayed) - fname += ", uint"; - if (imgtype.image.depth) - fname += ", float"; - break; - default: - SPIRV_CROSS_THROW("Invalid texture dimension for gather op."); - } - fname += ">"; - return fname; + bool is_compare = comparison_ids.count(img); + add_spv_func_and_recompile(is_compare ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle); + return is_compare ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; } auto *combined = maybe_get(img); // Texture reference - string fname = to_expression(combined ? 
combined->image : img) + "."; - if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype)) - fname = "spvTextureSwizzle(" + fname; - - // Texture function and sampler - if (is_fetch) - fname += "read"; - else if (is_gather) - fname += "gather"; + string fname; + if (needs_chroma_reconstruction(constexpr_sampler) && !is_dynamic_img_sampler) + { + if (constexpr_sampler->planes != 2 && constexpr_sampler->planes != 3) + SPIRV_CROSS_THROW("Unhandled number of color image planes!"); + // 444 images aren't downsampled, so we don't need to do linear filtering. + if (constexpr_sampler->resolution == MSL_FORMAT_RESOLUTION_444 || + constexpr_sampler->chroma_filter == MSL_SAMPLER_FILTER_NEAREST) + { + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest3Plane); + fname = "spvChromaReconstructNearest"; + } + else // Linear with a downsampled format + { + fname = "spvChromaReconstructLinear"; + switch (constexpr_sampler->resolution) + { + case MSL_FORMAT_RESOLUTION_444: + assert(false); + break; // not reached + case MSL_FORMAT_RESOLUTION_422: + switch (constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven3Plane); + fname += "422CositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint3Plane); + fname += "422Midpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid chroma location."); + } + break; + case MSL_FORMAT_RESOLUTION_420: + fname += "420"; + switch 
(constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane); + fname += "XCositedEvenYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane); + fname += "XCositedEvenYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane); + fname += "XMidpointYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane); + fname += "XMidpointYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid X chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid format resolution."); + } + } + } else - fname += "sample"; + { + fname = to_expression(combined ? 
combined->image : img) + "."; + + // Texture function and sampler + if (args.base.is_fetch) + fname += "read"; + else if (args.base.is_gather) + fname += "gather"; + else + fname += "sample"; - if (has_dref) - fname += "_compare"; + if (args.has_dref) + fname += "_compare"; + } return fname; } @@ -5068,28 +10523,72 @@ static inline bool sampling_type_needs_f32_conversion(const SPIRType &type) } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. -string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y, - uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, - uint32_t sample, uint32_t minlod, bool *p_forward) +string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward) { + VariableID img = args.base.img; + auto &imgtype = *args.base.imgtype; + uint32_t lod = args.lod; + uint32_t grad_x = args.grad_x; + uint32_t grad_y = args.grad_y; + uint32_t bias = args.bias; + + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + string farg_str; - if (!is_fetch) - farg_str += to_sampler_expression(img); + bool forward = true; - if (msl_options.swizzle_texture_samples && is_gather) + if (!is_dynamic_img_sampler) { - if (!farg_str.empty()) - farg_str += ", "; + // Texture reference (for some cases) + if (needs_chroma_reconstruction(constexpr_sampler)) + { + // Multiplanar images need two or three textures. 
+ farg_str += to_expression(img); + for (uint32_t i = 1; i < constexpr_sampler->planes; i++) + farg_str += join(", ", to_expression(img), plane_name_suffix, i); + } + else if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && args.base.is_gather) + { + auto *combined = maybe_get(img); + farg_str += to_expression(combined ? combined->image : img); + } + + // Sampler reference + if (!args.base.is_fetch) + { + if (!farg_str.empty()) + farg_str += ", "; + farg_str += to_sampler_expression(img); + } + + if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && args.base.is_gather) + { + // Add the swizzle constant from the swizzle buffer. + farg_str += ", " + to_swizzle_expression(img); + used_swizzle_buffer = true; + } - auto *combined = maybe_get(img); - farg_str += to_expression(combined ? combined->image : img); + // Swizzled gather puts the component before the other args, to allow template + // deduction to work. 
+ if (args.component && msl_options.swizzle_texture_samples) + { + forward = should_forward(args.component); + farg_str += ", " + to_component_argument(args.component); + } } // Texture coordinates - bool forward = should_forward(coord); - auto coord_expr = to_enclosed_expression(coord); - auto &coord_type = expression_type(coord); + forward = forward && should_forward(args.coord); + auto coord_expr = to_enclosed_expression(args.coord); + auto &coord_type = expression_type(args.coord); bool coord_is_fp = type_is_floating_point(coord_type); bool is_cube_fetch = false; @@ -5103,11 +10602,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (coord_type.vecsize > 1) tex_coords = enclose_expression(tex_coords) + ".x"; - if (is_fetch) + if (args.base.is_fetch) tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; else if (sampling_type_needs_f32_conversion(coord_type)) tex_coords = convert_to_f32(tex_coords, 1); + if (msl_options.texture_1D_as_2D) + { + if (args.base.is_fetch) + tex_coords = "uint2(" + tex_coords + ", 0)"; + else + tex_coords = "float2(" + tex_coords + ", 0.5)"; + } + alt_coord_component = 1; break; @@ -5122,25 +10629,36 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool else { // Metal texel buffer textures are 2D, so convert 1D coord to 2D. - if (is_fetch) - tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + // Support for Metal 2.1's new texture_buffer type. 
+ if (args.base.is_fetch) + { + if (msl_options.texel_buffer_texture_width > 0) + { + tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + } + else + { + tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ", " + + to_expression(img) + ")"; + } + } } alt_coord_component = 1; break; case DimSubpassData: - if (imgtype.image.ms) - tex_coords = "uint2(gl_FragCoord.xy)"; - else - tex_coords = join("uint2(gl_FragCoord.xy), 0"); + // If we're using Metal's native frame-buffer fetch API for subpass inputs, + // this path will not be hit. + tex_coords = "uint2(gl_FragCoord.xy)"; + alt_coord_component = 2; break; case Dim2D: if (coord_type.vecsize > 2) tex_coords = enclose_expression(tex_coords) + ".xy"; - if (is_fetch) + if (args.base.is_fetch) tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; else if (sampling_type_needs_f32_conversion(coord_type)) tex_coords = convert_to_f32(tex_coords, 2); @@ -5152,7 +10670,7 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (coord_type.vecsize > 3) tex_coords = enclose_expression(tex_coords) + ".xyz"; - if (is_fetch) + if (args.base.is_fetch) tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; else if (sampling_type_needs_f32_conversion(coord_type)) tex_coords = convert_to_f32(tex_coords, 3); @@ -5161,7 +10679,7 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool break; case DimCube: - if (is_fetch) + if (args.base.is_fetch) { is_cube_fetch = true; tex_coords += ".xy"; @@ -5183,76 +10701,114 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool break; } - if (is_fetch && offset) - { - // Fetch offsets must be applied directly to the coordinate. 
- forward = forward && should_forward(offset); - auto &type = expression_type(offset); - if (type.basetype != SPIRType::UInt) - tex_coords += " + " + bitcast_expression(SPIRType::UInt, offset); - else - tex_coords += " + " + to_enclosed_expression(offset); - } - else if (is_fetch && coffset) + if (args.base.is_fetch && args.offset) { // Fetch offsets must be applied directly to the coordinate. - forward = forward && should_forward(coffset); - auto &type = expression_type(coffset); - if (type.basetype != SPIRType::UInt) - tex_coords += " + " + bitcast_expression(SPIRType::UInt, coffset); + forward = forward && should_forward(args.offset); + auto &type = expression_type(args.offset); + if (imgtype.image.dim == Dim1D && msl_options.texture_1D_as_2D) + { + if (type.basetype != SPIRType::UInt) + tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, args.offset), ", 0)"); + else + tex_coords += join(" + uint2(", to_enclosed_expression(args.offset), ", 0)"); + } else - tex_coords += " + " + to_enclosed_expression(coffset); + { + if (type.basetype != SPIRType::UInt) + tex_coords += " + " + bitcast_expression(SPIRType::UInt, args.offset); + else + tex_coords += " + " + to_enclosed_expression(args.offset); + } } // If projection, use alt coord as divisor - if (is_proj) + if (args.base.is_proj) { if (sampling_type_needs_f32_conversion(coord_type)) - tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1); + tex_coords += " / " + convert_to_f32(to_extract_component_expression(args.coord, alt_coord_component), 1); else - tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component); + tex_coords += " / " + to_extract_component_expression(args.coord, alt_coord_component); } if (!farg_str.empty()) farg_str += ", "; - farg_str += tex_coords; - // If fetch from cube, add face explicitly - if (is_cube_fetch) + if (imgtype.image.dim == DimCube && imgtype.image.arrayed && msl_options.emulate_cube_array) { - 
// Special case for cube arrays, face and layer are packed in one dimension. - if (imgtype.image.arrayed) - farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") % 6u"; + farg_str += "spvCubemapTo2DArrayFace(" + tex_coords + ").xy"; + + if (is_cube_fetch) + farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ")"; else - farg_str += ", uint(" + round_fp_tex_coords(to_extract_component_expression(coord, 2), coord_is_fp) + ")"; + farg_str += + ", uint(spvCubemapTo2DArrayFace(" + tex_coords + ").z) + (uint(" + + round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) + + ") * 6u)"; + + add_spv_func_and_recompile(SPVFuncImplCubemapTo2DArrayFace); } + else + { + farg_str += tex_coords; + + // If fetch from cube, add face explicitly + if (is_cube_fetch) + { + // Special case for cube arrays, face and layer are packed in one dimension. + if (imgtype.image.arrayed) + farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") % 6u"; + else + farg_str += + ", uint(" + round_fp_tex_coords(to_extract_component_expression(args.coord, 2), coord_is_fp) + ")"; + } - // If array, use alt coord - if (imgtype.image.arrayed) - { - // Special case for cube arrays, face and layer are packed in one dimension. - if (imgtype.image.dim == DimCube && is_fetch) - farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") / 6u"; - else - farg_str += ", uint(" + - round_fp_tex_coords(to_extract_component_expression(coord, alt_coord_component), coord_is_fp) + - ")"; + // If array, use alt coord + if (imgtype.image.arrayed) + { + // Special case for cube arrays, face and layer are packed in one dimension. 
+ if (imgtype.image.dim == DimCube && args.base.is_fetch) + { + farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") / 6u"; + } + else + { + farg_str += + ", uint(" + + round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) + + ")"; + if (imgtype.image.dim == DimSubpassData) + { + if (msl_options.multiview) + farg_str += " + gl_ViewIndex"; + else if (msl_options.arrayed_subpass_input) + farg_str += " + gl_Layer"; + } + } + } + else if (imgtype.image.dim == DimSubpassData) + { + if (msl_options.multiview) + farg_str += ", gl_ViewIndex"; + else if (msl_options.arrayed_subpass_input) + farg_str += ", gl_Layer"; + } } // Depth compare reference value - if (dref) + if (args.dref) { - forward = forward && should_forward(dref); + forward = forward && should_forward(args.dref); farg_str += ", "; - auto &dref_type = expression_type(dref); + auto &dref_type = expression_type(args.dref); string dref_expr; - if (is_proj) - dref_expr = - join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component)); + if (args.base.is_proj) + dref_expr = join(to_enclosed_expression(args.dref), " / ", + to_extract_component_expression(args.coord, alt_coord_component)); else - dref_expr = to_expression(dref); + dref_expr = to_expression(args.dref); if (sampling_type_needs_f32_conversion(dref_type)) dref_expr = convert_to_f32(dref_expr, 1); @@ -5274,10 +10830,10 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool grad_y = 0; farg_str += ", level(0)"; } - else + else if (!msl_options.supports_msl_version(2, 3)) { SPIRV_CROSS_THROW("Using non-constant 0.0 gradient() qualifier for sample_compare. 
This is not " - "supported in MSL macOS."); + "supported on macOS prior to MSL 2.3."); } } @@ -5289,27 +10845,27 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool { bias = 0; } - else + else if (!msl_options.supports_msl_version(2, 3)) { - SPIRV_CROSS_THROW( - "Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported in MSL macOS."); + SPIRV_CROSS_THROW("Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported " + "on macOS prior to MSL 2.3."); } } } // LOD Options // Metal does not support LOD for 1D textures. - if (bias && imgtype.image.dim != Dim1D) + if (bias && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(bias); farg_str += ", bias(" + to_expression(bias) + ")"; } // Metal does not support LOD for 1D textures. - if (lod && imgtype.image.dim != Dim1D) + if (lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(lod); - if (is_fetch) + if (args.base.is_fetch) { farg_str += ", " + to_expression(lod); } @@ -5318,8 +10874,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool farg_str += ", level(" + to_expression(lod) + ")"; } } - else if (is_fetch && !lod && imgtype.image.dim != Dim1D && imgtype.image.dim != DimBuffer && !imgtype.image.ms && - imgtype.image.sampled != 2) + else if (args.base.is_fetch && !lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D) && + imgtype.image.dim != DimBuffer && !imgtype.image.ms && imgtype.image.sampled != 2) { // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. // Check for sampled type as well, because is_fetch is also used for OpImageRead in MSL. @@ -5327,13 +10883,14 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool } // Metal does not support LOD for 1D textures. 
- if ((grad_x || grad_y) && imgtype.image.dim != Dim1D) + if ((grad_x || grad_y) && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(grad_x); forward = forward && should_forward(grad_y); string grad_opt; switch (imgtype.image.dim) { + case Dim1D: case Dim2D: grad_opt = "2d"; break; @@ -5341,7 +10898,10 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool grad_opt = "3d"; break; case DimCube: - grad_opt = "cube"; + if (imgtype.image.arrayed && msl_options.emulate_cube_array) + grad_opt = "2d"; + else + grad_opt = "cube"; break; default: grad_opt = "unsupported_gradient_dimension"; @@ -5350,46 +10910,47 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool farg_str += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")"; } - if (minlod) + if (args.min_lod) { - if (msl_options.is_macos()) - { - if (!msl_options.supports_msl_version(2, 2)) - SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up on macOS."); - } - else if (msl_options.is_ios()) - SPIRV_CROSS_THROW("min_lod_clamp() is not supported on iOS."); + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up."); - forward = forward && should_forward(minlod); - farg_str += ", min_lod_clamp(" + to_expression(minlod) + ")"; + forward = forward && should_forward(args.min_lod); + farg_str += ", min_lod_clamp(" + to_expression(args.min_lod) + ")"; } // Add offsets string offset_expr; - if (coffset && !is_fetch) - { - forward = forward && should_forward(coffset); - offset_expr = to_expression(coffset); - } - else if (offset && !is_fetch) + const SPIRType *offset_type = nullptr; + if (args.offset && !args.base.is_fetch) { - forward = forward && should_forward(offset); - offset_expr = to_expression(offset); + forward = forward && should_forward(args.offset); + offset_expr = 
to_expression(args.offset); + offset_type = &expression_type(args.offset); } if (!offset_expr.empty()) { switch (imgtype.image.dim) { + case Dim1D: + if (!msl_options.texture_1D_as_2D) + break; + if (offset_type->vecsize > 1) + offset_expr = enclose_expression(offset_expr) + ".x"; + + farg_str += join(", int2(", offset_expr, ", 0)"); + break; + case Dim2D: - if (coord_type.vecsize > 2) + if (offset_type->vecsize > 2) offset_expr = enclose_expression(offset_expr) + ".xy"; farg_str += ", " + offset_expr; break; case Dim3D: - if (coord_type.vecsize > 3) + if (offset_type->vecsize > 3) offset_expr = enclose_expression(offset_expr) + ".xyz"; farg_str += ", " + offset_expr; @@ -5400,30 +10961,37 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool } } - if (comp) + if (args.component) { // If 2D has gather component, ensure it also has an offset arg if (imgtype.image.dim == Dim2D && offset_expr.empty()) farg_str += ", int2(0)"; - forward = forward && should_forward(comp); - farg_str += ", " + to_component_argument(comp); - } + if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler) + { + forward = forward && should_forward(args.component); - if (sample) - { - forward = forward && should_forward(sample); - farg_str += ", "; - farg_str += to_expression(sample); + uint32_t image_var = 0; + if (const auto *combined = maybe_get(img)) + { + if (const auto *img_var = maybe_get_backing_variable(combined->image)) + image_var = img_var->self; + } + else if (const auto *var = maybe_get_backing_variable(img)) + { + image_var = var->self; + } + + if (image_var == 0 || !is_depth_image(expression_type(image_var), image_var)) + farg_str += ", " + to_component_argument(args.component); + } } - if (msl_options.swizzle_texture_samples && is_sampled_image_type(imgtype)) + if (args.sample) { - // Add the swizzle constant from the swizzle buffer. 
- if (!is_gather) - farg_str += ")"; - farg_str += ", " + to_swizzle_expression(img); - used_swizzle_buffer = true; + forward = forward && should_forward(args.sample); + farg_str += ", "; + farg_str += to_expression(args.sample); } *p_forward = forward; @@ -5441,13 +11009,7 @@ string CompilerMSL::round_fp_tex_coords(string tex_coords, bool coord_is_fp) // The ID must be a scalar constant. string CompilerMSL::to_component_argument(uint32_t id) { - if (ir.ids[id].get_type() != TypeConstant) - { - SPIRV_CROSS_THROW("ID " + to_string(id) + " is not an OpConstant."); - return "component::x"; - } - - uint32_t component_index = get(id).scalar(); + uint32_t component_index = evaluate_constant_u32(id); switch (component_index) { case 0: @@ -5462,7 +11024,6 @@ string CompilerMSL::to_component_argument(uint32_t id) default: SPIRV_CROSS_THROW("The value (" + to_string(component_index) + ") of OpConstant ID " + to_string(id) + " is not a valid Component index, which must be one of 0, 1, 2, or 3."); - return "component::x"; } } @@ -5472,14 +11033,222 @@ void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id set(result_id, result_type, image_id, samp_id); } +string CompilerMSL::to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions) +{ + auto *ops = stream(i); + uint32_t result_type_id = ops[0]; + uint32_t img = ops[2]; + auto &result_type = get(result_type_id); + auto op = static_cast(i.op); + bool is_gather = (op == OpImageGather || op == OpImageDrefGather); + + // Bypass pointers because we need the real image struct + auto &type = expression_type(img); + auto &imgtype = get(type.self); + + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? 
var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + + string expr; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler) + { + // If this needs sampler Y'CbCr conversion, we need to do some additional + // processing. + switch (constexpr_sampler->ycbcr_model) + { + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY: + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY: + // Default + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT709); + expr += "spvConvertYCbCrBT709("; + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT601); + expr += "spvConvertYCbCrBT601("; + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT2020); + expr += "spvConvertYCbCrBT2020("; + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion."); + } + + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + { + switch (constexpr_sampler->ycbcr_range) + { + case MSL_SAMPLER_YCBCR_RANGE_ITU_FULL: + add_spv_func_and_recompile(SPVFuncImplExpandITUFullRange); + expr += "spvExpandITUFullRange("; + break; + case MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW: + add_spv_func_and_recompile(SPVFuncImplExpandITUNarrowRange); + expr += "spvExpandITUNarrowRange("; + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr range."); + } + } + } + else if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) && + !is_dynamic_img_sampler) + { + add_spv_func_and_recompile(SPVFuncImplTextureSwizzle); + expr += "spvTextureSwizzle("; + } + + string inner_expr = CompilerGLSL::to_texture_op(i, sparse, forward, inherited_expressions); + + if (constexpr_sampler && 
constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler) + { + if (!constexpr_sampler->swizzle_is_identity()) + { + static const char swizzle_names[] = "rgba"; + if (!constexpr_sampler->swizzle_has_one_or_zero()) + { + // If we can, do it inline. + expr += inner_expr + "."; + for (uint32_t c = 0; c < 4; c++) + { + switch (constexpr_sampler->swizzle[c]) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + expr += swizzle_names[c]; + break; + case MSL_COMPONENT_SWIZZLE_R: + case MSL_COMPONENT_SWIZZLE_G: + case MSL_COMPONENT_SWIZZLE_B: + case MSL_COMPONENT_SWIZZLE_A: + expr += swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R]; + break; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } + } + } + else + { + // Otherwise, we need to emit a temporary and swizzle that. + uint32_t temp_id = ir.increase_bound_by(1); + emit_op(result_type_id, temp_id, inner_expr, false); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(temp_id, inherit); + inherited_expressions.clear(); + inherited_expressions.push_back(temp_id); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(temp_id); + break; + + default: + break; + } + expr += type_to_glsl(result_type) + "("; + for (uint32_t c = 0; c < 4; c++) + { + switch (constexpr_sampler->swizzle[c]) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + expr += to_expression(temp_id) + "." + swizzle_names[c]; + break; + case MSL_COMPONENT_SWIZZLE_ZERO: + expr += "0"; + break; + case MSL_COMPONENT_SWIZZLE_ONE: + expr += "1"; + break; + case MSL_COMPONENT_SWIZZLE_R: + case MSL_COMPONENT_SWIZZLE_G: + case MSL_COMPONENT_SWIZZLE_B: + case MSL_COMPONENT_SWIZZLE_A: + expr += to_expression(temp_id) + "." 
+ + swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R]; + break; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } + if (c < 3) + expr += ", "; + } + expr += ")"; + } + } + else + expr += inner_expr; + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + { + expr += join(", ", constexpr_sampler->bpc, ")"); + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY) + expr += ")"; + } + } + else + { + expr += inner_expr; + if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) && + !is_dynamic_img_sampler) + { + // Add the swizzle constant from the swizzle buffer. + expr += ", " + to_swizzle_expression(img) + ")"; + used_swizzle_buffer = true; + } + } + + return expr; +} + +static string create_swizzle(MSLComponentSwizzle swizzle) +{ + switch (swizzle) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + return "spvSwizzle::none"; + case MSL_COMPONENT_SWIZZLE_ZERO: + return "spvSwizzle::zero"; + case MSL_COMPONENT_SWIZZLE_ONE: + return "spvSwizzle::one"; + case MSL_COMPONENT_SWIZZLE_R: + return "spvSwizzle::red"; + case MSL_COMPONENT_SWIZZLE_G: + return "spvSwizzle::green"; + case MSL_COMPONENT_SWIZZLE_B: + return "spvSwizzle::blue"; + case MSL_COMPONENT_SWIZZLE_A: + return "spvSwizzle::alpha"; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } +} + // Returns a string representation of the ID, usable as a function arg. // Manufacture automatic sampler arg for SampledImage texture. -string CompilerMSL::to_func_call_arg(uint32_t id) +string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) { string arg_str; + auto &type = expression_type(id); + bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + // If the argument *itself* is a "dynamic" combined-image sampler, then we can just pass that around. 
+ bool arg_is_dynamic_img_sampler = has_extended_decoration(id, SPIRVCrossDecorationDynamicImageSampler); + if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler) + arg_str = join("spvDynamicImageSampler<", type_to_glsl(get(type.image.type)), ">("); + auto *c = maybe_get(id); - if (c && !get(c->constant_type).array.empty()) + if (msl_options.force_native_arrays && c && !get(c->constant_type).array.empty()) { // If we are passing a constant array directly to a function for some reason, // the callee will expect an argument in thread const address space @@ -5492,42 +11261,125 @@ string CompilerMSL::to_func_call_arg(uint32_t id) // so just create a thread local copy in the current function. arg_str = join("_", id, "_array_copy"); auto &constants = current_function->constant_arrays_needed_on_stack; - auto itr = find(begin(constants), end(constants), id); + auto itr = find(begin(constants), end(constants), ID(id)); if (itr == end(constants)) { force_recompile(); constants.push_back(id); } } + // Dereference pointer variables where needed. + // FIXME: This dereference is actually backwards. We should really just support passing pointer variables between functions. + else if (should_dereference(id)) + arg_str += dereference_expression(type, CompilerGLSL::to_func_call_arg(arg, id)); else - arg_str = CompilerGLSL::to_func_call_arg(id); - - // Manufacture automatic sampler arg if the arg is a SampledImage texture. - auto &type = expression_type(id); - if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) - { - // Need to check the base variable in case we need to apply a qualified alias. - uint32_t var_id = 0; - auto *sampler_var = maybe_get(id); - if (sampler_var) - var_id = sampler_var->basevariable; - - arg_str += ", " + to_sampler_expression(var_id ? var_id : id); - } + arg_str += CompilerGLSL::to_func_call_arg(arg, id); + // Need to check the base variable in case we need to apply a qualified alias. 
uint32_t var_id = 0; auto *var = maybe_get(id); if (var) var_id = var->basevariable; - if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) + if (!arg_is_dynamic_img_sampler) { - // Need to check the base variable in case we need to apply a qualified alias. - arg_str += ", " + to_swizzle_expression(var_id ? var_id : id); + auto *constexpr_sampler = find_constexpr_sampler(var_id ? var_id : id); + if (type.basetype == SPIRType::SampledImage) + { + // Manufacture automatic plane args for multiplanar texture + uint32_t planes = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + { + planes = constexpr_sampler->planes; + // If this parameter isn't aliasing a global, then we need to use + // the special "dynamic image-sampler" class to pass it--and we need + // to use it for *every* non-alias parameter, in case a combined + // image-sampler with a Y'CbCr conversion is passed. Hopefully, this + // pathological case is so rare that it should never be hit in practice. + if (!arg.alias_global_variable) + add_spv_func_and_recompile(SPVFuncImplDynamicImageSampler); + } + for (uint32_t i = 1; i < planes; i++) + arg_str += join(", ", CompilerGLSL::to_func_call_arg(arg, id), plane_name_suffix, i); + // Manufacture automatic sampler arg if the arg is a SampledImage texture. + if (type.image.dim != DimBuffer) + arg_str += ", " + to_sampler_expression(var_id ? 
var_id : id); + + // Add sampler Y'CbCr conversion info if we have it + if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + { + SmallVector samp_args; + + switch (constexpr_sampler->resolution) + { + case MSL_FORMAT_RESOLUTION_444: + // Default + break; + case MSL_FORMAT_RESOLUTION_422: + samp_args.push_back("spvFormatResolution::_422"); + break; + case MSL_FORMAT_RESOLUTION_420: + samp_args.push_back("spvFormatResolution::_420"); + break; + default: + SPIRV_CROSS_THROW("Invalid format resolution."); + } + + if (constexpr_sampler->chroma_filter != MSL_SAMPLER_FILTER_NEAREST) + samp_args.push_back("spvChromaFilter::linear"); + + if (constexpr_sampler->x_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN) + samp_args.push_back("spvXChromaLocation::midpoint"); + if (constexpr_sampler->y_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN) + samp_args.push_back("spvYChromaLocation::midpoint"); + switch (constexpr_sampler->ycbcr_model) + { + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY: + // Default + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_identity"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_709"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_601"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_2020"); + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion."); + } + if (constexpr_sampler->ycbcr_range != MSL_SAMPLER_YCBCR_RANGE_ITU_FULL) + samp_args.push_back("spvYCbCrRange::itu_narrow"); + samp_args.push_back(join("spvComponentBits(", constexpr_sampler->bpc, ")")); + arg_str += join(", spvYCbCrSampler(", merge(samp_args), ")"); + } + } + + if (is_dynamic_img_sampler && constexpr_sampler && 
constexpr_sampler->ycbcr_conversion_enable) + arg_str += join(", (uint(", create_swizzle(constexpr_sampler->swizzle[3]), ") << 24) | (uint(", + create_swizzle(constexpr_sampler->swizzle[2]), ") << 16) | (uint(", + create_swizzle(constexpr_sampler->swizzle[1]), ") << 8) | uint(", + create_swizzle(constexpr_sampler->swizzle[0]), ")"); + else if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) + arg_str += ", " + to_swizzle_expression(var_id ? var_id : id); + + if (buffer_requires_array_length(var_id)) + arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id); + + if (is_dynamic_img_sampler) + arg_str += ")"; } - if (buffers_requiring_array_length.count(var_id)) - arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id); + // Emulate texture2D atomic operations + auto *backing_var = maybe_get_backing_variable(var_id); + if (backing_var && atomic_image_vars.count(backing_var->self)) + { + arg_str += ", " + to_expression(var_id) + "_atomic"; + } return arg_str; } @@ -5538,7 +11390,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id) string CompilerMSL::to_sampler_expression(uint32_t id) { auto *combined = maybe_get(id); - auto expr = to_expression(combined ? combined->image : id); + auto expr = to_expression(combined ? combined->image : VariableID(id)); auto index = expr.find_first_of('['); uint32_t samp_id = 0; @@ -5559,13 +11411,13 @@ string CompilerMSL::to_swizzle_expression(uint32_t id) { auto *combined = maybe_get(id); - auto expr = to_expression(combined ? combined->image : id); + auto expr = to_expression(combined ? combined->image : VariableID(id)); auto index = expr.find_first_of('['); // If an image is part of an argument buffer translate this to a legal identifier. 
- for (auto &c : expr) - if (c == '.') - c = '_'; + string::size_type period = 0; + while ((period = expr.find_first_of('.', period)) != string::npos && period < index) + expr[period] = '_'; if (index == string::npos) return expr + swizzle_name_suffix; @@ -5621,99 +11473,39 @@ bool CompilerMSL::is_patch_block(const SPIRType &type) // Checks whether the ID is a row_major matrix that requires conversion before use bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id) { - // Natively supported row-major matrices do not need to be converted. - if (backend.native_row_major_matrix) - return false; - - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_decoration(id, DecorationRowMajor)) - return false; - - // Generate a function that will swap matrix elements from row-major to column-major. - // Packed row-matrix should just use transpose() function. - if (!has_extended_decoration(id, SPIRVCrossDecorationPacked)) - { - const auto type = expression_type(id); - add_convert_row_major_matrix_function(type.columns, type.vecsize); - } - - return true; + auto *e = maybe_get(id); + if (e) + return e->need_transpose; + else + return has_decoration(id, DecorationRowMajor); } // Checks whether the member is a row_major matrix that requires conversion before use bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) { - // Natively supported row-major matrices do not need to be converted. - if (backend.native_row_major_matrix) - return false; - - // Non-matrix or column-major matrix types do not need to be converted. - if (!has_member_decoration(type.self, index, DecorationRowMajor)) - return false; - - // Generate a function that will swap matrix elements from row-major to column-major. - // Packed row-matrix should just use transpose() function. 
- if (!has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked)) - { - const auto mbr_type = get(type.member_types[index]); - add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize); - } - - return true; + return has_member_decoration(type.self, index, DecorationRowMajor); } -// Adds a function suitable for converting a non-square row-major matrix to a column-major matrix. -void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows) +string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, + bool is_packed) { - SPVFuncImpl spv_func; - if (cols == rows) // Square matrix...just use transpose() function - return; - else if (cols == 2 && rows == 3) - spv_func = SPVFuncImplRowMajor2x3; - else if (cols == 2 && rows == 4) - spv_func = SPVFuncImplRowMajor2x4; - else if (cols == 3 && rows == 2) - spv_func = SPVFuncImplRowMajor3x2; - else if (cols == 3 && rows == 4) - spv_func = SPVFuncImplRowMajor3x4; - else if (cols == 4 && rows == 2) - spv_func = SPVFuncImplRowMajor4x2; - else if (cols == 4 && rows == 3) - spv_func = SPVFuncImplRowMajor4x3; - else - SPIRV_CROSS_THROW("Could not convert row-major matrix."); - - auto rslt = spv_function_implementations.insert(spv_func); - if (rslt.second) + if (!is_matrix(exp_type)) { - suppress_missing_prototypes = true; - force_recompile(); + return CompilerGLSL::convert_row_major_matrix(std::move(exp_str), exp_type, physical_type_id, is_packed); } -} - -// Wraps the expression string in a function call that converts the -// row_major matrix result of the expression to a column_major matrix. 
-string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, bool is_packed) -{ - strip_enclosed_expression(exp_str); - - string func_name; - - // Square and packed matrices can just use transpose - if (exp_type.columns == exp_type.vecsize || is_packed) - func_name = "transpose"; else - func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize); - - return join(func_name, "(", exp_str, ")"); + { + strip_enclosed_expression(exp_str); + if (physical_type_id != 0 || is_packed) + exp_str = unpack_expression_type(exp_str, exp_type, physical_type_id, is_packed, true); + return join("transpose(", exp_str, ")"); + } } // Called automatically at the end of the entry point function void CompilerMSL::emit_fixup() { - if ((get_execution_model() == ExecutionModelVertex || - get_execution_model() == ExecutionModelTessellationEvaluation) && - stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer) + if (is_vertex_like_shader() && stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer) { if (options.vertex.fixup_clipspace) statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name, @@ -5728,89 +11520,149 @@ void CompilerMSL::emit_fixup() string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier) { - auto &membertype = get(member_type_id); - - // If this member requires padding to maintain alignment, emit a dummy padding member. - MSLStructMemberKey key = get_struct_member_key(type.self, index); - uint32_t pad_len = struct_member_padding[key]; - if (pad_len > 0) - statement("char _m", index, "_pad", "[", to_string(pad_len), "];"); + if (member_is_remapped_physical_type(type, index)) + member_type_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); + auto &physical_type = get(member_type_id); // If this member is packed, mark it as so. 
- string pack_pfx = ""; - - const SPIRType *effective_membertype = &membertype; - SPIRType override_type; + string pack_pfx; + // Allow Metal to use the array template to make arrays a value type uint32_t orig_id = 0; if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)) orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID); - if (member_is_packed_type(type, index)) + bool row_major = false; + if (is_matrix(physical_type)) + row_major = has_member_decoration(type.self, index, DecorationRowMajor); + + SPIRType row_major_physical_type; + const SPIRType *declared_type = &physical_type; + + // If a struct is being declared with physical layout, + // do not use array wrappers. + // This avoids a lot of complicated cases with packed vectors and matrices, + // and generally we cannot copy full arrays in and out of buffers into Function + // address space. + // Array of resources should also be declared as builtin arrays. + if (has_member_decoration(type.self, index, DecorationOffset)) + is_using_builtin_array = true; + else if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary)) + is_using_builtin_array = true; + + if (member_is_packed_physical_type(type, index)) { // If we're packing a matrix, output an appropriate typedef - if (membertype.basetype == SPIRType::Struct) + if (physical_type.basetype == SPIRType::Struct) { - pack_pfx = "/* FIXME: A padded struct is needed here. If you see this message, file a bug! 
*/ "; + SPIRV_CROSS_THROW("Cannot emit a packed struct currently."); } - else if (membertype.vecsize > 1 && membertype.columns > 1) + else if (is_matrix(physical_type)) { - uint32_t rows = membertype.vecsize; - uint32_t cols = membertype.columns; + uint32_t rows = physical_type.vecsize; + uint32_t cols = physical_type.columns; pack_pfx = "packed_"; - if (has_member_decoration(type.self, index, DecorationRowMajor)) + if (row_major) { // These are stored transposed. - rows = membertype.columns; - cols = membertype.vecsize; + rows = physical_type.columns; + cols = physical_type.vecsize; pack_pfx = "packed_rm_"; } - string base_type = membertype.width == 16 ? "half" : "float"; + string base_type = physical_type.width == 16 ? "half" : "float"; string td_line = "typedef "; td_line += "packed_" + base_type + to_string(rows); td_line += " " + pack_pfx; // Use the actual matrix size here. - td_line += base_type + to_string(membertype.columns) + "x" + to_string(membertype.vecsize); + td_line += base_type + to_string(physical_type.columns) + "x" + to_string(physical_type.vecsize); td_line += "[" + to_string(cols) + "]"; td_line += ";"; add_typedef_line(td_line); } - else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct && - type_struct_member_array_stride(type, index) == 4 * membertype.width / 8) + else if (!is_scalar(physical_type)) // scalar type is already packed. + pack_pfx = "packed_"; + } + else if (is_matrix(physical_type)) + { + if (!msl_options.supports_msl_version(3, 0) && + has_extended_decoration(type.self, SPIRVCrossDecorationWorkgroupStruct)) + { + pack_pfx = "spvStorage_"; + add_spv_func_and_recompile(SPVFuncImplStorageMatrix); + // The pack prefix causes problems with array wrappers. + is_using_builtin_array = true; + } + if (row_major) { - // A "packed" float array, but we pad here instead to 4-vector. 
- override_type = membertype; - override_type.vecsize = 4; - effective_membertype = &override_type; + // Need to declare type with flipped vecsize/columns. + row_major_physical_type = physical_type; + swap(row_major_physical_type.vecsize, row_major_physical_type.columns); + declared_type = &row_major_physical_type; } - else - pack_pfx = "packed_"; } - // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS. - if (msl_options.is_ios() && membertype.basetype == SPIRType::Image && membertype.image.sampled == 2) + // iOS Tier 1 argument buffers do not support writable images. + if (physical_type.basetype == SPIRType::Image && + physical_type.image.sampled == 2 && + msl_options.is_ios() && + msl_options.argument_buffers_tier <= Options::ArgumentBuffersTier::Tier1 && + !has_decoration(orig_id, DecorationNonWritable)) { - if (!has_decoration(orig_id, DecorationNonWritable)) - SPIRV_CROSS_THROW("Writable images are not allowed in argument buffers on iOS."); + SPIRV_CROSS_THROW("Writable images are not allowed on Tier1 argument buffers on iOS."); } // Array information is baked into these types. string array_type; - if (membertype.basetype != SPIRType::Image && membertype.basetype != SPIRType::Sampler && - membertype.basetype != SPIRType::SampledImage) + if (physical_type.basetype != SPIRType::Image && physical_type.basetype != SPIRType::Sampler && + physical_type.basetype != SPIRType::SampledImage) { - array_type = type_to_array_glsl(membertype); + BuiltIn builtin = BuiltInMax; + + // Special handling. In [[stage_out]] or [[stage_in]] blocks, + // we need flat arrays, but if we're somehow declaring gl_PerVertex for constant array reasons, we want + // template array types to be declared. 
+ bool is_ib_in_out = + ((stage_out_var_id && get_stage_out_struct_type().self == type.self && + variable_storage_requires_stage_io(StorageClassOutput)) || + (stage_in_var_id && get_stage_in_struct_type().self == type.self && + variable_storage_requires_stage_io(StorageClassInput))); + if (is_ib_in_out && is_member_builtin(type, index, &builtin)) + is_using_builtin_array = true; + array_type = type_to_array_glsl(physical_type); } - return join(pack_pfx, type_to_glsl(*effective_membertype, orig_id), " ", qualifier, to_member_name(type, index), - member_attribute_qualifier(type, index), array_type, ";"); + auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id, true), " ", qualifier, + to_member_name(type, index), member_attribute_qualifier(type, index), array_type, ";"); + + is_using_builtin_array = false; + return result; } // Emit a structure member, padding and packing to maintain the correct memeber alignments. void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier, uint32_t) { + // If this member requires padding to maintain its declared offset, emit a dummy padding member before it. + if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget)) + { + uint32_t pad_len = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget); + statement("char _m", index, "_pad", "[", pad_len, "];"); + } + + // Handle HLSL-style 0-based vertex/instance index. 
+ builtin_declaration = true; statement(to_struct_member(type, member_type_id, index, qualifier)); + builtin_declaration = false; +} + +void CompilerMSL::emit_struct_padding_target(const SPIRType &type) +{ + uint32_t struct_size = get_declared_struct_size_msl(type, true, true); + uint32_t target_size = get_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget); + if (target_size < struct_size) + SPIRV_CROSS_THROW("Cannot pad with negative bytes."); + else if (target_size > struct_size) + statement("char _m0_final_padding[", target_size - struct_size, "];"); } // Return a MSL qualifier for the specified function attribute member @@ -5825,8 +11677,15 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in bool is_builtin = is_member_builtin(type, index, &builtin); if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary)) - return join(" [[id(", - get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")]]"); + { + string quals = join( + " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")"); + if (interlocked_resources.count( + get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))) + quals += ", raster_order_group(0)"; + quals += "]]"; + return quals; + } // Vertex function inputs if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput) @@ -5841,6 +11700,8 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in case BuiltInInstanceId: case BuiltInInstanceIndex: case BuiltInBaseInstance: + if (msl_options.vertex_for_tessellation) + return ""; return string(" [[") + builtin_qualifier(builtin) + "]]"; case BuiltInDrawIndex: @@ -5850,13 +11711,19 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in return ""; } } - uint32_t locn = get_ordered_member_location(type.self, index); + + uint32_t 
locn; + if (is_builtin) + locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index); + else + locn = get_member_location(type.self, index); + if (locn != k_unknown_location) return string(" [[attribute(") + convert_to_string(locn) + ")]]"; } // Vertex and tessellation evaluation function outputs - if ((execution.model == ExecutionModelVertex || execution.model == ExecutionModelTessellationEvaluation) && + if (((execution.model == ExecutionModelVertex && !msl_options.vertex_for_tessellation) || is_tese_shader()) && type.storage == StorageClassOutput) { if (is_builtin) @@ -5876,26 +11743,41 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in /* fallthrough */ case BuiltInPosition: case BuiltInLayer: - case BuiltInClipDistance: return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); + case BuiltInClipDistance: + if (has_member_decoration(type.self, index, DecorationIndex)) + return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + else + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); + + case BuiltInCullDistance: + if (has_member_decoration(type.self, index, DecorationIndex)) + return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + else + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? 
"" : " "); + default: return ""; } } - uint32_t comp; - uint32_t locn = get_ordered_member_location(type.self, index, &comp); - if (locn != k_unknown_location) - { - if (comp != k_unknown_component) - return string(" [[user(locn") + convert_to_string(locn) + "_" + convert_to_string(comp) + ")]]"; - else - return string(" [[user(locn") + convert_to_string(locn) + ")]]"; - } + string loc_qual = member_location_attribute_qualifier(type, index); + if (!loc_qual.empty()) + return join(" [[", loc_qual, "]]"); + } + + if (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation && type.storage == StorageClassOutput) + { + // For this type of shader, we always arrange for it to capture its + // output to a buffer. For this reason, qualifiers are irrelevant here. + if (is_builtin) + // We still have to assign a location so the output struct will sort correctly. + get_or_allocate_builtin_output_member_location(builtin, type.self, index); + return ""; } // Tessellation control function inputs - if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassInput) + if (is_tesc_shader() && type.storage == StorageClassInput) { if (is_builtin) { @@ -5903,8 +11785,13 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { case BuiltInInvocationId: case BuiltInPrimitiveId: + if (msl_options.multi_patch_workgroup) + return ""; + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage case BuiltInSubgroupSize: // FIXME: Should work in any stage + if (msl_options.emulate_subgroups) + return ""; return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? 
"" : " "); case BuiltInPatchVertices: return ""; @@ -5913,21 +11800,32 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in break; } } - uint32_t locn = get_ordered_member_location(type.self, index); + if (msl_options.multi_patch_workgroup) + return ""; + + uint32_t locn; + if (is_builtin) + locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index); + else + locn = get_member_location(type.self, index); + if (locn != k_unknown_location) return string(" [[attribute(") + convert_to_string(locn) + ")]]"; } // Tessellation control function outputs - if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassOutput) + if (is_tesc_shader() && type.storage == StorageClassOutput) { // For this type of shader, we always arrange for it to capture its // output to a buffer. For this reason, qualifiers are irrelevant here. + if (is_builtin) + // We still have to assign a location so the output struct will sort correctly. + get_or_allocate_builtin_output_member_location(builtin, type.self, index); return ""; } // Tessellation evaluation function inputs - if (execution.model == ExecutionModelTessellationEvaluation && type.storage == StorageClassInput) + if (is_tese_shader() && type.storage == StorageClassInput) { if (is_builtin) { @@ -5943,10 +11841,20 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in break; } } + + if (msl_options.raw_buffer_tese_input) + return ""; + // The special control point array must not be marked with an attribute. 
if (get_type(type.member_types[index]).basetype == SPIRType::ControlPointArray) return ""; - uint32_t locn = get_ordered_member_location(type.self, index); + + uint32_t locn; + if (is_builtin) + locn = get_or_allocate_builtin_input_member_location(builtin, type.self, index); + else + locn = get_member_location(type.self, index); + if (locn != k_unknown_location) return string(" [[attribute(") + convert_to_string(locn) + ")]]"; } @@ -5962,7 +11870,7 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in switch (builtin) { case BuiltInViewIndex: - if (!msl_options.multiview) + if (!msl_options.multiview || !msl_options.multiview_layered_rendering) break; /* fallthrough */ case BuiltInFrontFacing: @@ -5971,29 +11879,24 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in case BuiltInSampleId: case BuiltInSampleMask: case BuiltInLayer: - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: quals = builtin_qualifier(builtin); break; + case BuiltInClipDistance: + return join(" [[user(clip", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + case BuiltInCullDistance: + return join(" [[user(cull", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + default: break; } } else - { - uint32_t comp; - uint32_t locn = get_ordered_member_location(type.self, index, &comp); - if (locn != k_unknown_location) - { - if (comp != k_unknown_component) - quals = string("user(locn") + convert_to_string(locn) + "_" + convert_to_string(comp) + ")"; - else - quals = string("user(locn") + convert_to_string(locn) + ")"; - } - } + quals = member_location_attribute_qualifier(type, index); - if (builtin == BuiltInBaryCoordNV || builtin == BuiltInBaryCoordNoPerspNV) + if (builtin == BuiltInBaryCoordKHR || builtin == BuiltInBaryCoordNoPerspKHR) { if (has_member_decoration(type.self, index, DecorationFlat) || has_member_decoration(type.self, 
index, DecorationCentroid) || @@ -6055,19 +11958,33 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in switch (builtin) { case BuiltInFragStencilRefEXT: + // Similar to PointSize, only mark FragStencilRef if there's a stencil buffer. + // Some shaders may include a FragStencilRef builtin even when used to render + // without a stencil attachment, and Metal will reject this builtin + // when compiling the shader into a render pipeline that does not set + // stencilAttachmentPixelFormat. + if (!msl_options.enable_frag_stencil_ref_builtin) + return ""; if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Stencil export only supported in MSL 2.1 and up."); return string(" [[") + builtin_qualifier(builtin) + "]]"; - case BuiltInSampleMask: case BuiltInFragDepth: + // Ditto FragDepth. + if (!msl_options.enable_frag_depth_builtin) + return ""; + /* fallthrough */ + case BuiltInSampleMask: return string(" [[") + builtin_qualifier(builtin) + "]]"; default: return ""; } } - uint32_t locn = get_ordered_member_location(type.self, index); + uint32_t locn = get_member_location(type.self, index); + // Metal will likely complain about missing color attachments, too. 
+ if (locn != k_unknown_location && !(msl_options.enable_frag_output_mask & (1 << locn))) + return ""; if (locn != k_unknown_location && has_member_decoration(type.self, index, DecorationIndex)) return join(" [[color(", locn, "), index(", get_member_decoration(type.self, index, DecorationIndex), ")]]"); @@ -6086,15 +12003,18 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { switch (builtin) { + case BuiltInNumSubgroups: + case BuiltInSubgroupId: + case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage + case BuiltInSubgroupSize: // FIXME: Should work in any stage + if (msl_options.emulate_subgroups) + break; + /* fallthrough */ case BuiltInGlobalInvocationId: case BuiltInWorkgroupId: case BuiltInNumWorkgroups: case BuiltInLocalInvocationId: case BuiltInLocalInvocationIndex: - case BuiltInNumSubgroups: - case BuiltInSubgroupId: - case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage - case BuiltInSubgroupSize: // FIXME: Should work in any stage return string(" [[") + builtin_qualifier(builtin) + "]]"; default: @@ -6106,28 +12026,136 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in return ""; } +// A user-defined output variable is considered to match an input variable in the subsequent +// stage if the two variables are declared with the same Location and Component decoration and +// match in type and decoration, except that interpolation decorations are not required to match. +// For the purposes of interface matching, variables declared without a Component decoration are +// considered to have a Component decoration of zero. 
+string CompilerMSL::member_location_attribute_qualifier(const SPIRType &type, uint32_t index) +{ + string quals; + uint32_t comp; + uint32_t locn = get_member_location(type.self, index, &comp); + if (locn != k_unknown_location) + { + quals += "user(locn"; + quals += convert_to_string(locn); + if (comp != k_unknown_component && comp != 0) + { + quals += "_"; + quals += convert_to_string(comp); + } + quals += ")"; + } + return quals; +} + // Returns the location decoration of the member with the specified index in the specified type. // If the location of the member has been explicitly set, that location is used. If not, this // function assumes the members are ordered in their location order, and simply returns the // index as the location. -uint32_t CompilerMSL::get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp) +uint32_t CompilerMSL::get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp) const { - auto &m = ir.meta[type_id]; - if (index < m.members.size()) + if (comp) { - auto &dec = m.members[index]; - if (comp) - { - if (dec.decoration_flags.get(DecorationComponent)) - *comp = dec.component; - else - *comp = k_unknown_component; - } - if (dec.decoration_flags.get(DecorationLocation)) - return dec.location; + if (has_member_decoration(type_id, index, DecorationComponent)) + *comp = get_member_decoration(type_id, index, DecorationComponent); + else + *comp = k_unknown_component; + } + + if (has_member_decoration(type_id, index, DecorationLocation)) + return get_member_decoration(type_id, index, DecorationLocation); + else + return k_unknown_location; +} + +uint32_t CompilerMSL::get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, + uint32_t *comp) +{ + uint32_t loc = get_member_location(type_id, index, comp); + if (loc != k_unknown_location) + return loc; + + if (comp) + *comp = k_unknown_component; + + // Late allocation. 
Find a location which is unused by the application. + // This can happen for built-in inputs in tessellation which are mixed and matched with user inputs. + auto &mbr_type = get(get(type_id).member_types[index]); + uint32_t count = type_to_location_count(mbr_type); + + loc = 0; + + const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool { + for (uint32_t i = 0; i < location_count; i++) + if (location_inputs_in_use.count(location + i) != 0) + return true; + return false; + }; + + while (location_range_in_use(loc, count)) + loc++; + + set_member_decoration(type_id, index, DecorationLocation, loc); + + // Triangle tess level inputs are shared in one packed float4, + // mark both builtins as sharing one location. + if (!msl_options.raw_buffer_tese_input && is_tessellating_triangles() && + (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter)) + { + builtin_to_automatic_input_location[BuiltInTessLevelInner] = loc; + builtin_to_automatic_input_location[BuiltInTessLevelOuter] = loc; + } + else + builtin_to_automatic_input_location[builtin] = loc; + + mark_location_as_used_by_shader(loc, mbr_type, StorageClassInput, true); + return loc; +} + +uint32_t CompilerMSL::get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, + uint32_t *comp) +{ + uint32_t loc = get_member_location(type_id, index, comp); + if (loc != k_unknown_location) + return loc; + loc = 0; + + if (comp) + *comp = k_unknown_component; + + // Late allocation. Find a location which is unused by the application. + // This can happen for built-in outputs in tessellation which are mixed and matched with user inputs. 
+ auto &mbr_type = get(get(type_id).member_types[index]); + uint32_t count = type_to_location_count(mbr_type); + + const auto location_range_in_use = [this](uint32_t location, uint32_t location_count) -> bool { + for (uint32_t i = 0; i < location_count; i++) + if (location_outputs_in_use.count(location + i) != 0) + return true; + return false; + }; + + while (location_range_in_use(loc, count)) + loc++; + + set_member_decoration(type_id, index, DecorationLocation, loc); + + // Triangle tess level inputs are shared in one packed float4; + // mark both builtins as sharing one location. + if (is_tessellating_triangles() && (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter)) + { + builtin_to_automatic_output_location[BuiltInTessLevelInner] = loc; + builtin_to_automatic_output_location[BuiltInTessLevelOuter] = loc; } + else + builtin_to_automatic_output_location[builtin] = loc; - return index; + mark_location_as_used_by_shader(loc, mbr_type, StorageClassOutput, true); + return loc; } // Returns the type declaration for a function, including the @@ -6150,7 +12178,9 @@ string CompilerMSL::func_type_decl(SPIRType &type) switch (execution.model) { case ExecutionModelVertex: - entry_type = "vertex"; + if (msl_options.vertex_for_tessellation && !msl_options.supports_msl_version(1, 2)) + SPIRV_CROSS_THROW("Tessellation requires Metal 1.2."); + entry_type = msl_options.vertex_for_tessellation ? "kernel" : "vertex"; break; case ExecutionModelTessellationEvaluation: if (!msl_options.supports_msl_version(1, 2)) @@ -6158,15 +12188,13 @@ string CompilerMSL::func_type_decl(SPIRType &type) if (execution.flags.get(ExecutionModeIsolines)) SPIRV_CROSS_THROW("Metal does not support isoline tessellation."); if (msl_options.is_ios()) - entry_type = - join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ") ]] vertex"); + entry_type = join("[[ patch(", is_tessellating_triangles() ? 
"triangle" : "quad", ") ]] vertex"); else - entry_type = join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ", ", + entry_type = join("[[ patch(", is_tessellating_triangles() ? "triangle" : "quad", ", ", execution.output_vertices, ") ]] vertex"); break; case ExecutionModelFragment: - entry_type = - execution.flags.get(ExecutionModeEarlyFragmentTests) ? "[[ early_fragment_tests ]] fragment" : "fragment"; + entry_type = uses_explicit_early_fragment_test() ? "[[ early_fragment_tests ]] fragment" : "fragment"; break; case ExecutionModelTessellationControl: if (!msl_options.supports_msl_version(1, 2)) @@ -6186,25 +12214,58 @@ string CompilerMSL::func_type_decl(SPIRType &type) return entry_type + " " + return_type; } +bool CompilerMSL::is_tesc_shader() const +{ + return get_execution_model() == ExecutionModelTessellationControl; +} + +bool CompilerMSL::is_tese_shader() const +{ + return get_execution_model() == ExecutionModelTessellationEvaluation; +} + +bool CompilerMSL::uses_explicit_early_fragment_test() +{ + auto &ep_flags = get_entry_point().flags; + return ep_flags.get(ExecutionModeEarlyFragmentTests) || ep_flags.get(ExecutionModePostDepthCoverage); +} + // In MSL, address space qualifiers are required for all pointer or reference variables string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) { const auto &type = get(argument.basetype); + return get_type_address_space(type, argument.self, true); +} + +string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument) +{ + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. 
+ Bitset flags; + auto *var = maybe_get(id); + if (var && type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) + flags = get_buffer_block_flags(id); + else + flags = get_decoration_bitset(id); + const char *addr_space = nullptr; switch (type.storage) { case StorageClassWorkgroup: - return "threadgroup"; + addr_space = "threadgroup"; + break; case StorageClassStorageBuffer: + case StorageClassPhysicalStorageBuffer: { // For arguments from variable pointers, we use the write count deduction, so // we should not assume any constness here. Only for global SSBOs. bool readonly = false; - if (has_decoration(type.self, DecorationBlock)) - readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable); + if (!var || has_decoration(type.self, DecorationBlock)) + readonly = flags.get(DecorationNonWritable); - return readonly ? "const device" : "device"; + addr_space = readonly ? "const device" : "device"; + break; } case StorageClassUniform: @@ -6214,103 +12275,112 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) { bool ssbo = has_decoration(type.self, DecorationBufferBlock); if (ssbo) - { - bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable); - return readonly ? "const device" : "device"; - } + addr_space = flags.get(DecorationNonWritable) ? "const device" : "device"; else - return "constant"; + addr_space = "constant"; + } + else if (!argument) + { + addr_space = "constant"; + } + else if (type_is_msl_framebuffer_fetch(type)) + { + // Subpass inputs are passed around by value. + addr_space = ""; } break; case StorageClassFunction: case StorageClassGeneric: - // No address space for plain values. - return type.pointer ? 
"thread" : ""; + break; case StorageClassInput: - if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id) - return "threadgroup"; + if (is_tesc_shader() && var && var->basevariable == stage_in_ptr_var_id) + addr_space = msl_options.multi_patch_workgroup ? "const device" : "threadgroup"; + // Don't pass tessellation levels in the device AS; we load and convert them + // to float manually. + if (is_tese_shader() && msl_options.raw_buffer_tese_input && var) + { + bool is_stage_in = var->basevariable == stage_in_ptr_var_id; + bool is_patch_stage_in = has_decoration(var->self, DecorationPatch); + bool is_builtin = has_decoration(var->self, DecorationBuiltIn); + BuiltIn builtin = (BuiltIn)get_decoration(var->self, DecorationBuiltIn); + bool is_tess_level = is_builtin && (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner); + if (is_stage_in || (is_patch_stage_in && !is_tess_level)) + addr_space = "const device"; + } + if (get_execution_model() == ExecutionModelFragment && var && var->basevariable == stage_in_var_id) + addr_space = "thread"; break; case StorageClassOutput: if (capture_output_to_buffer) - return "device"; + { + if (var && type.storage == StorageClassOutput) + { + bool is_masked = is_stage_output_variable_masked(*var); + + if (is_masked) + { + if (is_tessellation_shader()) + addr_space = "threadgroup"; + else + addr_space = "thread"; + } + else if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) + addr_space = "threadgroup"; + } + + if (!addr_space) + addr_space = "device"; + } break; default: break; } - return "thread"; + if (!addr_space) + { + // No address space for plain values. + addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : ""; + } + + return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? 
"volatile " : "", addr_space); } -string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id) +const char *CompilerMSL::to_restrict(uint32_t id, bool space) { - switch (type.storage) - { - case StorageClassWorkgroup: - return "threadgroup"; - - case StorageClassStorageBuffer: + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. + Bitset flags; + if (ir.ids[id].get_type() == TypeVariable) { - // This can be called for variable pointer contexts as well, so be very careful about which method we choose. - Bitset flags; - if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock)) + uint32_t type_id = expression_type_id(id); + auto &type = expression_type(id); + if (type.basetype == SPIRType::Struct && + (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock))) flags = get_buffer_block_flags(id); else flags = get_decoration_bitset(id); - - return flags.get(DecorationNonWritable) ? "const device" : "device"; - } - - case StorageClassUniform: - case StorageClassUniformConstant: - case StorageClassPushConstant: - if (type.basetype == SPIRType::Struct) - { - bool ssbo = has_decoration(type.self, DecorationBufferBlock); - if (ssbo) - { - // This can be called for variable pointer contexts as well, so be very careful about which method we choose. - Bitset flags; - if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock)) - flags = get_buffer_block_flags(id); - else - flags = get_decoration_bitset(id); - - return flags.get(DecorationNonWritable) ? "const device" : "device"; - } - else - return "constant"; - } - else - return "constant"; - - case StorageClassFunction: - case StorageClassGeneric: - // No address space for plain values. - return type.pointer ? 
"thread" : ""; - - case StorageClassOutput: - if (capture_output_to_buffer) - return "device"; - break; - - default: - break; } + else + flags = get_decoration_bitset(id); - return "thread"; + return flags.get(DecorationRestrict) || flags.get(DecorationRestrictPointerEXT) ? + (space ? "__restrict " : "__restrict") : ""; } string CompilerMSL::entry_point_arg_stage_in() { string decl; + if ((is_tesc_shader() && msl_options.multi_patch_workgroup) || + (is_tese_shader() && msl_options.raw_buffer_tese_input)) + return decl; + // Stage-in structure uint32_t stage_in_id; - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tese_shader()) stage_in_id = patch_stage_in_var_id; else stage_in_id = stage_in_var_id; @@ -6327,15 +12397,95 @@ string CompilerMSL::entry_point_arg_stage_in() return decl; } +// Returns true if this input builtin should be a direct parameter on a shader function parameter list, +// and false for builtins that should be passed or calculated some other way. +bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type) +{ + switch (bi_type) + { + // Vertex function in + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInBaseVertex: + case BuiltInInstanceId: + case BuiltInInstanceIndex: + case BuiltInBaseInstance: + return get_execution_model() != ExecutionModelVertex || !msl_options.vertex_for_tessellation; + // Tess. control function in + case BuiltInPosition: + case BuiltInPointSize: + case BuiltInClipDistance: + case BuiltInCullDistance: + case BuiltInPatchVertices: + return false; + case BuiltInInvocationId: + case BuiltInPrimitiveId: + return !is_tesc_shader() || !msl_options.multi_patch_workgroup; + // Tess. 
evaluation function in + case BuiltInTessLevelInner: + case BuiltInTessLevelOuter: + return false; + // Fragment function in + case BuiltInSamplePosition: + case BuiltInHelperInvocation: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: + return false; + case BuiltInViewIndex: + return get_execution_model() == ExecutionModelFragment && msl_options.multiview && + msl_options.multiview_layered_rendering; + // Compute function in + case BuiltInSubgroupId: + case BuiltInNumSubgroups: + return !msl_options.emulate_subgroups; + // Any stage function in + case BuiltInDeviceIndex: + case BuiltInSubgroupEqMask: + case BuiltInSubgroupGeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupLtMask: + return false; + case BuiltInSubgroupSize: + if (msl_options.fixed_subgroup_size != 0) + return false; + /* fallthrough */ + case BuiltInSubgroupLocalInvocationId: + return !msl_options.emulate_subgroups; + default: + return true; + } +} + +// Returns true if this is a fragment shader that runs per sample, and false otherwise. +bool CompilerMSL::is_sample_rate() const +{ + auto &caps = get_declared_capabilities(); + return get_execution_model() == ExecutionModelFragment && + (msl_options.force_sample_rate_shading || + std::find(caps.begin(), caps.end(), CapabilitySampleRateShading) != caps.end() || + (msl_options.use_framebuffer_fetch_subpasses && need_subpass_input_ms)); +} + +bool CompilerMSL::is_intersection_query() const +{ + auto &caps = get_declared_capabilities(); + return std::find(caps.begin(), caps.end(), CapabilityRayQueryKHR) != caps.end(); +} + void CompilerMSL::entry_point_args_builtin(string &ep_args) { // Builtin variables + SmallVector, 8> active_builtins; ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { + if (var.storage != StorageClassInput) + return; + auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); // Don't emit SamplePosition as a separate parameter. 
In the entry // point, we get that by calling get_sample_position() on the sample ID. - if (var.storage == StorageClassInput && is_builtin_variable(var) && + if (is_builtin_variable(var) && get_variable_data_type(var).basetype != SPIRType::Struct && get_variable_data_type(var).basetype != SPIRType::ControlPointArray) { @@ -6344,36 +12494,74 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) if (!active_input_builtins.get(bi_type) || !interface_variable_exists_in_entry_point(var_id)) return; - // These builtins are emitted specially. If we pass this branch, the builtin directly matches - // a MSL builtin. - if (bi_type != BuiltInSamplePosition && bi_type != BuiltInHelperInvocation && - bi_type != BuiltInPatchVertices && bi_type != BuiltInTessLevelInner && - bi_type != BuiltInTessLevelOuter && bi_type != BuiltInPosition && bi_type != BuiltInPointSize && - bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance && bi_type != BuiltInSubgroupEqMask && - bi_type != BuiltInBaryCoordNV && bi_type != BuiltInBaryCoordNoPerspNV && - bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask && - bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask && - ((get_execution_model() == ExecutionModelFragment && msl_options.multiview) || - bi_type != BuiltInViewIndex) && - (get_execution_model() == ExecutionModelGLCompute || - (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) || - (bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize))) + // Remember this variable. We may need to correct its type. + active_builtins.push_back(make_pair(&var, bi_type)); + + if (is_direct_input_builtin(bi_type)) { if (!ep_args.empty()) ep_args += ", "; - ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id); - ep_args += " [[" + builtin_qualifier(bi_type) + "]]"; + // Handle HLSL-style 0-based vertex/instance index. 
+ builtin_declaration = true; + + // Handle different MSL gl_TessCoord types. (float2, float3) + if (bi_type == BuiltInTessCoord && get_entry_point().flags.get(ExecutionModeQuads)) + ep_args += "float2 " + to_expression(var_id) + "In"; + else + ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id); + + ep_args += " [[" + builtin_qualifier(bi_type); + if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage)) + { + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Post-depth coverage requires MSL 2.0."); + if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Post-depth coverage on Mac requires MSL 2.3."); + ep_args += ", post_depth_coverage"; + } + ep_args += "]]"; + builtin_declaration = false; } } + + if (has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase)) + { + // This is a special implicit builtin, not corresponding to any SPIR-V builtin, + // which holds the base that was passed to vkCmdDispatchBase() or vkCmdDrawIndexed(). If it's present, + // assume we emitted it for a good reason. + assert(msl_options.supports_msl_version(1, 2)); + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_origin]]"; + } + + if (has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize)) + { + // This is another special implicit builtin, not corresponding to any SPIR-V builtin, + // which holds the number of vertices and instances to draw. If it's present, + // assume we emitted it for a good reason. 
+ assert(msl_options.supports_msl_version(1, 2)); + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_size]]"; + } }); - // Vertex and instance index built-ins - if (needs_vertex_idx_arg) - ep_args += built_in_func_arg(BuiltInVertexIndex, !ep_args.empty()); + // Correct the types of all encountered active builtins. We couldn't do this before + // because ensure_correct_builtin_type() may increase the bound, which isn't allowed + // while iterating over IDs. + for (auto &var : active_builtins) + var.first->basetype = ensure_correct_builtin_type(var.first->basetype, var.second); + + // Handle HLSL-style 0-based vertex/instance index. + if (needs_base_vertex_arg == TriState::Yes) + ep_args += built_in_func_arg(BuiltInBaseVertex, !ep_args.empty()); - if (needs_instance_idx_arg) - ep_args += built_in_func_arg(BuiltInInstanceIndex, !ep_args.empty()); + if (needs_base_instance_arg == TriState::Yes) + ep_args += built_in_func_arg(BuiltInBaseInstance, !ep_args.empty()); if (capture_output_to_buffer) { @@ -6387,14 +12575,15 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) " [[buffer(", msl_options.shader_output_buffer_index, ")]]"); } - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) { if (!ep_args.empty()) ep_args += ", "; ep_args += join("constant uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); } - else if (stage_out_var_id) + else if (stage_out_var_id && + !(get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)) { if (!ep_args.empty()) ep_args += ", "; @@ -6402,11 +12591,33 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) join("device uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); } + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation && + 
(active_input_builtins.get(BuiltInVertexIndex) || active_input_builtins.get(BuiltInVertexId)) && + msl_options.vertex_index_type != Options::IndexType::None) + { + // Add the index buffer so we can set gl_VertexIndex correctly. + if (!ep_args.empty()) + ep_args += ", "; + switch (msl_options.vertex_index_type) + { + case Options::IndexType::None: + break; + case Options::IndexType::UInt16: + ep_args += join("const device ushort* ", index_buffer_var_name, " [[buffer(", + msl_options.shader_index_buffer_index, ")]]"); + break; + case Options::IndexType::UInt32: + ep_args += join("const device uint* ", index_buffer_var_name, " [[buffer(", + msl_options.shader_index_buffer_index, ")]]"); + break; + } + } + // Tessellation control shaders get three additional parameters: // a buffer to hold the per-patch data, a buffer to hold the per-patch // tessellation levels, and a block of workgroup memory to hold the // input control point data. - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) { if (patch_stage_out_var_id) { @@ -6420,15 +12631,107 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) ep_args += ", "; ep_args += join("device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name, " [[buffer(", convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]"); + + // Initializer for tess factors must be handled specially since it's never declared as a normal variable. 
+ uint32_t outer_factor_initializer_id = 0; + uint32_t inner_factor_initializer_id = 0; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (!has_decoration(var.self, DecorationBuiltIn) || var.storage != StorageClassOutput || !var.initializer) + return; + + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + if (builtin == BuiltInTessLevelInner) + inner_factor_initializer_id = var.initializer; + else if (builtin == BuiltInTessLevelOuter) + outer_factor_initializer_id = var.initializer; + }); + + const SPIRConstant *c = nullptr; + + if (outer_factor_initializer_id && (c = maybe_get(outer_factor_initializer_id))) + { + auto &entry_func = get(ir.default_entry_point); + entry_func.fixup_hooks_in.push_back( + [=]() + { + uint32_t components = is_tessellating_triangles() ? 3 : 4; + for (uint32_t i = 0; i < components; i++) + { + statement(builtin_to_glsl(BuiltInTessLevelOuter, StorageClassOutput), "[", i, + "] = ", "half(", to_expression(c->subconstants[i]), ");"); + } + }); + } + + if (inner_factor_initializer_id && (c = maybe_get(inner_factor_initializer_id))) + { + auto &entry_func = get(ir.default_entry_point); + if (is_tessellating_triangles()) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_to_glsl(BuiltInTessLevelInner, StorageClassOutput), " = ", "half(", + to_expression(c->subconstants[0]), ");"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + for (uint32_t i = 0; i < 2; i++) + { + statement(builtin_to_glsl(BuiltInTessLevelInner, StorageClassOutput), "[", i, "] = ", + "half(", to_expression(c->subconstants[i]), ");"); + } + }); + } + } + if (stage_in_var_id) { if (!ep_args.empty()) ep_args += ", "; - ep_args += join("threadgroup ", type_to_glsl(get_stage_in_struct_type()), "* ", input_wg_var_name, - " [[threadgroup(", convert_to_string(msl_options.shader_input_wg_index), ")]]"); + if (msl_options.multi_patch_workgroup) + { + ep_args += join("device ", 
type_to_glsl(get_stage_in_struct_type()), "* ", input_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_input_buffer_index), ")]]"); + } + else + { + ep_args += join("threadgroup ", type_to_glsl(get_stage_in_struct_type()), "* ", input_wg_var_name, + " [[threadgroup(", convert_to_string(msl_options.shader_input_wg_index), ")]]"); + } } } } + // Tessellation evaluation shaders get three additional parameters: + // a buffer for the per-patch data, a buffer for the per-patch + // tessellation levels, and a buffer for the control point data. + if (is_tese_shader() && msl_options.raw_buffer_tese_input) + { + if (patch_stage_in_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += + join("const device ", type_to_glsl(get_patch_stage_in_struct_type()), "* ", patch_input_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_patch_input_buffer_index), ")]]"); + } + + if (tess_level_inner_var_id || tess_level_outer_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("const device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]"); + } + + if (stage_in_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("const device ", type_to_glsl(get_stage_in_struct_type()), "* ", input_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_input_buffer_index), ")]]"); + } + } } string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) @@ -6469,7 +12772,7 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) claimed_bindings.set(buffer_binding); - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id); + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id, true) + to_name(id); ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]"; 
next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1); @@ -6514,28 +12817,61 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) struct Resource { SPIRVariable *var; + SPIRVariable *descriptor_alias; string name; SPIRType::BaseType basetype; uint32_t index; + uint32_t plane; + uint32_t secondary_index; }; SmallVector resources; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && !is_hidden_variable(var)) { auto &type = get_variable_data_type(var); - uint32_t var_id = var.self; - if (var.storage != StorageClassPushConstant) + if (is_supported_argument_buffer_type(type) && var.storage != StorageClassPushConstant) { uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); if (descriptor_set_is_argument_buffer(desc_set)) return; } + // Handle descriptor aliasing. We can handle aliasing of buffers by casting pointers, + // but not for typed resources. + SPIRVariable *descriptor_alias = nullptr; + if (var.storage == StorageClassUniform || var.storage == StorageClassStorageBuffer) + { + for (auto &resource : resources) + { + if (get_decoration(resource.var->self, DecorationDescriptorSet) == + get_decoration(var_id, DecorationDescriptorSet) && + get_decoration(resource.var->self, DecorationBinding) == + get_decoration(var_id, DecorationBinding) && + resource.basetype == SPIRType::Struct && type.basetype == SPIRType::Struct && + (resource.var->storage == StorageClassUniform || + resource.var->storage == StorageClassStorageBuffer)) + { + // Possible, but horrible to implement, ignore for now. 
+ if (!type.array.empty()) + SPIRV_CROSS_THROW("Aliasing arrayed discrete descriptors is currently not supported."); + + descriptor_alias = resource.var; + // Self-reference marks that we should declare the resource, + // and it's being used as an alias (so we can emit void* instead). + resource.descriptor_alias = resource.var; + // Need to promote interlocked usage so that the primary declaration is correct. + if (interlocked_resources.count(var_id)) + interlocked_resources.insert(resource.var->self); + break; + } + } + } + const MSLConstexprSampler *constexpr_sampler = nullptr; if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler) { @@ -6547,29 +12883,48 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) } } + // Emulate texture2D atomic operations + uint32_t secondary_index = 0; + if (atomic_image_vars.count(var.self)) + { + secondary_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0); + } + if (type.basetype == SPIRType::SampledImage) { add_resource_name(var_id); - resources.push_back( - { &var, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image) }); + + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; + + for (uint32_t i = 0; i < plane_count; i++) + resources.push_back({ &var, descriptor_alias, to_name(var_id), SPIRType::Image, + get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index }); if (type.image.dim != DimBuffer && !constexpr_sampler) { - resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler, - get_metal_resource_index(var, SPIRType::Sampler) }); + resources.push_back({ &var, descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler, + get_metal_resource_index(var, SPIRType::Sampler), 0, 0 }); } } else if (!constexpr_sampler) { // constexpr samplers are not declared as resources. 
add_resource_name(var_id); - resources.push_back( - { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + + // Don't allocate resource indices for aliases. + uint32_t resource_index = ~0u; + if (!descriptor_alias) + resource_index = get_metal_resource_index(var, type.basetype); + + resources.push_back({ &var, descriptor_alias, to_name(var_id), type.basetype, + resource_index, 0, secondary_index }); } } }); - sort(resources.begin(), resources.end(), [](const Resource &lhs, const Resource &rhs) { + stable_sort(resources.begin(), resources.end(), [](const Resource &lhs, const Resource &rhs) { return tie(lhs.basetype, lhs.index) < tie(rhs.basetype, rhs.index); }); @@ -6587,7 +12942,29 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) auto &m = ir.meta[type.self]; if (m.members.size() == 0) break; - if (!type.array.empty()) + + if (r.descriptor_alias) + { + if (r.var == r.descriptor_alias) + { + auto primary_name = join("spvBufferAliasSet", + get_decoration(var_id, DecorationDescriptorSet), + "Binding", + get_decoration(var_id, DecorationBinding)); + + // Declare the primary alias as void* + if (!ep_args.empty()) + ep_args += ", "; + ep_args += get_argument_address_space(var) + " void* " + primary_name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + + buffer_aliases_discrete.push_back(r.var->self); + } + else if (!type.array.empty()) { if (type.array.size() > 1) SPIRV_CROSS_THROW("Arrays of arrays of buffers are not supported."); @@ -6600,42 +12977,94 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if (array_size == 0) SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL."); - buffer_arrays.push_back(var_id); + // Allow Metal to use the array template to make arrays a value type + is_using_builtin_array = true; + 
buffer_arrays_discrete.push_back(var_id); for (uint32_t i = 0; i < array_size; ++i) { if (!ep_args.empty()) ep_args += ", "; - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" + - convert_to_string(i); - ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]"; + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id, true) + + r.name + "_" + convert_to_string(i); + ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } + is_using_builtin_array = false; } else { if (!ep_args.empty()) ep_args += ", "; - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + ep_args += + get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id, true) + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } break; } case SPIRType::Sampler: if (!ep_args.empty()) ep_args += ", "; - ep_args += sampler_type(type) + " " + r.name; + ep_args += sampler_type(type, var_id) + " " + r.name; ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]"; break; case SPIRType::Image: + { if (!ep_args.empty()) ep_args += ", "; - ep_args += image_type_glsl(type, var_id) + " " + r.name; - ep_args += " [[texture(" + convert_to_string(r.index) + ")]]"; + + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ const auto &basetype = get(var.basetype); + if (!type_is_msl_framebuffer_fetch(basetype)) + { + ep_args += image_type_glsl(type, var_id) + " " + r.name; + if (r.plane > 0) + ep_args += join(plane_name_suffix, r.plane); + ep_args += " [[texture(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + else + { + if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Framebuffer fetch on Mac is not supported before MSL 2.3."); + ep_args += image_type_glsl(type, var_id) + " " + r.name; + ep_args += " [[color(" + convert_to_string(r.index) + ")]]"; + } + + // Emulate texture2D atomic operations + if (atomic_image_vars.count(var.self)) + { + ep_args += ", device atomic_" + type_to_glsl(get(basetype.image.type), 0); + ep_args += "* " + r.name + "_atomic"; + ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + break; + } + case SPIRType::AccelerationStructure: + ep_args += ", " + type_to_glsl(type, var_id) + " " + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; break; default: if (!ep_args.empty()) ep_args += ", "; - ep_args += type_to_glsl(type, var_id) + " " + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + if (!type.pointer) + ep_args += get_type_address_space(get(var.basetype), var_id) + " " + + type_to_glsl(type, var_id) + "& " + r.name; + else + ep_args += type_to_glsl(type, var_id) + " " + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; break; } } @@ -6657,6 +13086,21 @@ string CompilerMSL::entry_point_args_classic(bool append_comma) void CompilerMSL::fix_up_shader_inputs_outputs() { + auto &entry_func = this->get(ir.default_entry_point); + + // 
Emit a guard to ensure we don't execute beyond the last vertex. + // Vertex shaders shouldn't have the problems with barriers in non-uniform control flow that + // tessellation control shaders do, so early returns should be OK. We may need to revisit this + // if it ever becomes possible to use barriers from a vertex shader. + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation) + { + entry_func.fixup_hooks_in.push_back([this]() { + statement("if (any(", to_expression(builtin_invocation_id_id), + " >= ", to_expression(builtin_stage_input_size_id), "))"); + statement(" return;"); + }); + } + // Look for sampled images and buffer. Add hooks to set up the swizzle constants or array lengths. ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = get_variable_data_type(var); @@ -6667,7 +13111,6 @@ void CompilerMSL::fix_up_shader_inputs_outputs() { if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) { - auto &entry_func = this->get(ir.default_entry_point); entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { bool is_array_type = !type.array.empty(); @@ -6692,9 +13135,8 @@ void CompilerMSL::fix_up_shader_inputs_outputs() else if ((var.storage == StorageClassStorageBuffer || (var.storage == StorageClassUniform && ssbo)) && !is_hidden_variable(var)) { - if (buffers_requiring_array_length.count(var.self)) + if (buffer_requires_array_length(var.self)) { - auto &entry_func = this->get(ir.default_entry_point); entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { bool is_array_type = !type.array.empty(); @@ -6719,13 +13161,17 @@ void CompilerMSL::fix_up_shader_inputs_outputs() }); // Builtin variables - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + ir.for_each_typed_id([this, &entry_func](uint32_t, SPIRVariable &var) { uint32_t var_id = var.self; BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type; - if (var.storage == StorageClassInput && 
is_builtin_variable(var)) + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + if (!interface_variable_exists_in_entry_point(var.self)) + return; + + if (var.storage == StorageClassInput && is_builtin_variable(var) && active_input_builtins.get(bi_type)) { - auto &entry_func = this->get(ir.default_entry_point); switch (bi_type) { case BuiltInSamplePosition: @@ -6734,18 +13180,40 @@ void CompilerMSL::fix_up_shader_inputs_outputs() to_expression(builtin_sample_id_id), ");"); }); break; - case BuiltInHelperInvocation: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("simd_is_helper_thread() is only supported on macOS."); - else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) - SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); + case BuiltInFragCoord: + if (is_sample_rate()) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), ".xy += get_sample_position(", + to_expression(builtin_sample_id_id), ") - 0.5;"); + }); + } + break; + case BuiltInInvocationId: + // This is direct-mapped without multi-patch workgroups. + if (!is_tesc_shader() || !msl_options.multi_patch_workgroup) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_invocation_id_id), ".x % ", this->get_entry_point().output_vertices, + ";"); + }); + break; + case BuiltInPrimitiveId: + // This is natively supported by fragment and tessellation evaluation shaders. + // In tessellation control shaders, this is direct-mapped without multi-patch workgroups. 
+ if (!is_tesc_shader() || !msl_options.multi_patch_workgroup) + break; entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_is_helper_thread();"); + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = min(", + to_expression(builtin_invocation_id_id), ".x / ", this->get_entry_point().output_vertices, + ", spvIndirectParams[1] - 1);"); }); break; case BuiltInPatchVertices: - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tese_shader()) entry_func.fixup_hooks_in.push_back([=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", to_expression(patch_stage_in_var_id), ".gl_in.size();"); @@ -6756,144 +13224,254 @@ void CompilerMSL::fix_up_shader_inputs_outputs() }); break; case BuiltInTessCoord: + if (get_entry_point().flags.get(ExecutionModeQuads)) + { + // The entry point will only have a float2 TessCoord variable. + // Pad to float3. + entry_func.fixup_hooks_in.push_back([=]() { + auto name = builtin_to_glsl(BuiltInTessCoord, StorageClassInput); + statement("float3 " + name + " = float3(" + name + "In.x, " + name + "In.y, 0.0);"); + }); + } + // Emit a fixup to account for the shifted domain. Don't do this for triangles; // MoltenVK will just reverse the winding order instead. - if (msl_options.tess_domain_origin_lower_left && !get_entry_point().flags.get(ExecutionModeTriangles)) + if (msl_options.tess_domain_origin_lower_left && !is_tessellating_triangles()) { string tc = to_expression(var_id); entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); }); } break; - case BuiltInSubgroupLocalInvocationId: - // This is natively supported in compute shaders. - if (get_execution_model() == ExecutionModelGLCompute) - break; - - // This is natively supported in fragment shaders in MSL 2.2. 
- if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) + case BuiltInSubgroupId: + if (!msl_options.emulate_subgroups) break; - - if (msl_options.is_ios()) - SPIRV_CROSS_THROW( - "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS."); - - if (!msl_options.supports_msl_version(2, 1)) - SPIRV_CROSS_THROW( - "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1."); - - // Shaders other than compute shaders don't support the SIMD-group - // builtins directly, but we can emulate them using the SIMD-group - // functions. This might break if some of the subgroup terminated - // before reaching the entry point. + // For subgroup emulation, this is the same as the local invocation index. entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = simd_prefix_exclusive_sum(1);"); + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_local_invocation_index_id), ";"); }); break; - case BuiltInSubgroupSize: - // This is natively supported in compute shaders. - if (get_execution_model() == ExecutionModelGLCompute) + case BuiltInNumSubgroups: + if (!msl_options.emulate_subgroups) break; - - // This is natively supported in fragment shaders in MSL 2.2. - if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) + // For subgroup emulation, this is the same as the workgroup size. 
+ entry_func.fixup_hooks_in.push_back([=]() { + auto &type = expression_type(builtin_workgroup_size_id); + string size_expr = to_expression(builtin_workgroup_size_id); + if (type.vecsize >= 3) + size_expr = join(size_expr, ".x * ", size_expr, ".y * ", size_expr, ".z"); + else if (type.vecsize == 2) + size_expr = join(size_expr, ".x * ", size_expr, ".y"); + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", size_expr, ";"); + }); + break; + case BuiltInSubgroupLocalInvocationId: + if (!msl_options.emulate_subgroups) break; - - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS."); - - if (!msl_options.supports_msl_version(2, 1)) - SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1."); - + // For subgroup emulation, assume subgroups of size 1. entry_func.fixup_hooks_in.push_back( - [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); }); + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); }); + break; + case BuiltInSubgroupSize: + if (msl_options.emulate_subgroups) + { + // For subgroup emulation, assume subgroups of size 1. 
+ entry_func.fixup_hooks_in.push_back( + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 1;"); }); + } + else if (msl_options.fixed_subgroup_size != 0) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.fixed_subgroup_size, ";"); + }); + } break; case BuiltInSubgroupEqMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", - to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (", - to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", - to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", "uint4(1 << ", + to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? 
uint4(0, (1 << (", + to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", + to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + } }); break; case BuiltInSubgroupGeMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + if (msl_options.fixed_subgroup_size != 0) + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { // Case where index < 32, size < 32: - // mask0 = bfe(0xFFFFFFFF, index, size - index); - // mask1 = bfe(0xFFFFFFFF, 0, 0); // Gives 0 + // mask0 = bfi(0, 0xFFFFFFFF, index, size - index); + // mask1 = bfi(0, 0xFFFFFFFF, 0, 0); // Gives 0 // Case where index < 32 but size >= 32: - // mask0 = bfe(0xFFFFFFFF, index, 32 - index); - // mask1 = bfe(0xFFFFFFFF, 0, size - 32); + // mask0 = bfi(0, 0xFFFFFFFF, index, 32 - index); + // mask1 = bfi(0, 0xFFFFFFFF, 0, size - 32); // Case where index >= 32: - // mask0 = bfe(0xFFFFFFFF, 32, 0); // Gives 0 - // mask1 = bfe(0xFFFFFFFF, index - 32, size - index); + // mask0 = bfi(0, 0xFFFFFFFF, 32, 0); // Gives 0 + // mask1 = bfi(0, 0xFFFFFFFF, index - 32, size - index); // This is expressed without branches to avoid divergent // control flow--hence the complicated min/max expressions. // This is further complicated by the fact that if you attempt - // to bfe out-of-bounds on Metal, undefined behavior is the + // to bfi/bfe out-of-bounds on Metal, undefined behavior is the // result. 
- statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, min(", - to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)", - to_expression(builtin_subgroup_size_id), ", 32) - (int)", - to_expression(builtin_subgroup_invocation_id_id), - ", 0)), extract_bits(0xFFFFFFFF, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)", - to_expression(builtin_subgroup_size_id), " - (int)max(", - to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));"); + if (msl_options.fixed_subgroup_size > 32) + { + // Don't use the subgroup size variable with fixed subgroup sizes, + // since the variables could be defined in the wrong order. + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(32 - (int)", + to_expression(builtin_subgroup_invocation_id_id), + ", 0)), insert_bits(0u, 0xFFFFFFFF," + " (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), ", + msl_options.fixed_subgroup_size, " - max(", + to_expression(builtin_subgroup_invocation_id_id), + ", 32u)), uint2(0));"); + } + else if (msl_options.fixed_subgroup_size != 0) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), ", ", + msl_options.fixed_subgroup_size, " - ", + to_expression(builtin_subgroup_invocation_id_id), + "), uint3(0));"); + } + else if (msl_options.is_ios()) + { + // On iOS, the SIMD-group size will currently never exceed 32. 
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), ", ", + to_expression(builtin_subgroup_size_id), " - ", + to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + ", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));"); + } }); break; case BuiltInSubgroupGtMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { // The same logic applies here, except now the index is one // more than the subgroup invocation ID. 
- statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, min(", - to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)", - to_expression(builtin_subgroup_size_id), ", 32) - (int)", - to_expression(builtin_subgroup_invocation_id_id), - " - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)", - to_expression(builtin_subgroup_size_id), " - (int)max(", - to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));"); + if (msl_options.fixed_subgroup_size > 32) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(32 - (int)", + to_expression(builtin_subgroup_invocation_id_id), + " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), ", + msl_options.fixed_subgroup_size, " - max(", + to_expression(builtin_subgroup_invocation_id_id), + " + 1, 32u)), uint2(0));"); + } + else if (msl_options.fixed_subgroup_size != 0) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1, ", + msl_options.fixed_subgroup_size, " - ", + to_expression(builtin_subgroup_invocation_id_id), + " - 1), uint3(0));"); + } + else if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1, ", + to_expression(builtin_subgroup_size_id), " - ", + to_expression(builtin_subgroup_invocation_id_id), " - 1), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + 
to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));"); + } }); break; case BuiltInSubgroupLeMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, 0, min(", - to_expression(builtin_subgroup_invocation_id_id), - " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + to_expression(builtin_subgroup_invocation_id_id), + " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));"); + } }); break; case BuiltInSubgroupLtMask: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on 
iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); entry_func.fixup_hooks_in.push_back([=]() { - statement(builtin_type_decl(bi_type), " ", to_expression(var_id), - " = uint4(extract_bits(0xFFFFFFFF, 0, min(", - to_expression(builtin_subgroup_invocation_id_id), - ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", - to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));"); + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, ", + to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + to_expression(builtin_subgroup_invocation_id_id), + ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));"); + } }); break; case BuiltInViewIndex: @@ -6905,6 +13483,26 @@ void CompilerMSL::fix_up_shader_inputs_outputs() statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); }); } + else if (msl_options.view_index_from_device_index) + { + // In this case, we take the view index from that of the device we're running on. + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.device_index, ";"); + }); + // We actually don't want to set the render_target_array_index here. + // Since every physical device is rendering a different view, + // there's no need for layered rendering here. 
+ } + else if (!msl_options.multiview_layered_rendering) + { + // In this case, the views are rendered one at a time. The view index, then, + // is just the first part of the "view mask". + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(view_mask_buffer_id), "[0];"); + }); + } else if (get_execution_model() == ExecutionModelFragment) { // Because we adjusted the view index in the vertex shader, we have to @@ -6919,10 +13517,13 @@ void CompilerMSL::fix_up_shader_inputs_outputs() // the view index in the instance index. entry_func.fixup_hooks_in.push_back([=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", - to_expression(view_mask_buffer_id), "[0] + ", to_expression(builtin_instance_idx_id), - " % ", to_expression(view_mask_buffer_id), "[1];"); - statement(to_expression(builtin_instance_idx_id), " /= ", to_expression(view_mask_buffer_id), - "[1];"); + to_expression(view_mask_buffer_id), "[0] + (", to_expression(builtin_instance_idx_id), + " - ", to_expression(builtin_base_instance_id), ") % ", + to_expression(view_mask_buffer_id), "[1];"); + statement(to_expression(builtin_instance_idx_id), " = (", + to_expression(builtin_instance_idx_id), " - ", + to_expression(builtin_base_instance_id), ") / ", to_expression(view_mask_buffer_id), + "[1] + ", to_expression(builtin_base_instance_id), ";"); }); // In addition to setting the variable itself, we also need to // set the render_target_array_index with it on output. We have to @@ -6934,15 +13535,124 @@ void CompilerMSL::fix_up_shader_inputs_outputs() }); } break; + case BuiltInDeviceIndex: + // Metal pipelines belong to the devices which create them, so we'll + // need to create a MTLPipelineState for every MTLDevice in a grouped + // VkDevice. We can assume, then, that the device index is constant. 
+ entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.device_index, ";"); + }); + break; + case BuiltInWorkgroupId: + if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInWorkgroupId)) + break; + + // The vkCmdDispatchBase() command lets the client set the base value + // of WorkgroupId. Metal has no direct equivalent; we must make this + // adjustment ourselves. + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), ";"); + }); + break; + case BuiltInGlobalInvocationId: + if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInGlobalInvocationId)) + break; + + // GlobalInvocationId is defined as LocalInvocationId + WorkgroupId * WorkgroupSize. + // This needs to be adjusted too. + entry_func.fixup_hooks_in.push_back([=]() { + auto &execution = this->get_entry_point(); + uint32_t workgroup_size_id = execution.workgroup_size.constant; + if (workgroup_size_id) + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), + " * ", to_expression(workgroup_size_id), ";"); + else + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), + " * uint3(", execution.workgroup_size.x, ", ", execution.workgroup_size.y, ", ", + execution.workgroup_size.z, ");"); + }); + break; + case BuiltInVertexId: + case BuiltInVertexIndex: + // This is direct-mapped normally. 
+ if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + builtin_declaration = true; + switch (msl_options.vertex_index_type) + { + case Options::IndexType::None: + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_invocation_id_id), ".x + ", + to_expression(builtin_dispatch_base_id), ".x;"); + break; + case Options::IndexType::UInt16: + case Options::IndexType::UInt32: + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", index_buffer_var_name, + "[", to_expression(builtin_invocation_id_id), ".x] + ", + to_expression(builtin_dispatch_base_id), ".x;"); + break; + } + builtin_declaration = false; + }); + break; + case BuiltInBaseVertex: + // This is direct-mapped normally. + if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_dispatch_base_id), ".x;"); + }); + break; + case BuiltInInstanceId: + case BuiltInInstanceIndex: + // This is direct-mapped normally. + if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + builtin_declaration = true; + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_invocation_id_id), ".y + ", to_expression(builtin_dispatch_base_id), + ".y;"); + builtin_declaration = false; + }); + break; + case BuiltInBaseInstance: + // This is direct-mapped normally. 
+ if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_dispatch_base_id), ".y;"); + }); + break; default: break; } } + else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment && + is_builtin_variable(var) && active_output_builtins.get(bi_type) && + bi_type == BuiltInSampleMask && has_additional_fixed_sample_mask()) + { + // If the additional fixed sample mask was set, we need to adjust the sample_mask + // output to reflect that. If the shader outputs the sample_mask itself too, we need + // to AND the two masks to get the final one. + string op_str = does_shader_write_sample_mask ? " &= " : " = "; + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";"); + }); + } }); } // Returns the Metal index of the resource of the specified type as used by the specified variable. -uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype) +uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane) { auto &execution = get_entry_point(); auto &var_dec = ir.meta[var.self].decoration; @@ -6953,9 +13663,17 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base // If a matching binding has been specified, find and use it. auto itr = resource_bindings.find({ execution.model, var_desc_set, var_binding }); - auto resource_decoration = var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler ? - SPIRVCrossDecorationResourceIndexSecondary : - SPIRVCrossDecorationResourceIndexPrimary; + // Atomic helper buffers for image atomics need to use secondary bindings as well. 
+ bool use_secondary_binding = (var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler) || + basetype == SPIRType::AtomicCounter; + + auto resource_decoration = + use_secondary_binding ? SPIRVCrossDecorationResourceIndexSecondary : SPIRVCrossDecorationResourceIndexPrimary; + + if (plane == 1) + resource_decoration = SPIRVCrossDecorationResourceIndexTertiary; + if (plane == 2) + resource_decoration = SPIRVCrossDecorationResourceIndexQuaternary; if (itr != end(resource_bindings)) { @@ -6964,8 +13682,8 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base switch (basetype) { case SPIRType::Image: - set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture); - return remap.first.msl_texture; + set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture + plane); + return remap.first.msl_texture + plane; case SPIRType::Sampler: set_extended_decoration(var.self, resource_decoration, remap.first.msl_sampler); return remap.first.msl_sampler; @@ -6979,33 +13697,49 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base if (has_extended_decoration(var.self, resource_decoration)) return get_extended_decoration(var.self, resource_decoration); + auto &type = get(var.basetype); + + if (type_is_msl_framebuffer_fetch(type)) + { + // Frame-buffer fetch gets its fallback resource index from the input attachment index, + // which is then treated as color index. + return get_decoration(var.self, DecorationInputAttachmentIndex); + } + else if (msl_options.enable_decoration_binding) + { + // Allow user to enable decoration binding. + // If there is no explicit mapping of bindings to MSL, use the declared binding as a fallback. + if (has_decoration(var.self, DecorationBinding)) + { + var_binding = get_decoration(var.self, DecorationBinding); + // Avoid emitting sentinel bindings. 
+ if (var_binding < 0x80000000u) + return var_binding; + } + } + // If we did not explicitly remap, allocate bindings on demand. // We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different. + bool allocate_argument_buffer_ids = false; + + if (var.storage != StorageClassPushConstant) + allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(var_desc_set); + uint32_t binding_stride = 1; - auto &type = get(var.basetype); for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) - binding_stride *= type.array_size_literal[i] ? type.array[i] : get(type.array[i]).scalar(); + binding_stride *= to_array_size_literal(type, i); assert(binding_stride != 0); // If a binding has not been specified, revert to incrementing resource indices. uint32_t resource_index; - bool allocate_argument_buffer_ids = false; - uint32_t desc_set = 0; - - if (var.storage != StorageClassPushConstant) - { - desc_set = get_decoration(var.self, DecorationDescriptorSet); - allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(desc_set); - } - if (allocate_argument_buffer_ids) { // Allocate from a flat ID binding space. 
- resource_index = next_metal_resource_ids[desc_set]; - next_metal_resource_ids[desc_set] += binding_stride; + resource_index = next_metal_resource_ids[var_desc_set]; + next_metal_resource_ids[var_desc_set] += binding_stride; } else { @@ -7031,12 +13765,69 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base return resource_index; } +bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const +{ + return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + msl_options.use_framebuffer_fetch_subpasses; +} + +bool CompilerMSL::type_is_pointer(const SPIRType &type) const +{ + if (!type.pointer) + return false; + auto &parent_type = get(type.parent_type); + // Safeguards when we forget to set pointer_depth (there is an assert for it in type_to_glsl), + // but the extra check shouldn't hurt. + return (type.pointer_depth > parent_type.pointer_depth) || !parent_type.pointer; +} + +bool CompilerMSL::type_is_pointer_to_pointer(const SPIRType &type) const +{ + if (!type.pointer) + return false; + auto &parent_type = get(type.parent_type); + return type.pointer_depth > parent_type.pointer_depth && type_is_pointer(parent_type); +} + +const char *CompilerMSL::descriptor_address_space(uint32_t id, StorageClass storage, const char *plain_address_space) const +{ + if (msl_options.argument_buffers) + { + bool storage_class_is_descriptor = storage == StorageClassUniform || + storage == StorageClassStorageBuffer || + storage == StorageClassUniformConstant; + + uint32_t desc_set = get_decoration(id, DecorationDescriptorSet); + if (storage_class_is_descriptor && descriptor_set_is_argument_buffer(desc_set)) + { + // An awkward case where we need to emit *more* address space declarations (yay!). + // An example is where we pass down an array of buffer pointers to leaf functions. + // It's a constant array containing pointers to constants. + // The pointer array is always constant however. E.g. 
+ // device SSBO * constant (&array)[N]. + // const device SSBO * constant (&array)[N]. + // constant SSBO * constant (&array)[N]. + // However, this only matters for argument buffers, since for MSL 1.0 style codegen, + // we emit the buffer array on stack instead, and that seems to work just fine apparently. + + // If the argument was marked as being in device address space, any pointer to member would + // be const device, not constant. + if (argument_buffer_device_storage_mask & (1u << desc_set)) + return "const device"; + else + return "constant"; + } + } + + return plain_address_space; +} + string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) { auto &var = get(arg.id); auto &type = get_variable_data_type(var); auto &var_type = get(arg.type); - StorageClass storage = var_type.storage; + StorageClass type_storage = var_type.storage; bool is_pointer = var_type.pointer; // If we need to modify the name of the variable, make sure we use the original variable. @@ -7046,37 +13837,93 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) name_id = var.basevariable; bool constref = !arg.alias_global_variable && is_pointer && arg.write_count == 0; + // Framebuffer fetch is plain value, const looks out of place, but it is not wrong. + if (type_is_msl_framebuffer_fetch(type)) + constref = false; + else if (type_storage == StorageClassUniformConstant) + constref = true; bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler; - // Arrays of images/samplers in MSL are always const. - if (!type.array.empty() && type_is_image) - constref = true; + // For opaque types we handle const later due to descriptor address spaces. + const char *cv_qualifier = (constref && !type_is_image) ? 
"const " : ""; + string decl; + + // If this is a combined image-sampler for a 2D image with floating-point type, + // we emitted the 'spvDynamicImageSampler' type, and this is *not* an alias parameter + // for a global, then we need to emit a "dynamic" combined image-sampler. + // Unfortunately, this is necessary to properly support passing around + // combined image-samplers with Y'CbCr conversions on them. + bool is_dynamic_img_sampler = !arg.alias_global_variable && type.basetype == SPIRType::SampledImage && + type.image.dim == Dim2D && type_is_floating_point(get(type.image.type)) && + spv_function_implementations.count(SPVFuncImplDynamicImageSampler); + + // Allow Metal to use the array template to make arrays a value type + string address_space = get_argument_address_space(var); + bool builtin = has_decoration(var.self, DecorationBuiltIn); + auto builtin_type = BuiltIn(get_decoration(arg.id, DecorationBuiltIn)); - string decl; - if (constref) - decl += "const "; + if (address_space == "threadgroup") + is_using_builtin_array = true; - bool builtin = is_builtin_variable(var); - if (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id) - decl += type_to_glsl(type, arg.id); + if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) + decl = join(cv_qualifier, type_to_glsl(type, arg.id)); else if (builtin) - decl += builtin_type_decl(static_cast(get_decoration(arg.id, DecorationBuiltIn)), arg.id); - else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) - decl += join(type_to_glsl(type, arg.id), "*"); - else - decl += type_to_glsl(type, arg.id); + { + // Only use templated array for Clip/Cull distance when feasible. + // In other scenarios, we need need to override array length for tess levels (if used as outputs), + // or we need to emit the expected type for builtins (uint vs int). 
+ auto storage = get(var.basetype).storage; + + if (storage == StorageClassInput && + (builtin_type == BuiltInTessLevelInner || builtin_type == BuiltInTessLevelOuter)) + { + is_using_builtin_array = false; + } + else if (builtin_type != BuiltInClipDistance && builtin_type != BuiltInCullDistance) + { + is_using_builtin_array = true; + } - bool opaque_handle = storage == StorageClassUniformConstant; + if (storage == StorageClassOutput && variable_storage_requires_stage_io(storage) && + !is_stage_output_builtin_masked(builtin_type)) + is_using_builtin_array = true; - string address_space = get_argument_address_space(var); + if (is_using_builtin_array) + decl = join(cv_qualifier, builtin_type_decl(builtin_type, arg.id)); + else + decl = join(cv_qualifier, type_to_glsl(type, arg.id)); + } + else if ((type_storage == StorageClassUniform || type_storage == StorageClassStorageBuffer) && is_array(type)) + { + is_using_builtin_array = true; + decl += join(cv_qualifier, type_to_glsl(type, arg.id), "*"); + } + else if (is_dynamic_img_sampler) + { + decl = join(cv_qualifier, "spvDynamicImageSampler<", type_to_glsl(get(type.image.type)), ">"); + // Mark the variable so that we can handle passing it to another function. + set_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + } + else + { + // The type is a pointer type we need to emit cv_qualifier late. + if (type_is_pointer(type)) + { + decl = type_to_glsl(type, arg.id); + if (*cv_qualifier != '\0') + decl += join(" ", cv_qualifier); + } + else + decl = join(cv_qualifier, type_to_glsl(type, arg.id)); + } - if (!builtin && !opaque_handle && !is_pointer && - (storage == StorageClassFunction || storage == StorageClassGeneric)) + if (!builtin && !is_pointer && + (type_storage == StorageClassFunction || type_storage == StorageClassGeneric)) { // If the argument is a pure value and not an opaque type, we will pass by value. 
- if (is_array(type)) + if (msl_options.force_native_arrays && is_array(type)) { // We are receiving an array by value. This is problematic. // We cannot be sure of the target address space since we are supposed to receive a copy, @@ -7087,6 +13934,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) // non-constant arrays, but we can create thread const from constant. decl = string("thread const ") + decl; decl += " (&"; + const char *restrict_kw = to_restrict(name_id, true); + if (*restrict_kw) + { + decl += " "; + decl += restrict_kw; + } decl += to_expression(name_id); decl += ")"; decl += type_to_array_glsl(type); @@ -7101,49 +13954,99 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } else if (is_array(type) && !type_is_image) { - // Arrays of images and samplers are special cased. + // Arrays of opaque types are special cased. if (!address_space.empty()) decl = join(address_space, " ", decl); - if (msl_options.argument_buffers) + const char *argument_buffer_space = descriptor_address_space(name_id, type_storage, nullptr); + if (argument_buffer_space) + { + decl += " "; + decl += argument_buffer_space; + } + + // Special case, need to override the array size here if we're using tess level as an argument. + if (is_tesc_shader() && builtin && + (builtin_type == BuiltInTessLevelInner || builtin_type == BuiltInTessLevelOuter)) { - uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet); - if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && - descriptor_set_is_argument_buffer(desc_set)) + uint32_t array_size = get_physical_tess_level_array_size(builtin_type); + if (array_size == 1) + { + decl += " &"; + decl += to_expression(name_id); + } + else { - // An awkward case where we need to emit *more* address space declarations (yay!). - // An example is where we pass down an array of buffer pointers to leaf functions. - // It's a constant array containing pointers to constants. 
- // The pointer array is always constant however. E.g. - // device SSBO * constant (&array)[N]. - // const device SSBO * constant (&array)[N]. - // constant SSBO * constant (&array)[N]. - // However, this only matters for argument buffers, since for MSL 1.0 style codegen, - // we emit the buffer array on stack instead, and that seems to work just fine apparently. - decl += " constant"; + decl += " (&"; + decl += to_expression(name_id); + decl += ")"; + decl += join("[", array_size, "]"); } } + else + { + auto array_size_decl = type_to_array_glsl(type); + if (array_size_decl.empty()) + decl += "& "; + else + decl += " (&"; - decl += " (&"; - decl += to_expression(name_id); - decl += ")"; - decl += type_to_array_glsl(type); + const char *restrict_kw = to_restrict(name_id, true); + if (*restrict_kw) + { + decl += " "; + decl += restrict_kw; + } + decl += to_expression(name_id); + + if (!array_size_decl.empty()) + { + decl += ")"; + decl += array_size_decl; + } + } } - else if (!opaque_handle) + else if (!type_is_image && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct)) { // If this is going to be a reference to a variable pointer, the address space // for the reference has to go before the '&', but after the '*'. if (!address_space.empty()) { - if (decl.back() == '*') - decl += join(" ", address_space, " "); + if (type_is_pointer(type)) + { + if (*cv_qualifier == '\0') + decl += ' '; + decl += join(address_space, " "); + } else decl = join(address_space, " ", decl); } decl += "&"; decl += " "; + decl += to_restrict(name_id, true); decl += to_expression(name_id); } + else if (type_is_image) + { + if (type.array.empty()) + { + // For non-arrayed types we can just pass opaque descriptors by value. + // This fixes problems if descriptors are passed by value from argument buffers and plain descriptors + // in same shader. + // There is no address space we can actually use, but value will work. 
+ // This will break if applications attempt to pass down descriptor arrays as arguments, but + // fortunately that is extremely unlikely ... + decl += " "; + decl += to_expression(name_id); + } + else + { + const char *img_address_space = descriptor_address_space(name_id, type_storage, "thread const"); + decl = join(img_address_space, " ", decl); + decl += "& "; + decl += to_expression(name_id); + } + } else { if (!address_space.empty()) @@ -7152,6 +14055,16 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += to_expression(name_id); } + // Emulate texture2D atomic operations + auto *backing_var = maybe_get_backing_variable(name_id); + if (backing_var && atomic_image_vars.count(backing_var->self)) + { + decl += ", device atomic_" + type_to_glsl(get(var_type.image.type), 0); + decl += "* " + to_expression(name_id) + "_atomic"; + } + + is_using_builtin_array = false; + return decl; } @@ -7168,8 +14081,8 @@ string CompilerMSL::to_name(uint32_t id, bool allow_alias) const return Compiler::to_name(id, allow_alias); } -// Returns a name that combines the name of the struct with the name of the member, except for Builtins -string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t index) +// Appends the name of the member to the variable qualifier string, except for Builtins. +string CompilerMSL::append_member_name(const string &qualifier, const SPIRType &type, uint32_t index) { // Don't qualify Builtin names because they are unique and are treated as such when building expressions BuiltIn builtin = BuiltInMax; @@ -7180,7 +14093,7 @@ string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t inde string mbr_name = to_member_name(type, index); size_t startPos = mbr_name.find_first_not_of("_"); mbr_name = (startPos != string::npos) ? 
mbr_name.substr(startPos) : ""; - return join(to_name(type.self), "_", mbr_name); + return join(qualifier, "_", mbr_name); } // Ensures that the specified name is permanently usable by prepending a prefix @@ -7190,17 +14103,21 @@ string CompilerMSL::ensure_valid_name(string name, string pfx) return (name.size() >= 2 && name[0] == '_' && isdigit(name[1])) ? (pfx + name) : name; } -// Replace all names that match MSL keywords or Metal Standard Library functions. -void CompilerMSL::replace_illegal_names() +const std::unordered_set &CompilerMSL::get_reserved_keyword_set() { - // FIXME: MSL and GLSL are doing two different things here. - // Agree on convention and remove this override. static const unordered_set keywords = { "kernel", "vertex", "fragment", "compute", + "constant", + "device", "bias", + "level", + "gradient2d", + "gradientcube", + "gradient3d", + "min_lod_clamp", "assert", "VARIABLE_TRACEPOINT", "STATIC_DATA_TRACEPOINT", @@ -7321,12 +14238,21 @@ void CompilerMSL::replace_illegal_names() "M_SQRT2", "M_SQRT1_2", "quad_broadcast", + "thread", + "threadgroup", }; + return keywords; +} + +const std::unordered_set &CompilerMSL::get_illegal_func_names() +{ static const unordered_set illegal_func_names = { "main", "saturate", "assert", + "fmin3", + "fmax3", "VARIABLE_TRACEPOINT", "STATIC_DATA_TRACEPOINT", "STATIC_DATA_TRACEPOINT_V", @@ -7447,24 +14373,57 @@ void CompilerMSL::replace_illegal_names() "M_SQRT1_2", }; + return illegal_func_names; +} + +// Replace all names that match MSL keywords or Metal Standard Library functions. +void CompilerMSL::replace_illegal_names() +{ + // FIXME: MSL and GLSL are doing two different things here. + // Agree on convention and remove this override. 
+ auto &keywords = get_reserved_keyword_set(); + auto &illegal_func_names = get_illegal_func_names(); + ir.for_each_typed_id([&](uint32_t self, SPIRVariable &) { - auto &dec = ir.meta[self].decoration; + auto *meta = ir.find_meta(self); + if (!meta) + return; + + auto &dec = meta->decoration; if (keywords.find(dec.alias) != end(keywords)) dec.alias += "0"; }); ir.for_each_typed_id([&](uint32_t self, SPIRFunction &) { - auto &dec = ir.meta[self].decoration; + auto *meta = ir.find_meta(self); + if (!meta) + return; + + auto &dec = meta->decoration; if (illegal_func_names.find(dec.alias) != end(illegal_func_names)) dec.alias += "0"; }); ir.for_each_typed_id([&](uint32_t self, SPIRType &) { - for (auto &mbr_dec : ir.meta[self].members) + auto *meta = ir.find_meta(self); + if (!meta) + return; + + for (auto &mbr_dec : meta->members) if (keywords.find(mbr_dec.alias) != end(keywords)) mbr_dec.alias += "0"; }); + CompilerGLSL::replace_illegal_names(); +} + +void CompilerMSL::replace_illegal_entry_point_names() +{ + auto &illegal_func_names = get_illegal_func_names(); + + // It is important to this before we fixup identifiers, + // since if ep_name is reserved, we will need to fix that up, + // and then copy alias back into entry.name after the fixup. for (auto &entry : ir.entry_points) { // Change both the entry point name and the alias, to keep them synced. @@ -7472,16 +14431,19 @@ void CompilerMSL::replace_illegal_names() if (illegal_func_names.find(ep_name) != end(illegal_func_names)) ep_name += "0"; - // Always write this because entry point might have been renamed earlier. 
ir.meta[entry.first].decoration.alias = ep_name; } +} - CompilerGLSL::replace_illegal_names(); +void CompilerMSL::sync_entry_point_aliases_and_names() +{ + for (auto &entry : ir.entry_points) + entry.second.name = ir.meta[entry.first].decoration.alias; } -string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) +string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved) { - auto *var = maybe_get(base); + auto *var = maybe_get_backing_variable(base); // If this is a buffer array, we have to dereference the buffer pointers. // Otherwise, if this is a pointer expression, dereference it. @@ -7489,11 +14451,16 @@ string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uin if (var) { - bool is_buffer_variable = var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer; + // Only allow -> dereference for block types. This is so we get expressions like + // buffer[i]->first_member.second_member, rather than buffer[i]->first->second. 
+ bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + bool is_buffer_variable = + is_block && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer); declared_as_pointer = is_buffer_variable && is_array(get(var->basetype)); } - if (declared_as_pointer || (!ptr_chain && should_dereference(base))) + if (declared_as_pointer || (!ptr_chain_is_resolved && should_dereference(base))) return join("->", to_member_name(type, index)); else return join(".", to_member_name(type, index)); @@ -7503,8 +14470,10 @@ string CompilerMSL::to_qualifiers_glsl(uint32_t id) { string quals; + auto *var = maybe_get(id); auto &type = expression_type(id); - if (type.storage == StorageClassWorkgroup) + + if (type.storage == StorageClassWorkgroup || (var && variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))) quals += "threadgroup "; return quals; @@ -7513,14 +14482,37 @@ string CompilerMSL::to_qualifiers_glsl(uint32_t id) // The optional id parameter indicates the object whose type we are trying // to find the description for. It is optional. Most type descriptions do not // depend on a specific object's use of that type. -string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) +string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member) { string type_name; // Pointer? if (type.pointer) { - type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get(type.parent_type), id)); + assert(type.pointer_depth > 0); + + const char *restrict_kw; + + auto type_address_space = get_type_address_space(type, id); + const auto *p_parent_type = &get(type.parent_type); + + // Work around C pointer qualifier rules. If glsl_type is a pointer type as well + // we'll need to emit the address space to the right. + // We could always go this route, but it makes the code unnatural. 
+ // Prefer emitting thread T *foo over T thread* foo since it's more readable, + // but we'll have to emit thread T * thread * T constant bar; for example. + if (type_is_pointer_to_pointer(type)) + type_name = join(type_to_glsl(*p_parent_type, id), " ", type_address_space, " "); + else + { + // Since this is not a pointer-to-pointer, ensure we've dug down to the base type. + // Some situations chain pointers even though they are not formally pointers-of-pointers. + while (type_is_pointer(*p_parent_type)) + p_parent_type = &get(p_parent_type->parent_type); + + type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id)); + } + switch (type.basetype) { case SPIRType::Image: @@ -7531,6 +14523,12 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) default: // Anything else can be a raw pointer. type_name += "*"; + restrict_kw = to_restrict(id, false); + if (*restrict_kw) + { + type_name += " "; + type_name += restrict_kw; + } break; } return type_name; @@ -7540,14 +14538,16 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) { case SPIRType::Struct: // Need OpName lookup here to get a "sensible" name for a struct. - return to_name(type.self); + // Allow Metal to use the array template to make arrays a value type + type_name = to_name(type.self); + break; case SPIRType::Image: case SPIRType::SampledImage: return image_type_glsl(type, id); case SPIRType::Sampler: - return sampler_type(type); + return sampler_type(type, id); case SPIRType::Void: return "void"; @@ -7558,10 +14558,27 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::ControlPointArray: return join("patch_control_point<", type_to_glsl(get(type.parent_type), id), ">"); + case SPIRType::Interpolant: + return join("interpolant<", type_to_glsl(get(type.parent_type), id), ", interpolation::", + has_decoration(type.self, DecorationNoPerspective) ? 
"no_perspective" : "perspective", ">"); + // Scalars case SPIRType::Boolean: - type_name = "bool"; + { + auto *var = maybe_get_backing_variable(id); + if (var && var->basevariable) + var = &get(var->basevariable); + + // Need to special-case threadgroup booleans. They are supposed to be logical + // storage, but MSL compilers will sometimes crash if you use threadgroup bool. + // Workaround this by using 16-bit types instead and fixup on load-store to this data. + if ((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup || member) + type_name = "short"; + else + type_name = "bool"; break; + } + case SPIRType::Char: case SPIRType::SByte: type_name = "char"; @@ -7600,6 +14617,16 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) case SPIRType::Double: type_name = "double"; // Currently unsupported break; + case SPIRType::AccelerationStructure: + if (msl_options.supports_msl_version(2, 4)) + type_name = "raytracing::acceleration_structure"; + else if (msl_options.supports_msl_version(2, 3)) + type_name = "raytracing::instance_acceleration_structure"; + else + SPIRV_CROSS_THROW("Acceleration Structure Type is supported in MSL 2.3 and above."); + break; + case SPIRType::RayQuery: + return "raytracing::intersection_query"; default: return "unknown_type"; @@ -7607,17 +14634,157 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) // Matrix? if (type.columns > 1) + { + auto *var = maybe_get_backing_variable(id); + if (var && var->basevariable) + var = &get(var->basevariable); + + // Need to special-case threadgroup matrices. Due to an oversight, Metal's + // matrix struct prior to Metal 3 lacks constructors in the threadgroup AS, + // preventing us from default-constructing or initializing matrices in threadgroup storage. + // Work around this by using our own type as storage. 
+ if (((var && var->storage == StorageClassWorkgroup) || type.storage == StorageClassWorkgroup) && + !msl_options.supports_msl_version(3, 0)) + { + add_spv_func_and_recompile(SPVFuncImplStorageMatrix); + type_name = "spvStorage_" + type_name; + } + type_name += to_string(type.columns) + "x"; + } // Vector or Matrix? if (type.vecsize > 1) type_name += to_string(type.vecsize); - return type_name; + if (type.array.empty() || using_builtin_array()) + { + return type_name; + } + else + { + // Allow Metal to use the array template to make arrays a value type + add_spv_func_and_recompile(SPVFuncImplUnsafeArray); + string res; + string sizes; + + for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) + { + res += "spvUnsafeArray<"; + sizes += ", "; + sizes += to_array_size(type, i); + sizes += ">"; + } + + res += type_name + sizes; + return res; + } +} + +string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + return type_to_glsl(type, id, false); +} + +string CompilerMSL::type_to_array_glsl(const SPIRType &type) +{ + // Allow Metal to use the array template to make arrays a value type + switch (type.basetype) + { + case SPIRType::AtomicCounter: + case SPIRType::ControlPointArray: + case SPIRType::RayQuery: + return CompilerGLSL::type_to_array_glsl(type); + + default: + if (type_is_array_of_pointers(type) || using_builtin_array()) + return CompilerGLSL::type_to_array_glsl(type); + else + return ""; + } +} + +string CompilerMSL::constant_op_expression(const SPIRConstantOp &cop) +{ + switch (cop.opcode) + { + case OpQuantizeToF16: + add_spv_func_and_recompile(SPVFuncImplQuantizeToF16); + return join("spvQuantizeToF16(", to_expression(cop.arguments[0]), ")"); + default: + return CompilerGLSL::constant_op_expression(cop); + } +} + +bool CompilerMSL::variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const +{ + if (variable.storage == storage) + return true; + + if (storage == StorageClassWorkgroup) + { + // Specially 
masked IO block variable. + // Normally, we will never access IO blocks directly here. + // The only scenario which that should occur is with a masked IO block. + if (is_tesc_shader() && variable.storage == StorageClassOutput && + has_decoration(get(variable.basetype).self, DecorationBlock)) + { + return true; + } + + return variable.storage == StorageClassOutput && is_tesc_shader() && is_stage_output_variable_masked(variable); + } + else if (storage == StorageClassStorageBuffer) + { + // These builtins are passed directly; we don't want to use remapping + // for them. + auto builtin = (BuiltIn)get_decoration(variable.self, DecorationBuiltIn); + if (is_tese_shader() && is_builtin_variable(variable) && (builtin == BuiltInTessCoord || builtin == BuiltInPrimitiveId)) + return false; + + // We won't be able to catch writes to control point outputs here since variable + // refers to a function local pointer. + // This is fine, as there cannot be concurrent writers to that memory anyways, + // so we just ignore that case. + + return (variable.storage == StorageClassOutput || variable.storage == StorageClassInput) && + !variable_storage_requires_stage_io(variable.storage) && + (variable.storage != StorageClassOutput || !is_stage_output_variable_masked(variable)); + } + else + { + return false; + } +} + +std::string CompilerMSL::variable_decl(const SPIRVariable &variable) +{ + bool old_is_using_builtin_array = is_using_builtin_array; + + // Threadgroup arrays can't have a wrapper type. 
+ if (variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) + is_using_builtin_array = true; + + auto expr = CompilerGLSL::variable_decl(variable); + is_using_builtin_array = old_is_using_builtin_array; + return expr; } -std::string CompilerMSL::sampler_type(const SPIRType &type) +// GCC workaround of lambdas calling protected funcs +std::string CompilerMSL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id) { + return CompilerGLSL::variable_decl(type, name, id); +} + +std::string CompilerMSL::sampler_type(const SPIRType &type, uint32_t id) +{ + auto *var = maybe_get(id); + if (var && var->basevariable) + { + // Check against the base variable, and not a fake ID which might have been generated for this variable. + id = var->basevariable; + } + if (!type.array.empty()) { if (!msl_options.supports_msl_version(2)) @@ -7627,12 +14794,16 @@ std::string CompilerMSL::sampler_type(const SPIRType &type) SPIRV_CROSS_THROW("Arrays of arrays of samplers are not supported in MSL."); // Arrays of samplers in MSL must be declared with a special array syntax ala C++11 std::array. + // If we have a runtime array, it could be a variable-count descriptor set binding. uint32_t array_size = to_array_size_literal(type); + if (array_size == 0) + array_size = get_resource_array_size(id); + if (array_size == 0) SPIRV_CROSS_THROW("Unsized array of samplers is not supported in MSL."); auto &parent = get(get_pointee_type(type).parent_type); - return join("array<", sampler_type(parent), ", ", array_size, ">"); + return join("array<", sampler_type(parent, id), ", ", array_size, ">"); } else return "sampler"; @@ -7669,7 +14840,11 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) SPIRV_CROSS_THROW("Arrays of arrays of textures are not supported in MSL."); // Arrays of images in MSL must be declared with a special array syntax ala C++11 std::array. + // If we have a runtime array, it could be a variable-count descriptor set binding. 
uint32_t array_size = to_array_size_literal(type); + if (array_size == 0) + array_size = get_resource_array_size(id); + if (array_size == 0) SPIRV_CROSS_THROW("Unsized array of images is not supported in MSL."); @@ -7681,14 +14856,19 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) // Bypass pointers because we need the real image struct auto &img_type = get(type.self).image; - if (image_is_comparison(type, id)) + if (is_depth_image(type, id)) { switch (img_type.dim) { case Dim1D: - img_type_name += "depth1d_unsupported_by_metal"; - break; case Dim2D: + if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D) + { + // Use a native Metal 1D texture + img_type_name += "depth1d_unsupported_by_metal"; + break; + } + if (img_type.ms && img_type.arrayed) { if (!msl_options.supports_msl_version(2, 1)) @@ -7706,7 +14886,10 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) img_type_name += "depth3d_unsupported_by_metal"; break; case DimCube: - img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube"); + if (!msl_options.emulate_cube_array) + img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube"); + else + img_type_name += (img_type.arrayed ? "depth2d_array" : "depthcube"); break; default: img_type_name += "unknown_depth_texture_type"; @@ -7717,9 +14900,6 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) { switch (img_type.dim) { - case Dim1D: - img_type_name += (img_type.arrayed ? 
"texture1d_array" : "texture1d"); - break; case DimBuffer: if (img_type.ms || img_type.arrayed) SPIRV_CROSS_THROW("Cannot use texel buffers with multisampling or array layers."); @@ -7733,9 +14913,27 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) else img_type_name += "texture2d"; break; + case Dim1D: case Dim2D: case DimSubpassData: - if (img_type.ms && img_type.arrayed) + { + bool subpass_array = + img_type.dim == DimSubpassData && (msl_options.multiview || msl_options.arrayed_subpass_input); + if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D) + { + // Use a native Metal 1D texture + img_type_name += (img_type.arrayed ? "texture1d_array" : "texture1d"); + break; + } + + // Use Metal's native frame-buffer fetch API for subpass inputs. + if (type_is_msl_framebuffer_fetch(type)) + { + auto img_type_4 = get(img_type.type); + img_type_4.vecsize = 4; + return type_to_glsl(img_type_4); + } + if (img_type.ms && (img_type.arrayed || subpass_array)) { if (!msl_options.supports_msl_version(2, 1)) SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1."); @@ -7743,16 +14941,20 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) } else if (img_type.ms) img_type_name += "texture2d_ms"; - else if (img_type.arrayed) + else if (img_type.arrayed || subpass_array) img_type_name += "texture2d_array"; else img_type_name += "texture2d"; break; + } case Dim3D: img_type_name += "texture3d"; break; case DimCube: - img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube"); + if (!msl_options.emulate_cube_array) + img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube"); + else + img_type_name += (img_type.arrayed ? "texture2d_array" : "texturecube"); break; default: img_type_name += "unknown_texture_type"; @@ -7812,19 +15014,57 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) const uint32_t *ops = stream(i); auto op = static_cast(i.op); - // Metal 2.0 is required. 
iOS only supports quad ops. macOS only supports - // broadcast and shuffle on 10.13 (2.0), with full support in 10.14 (2.1). - // Note that iOS makes no distinction between a quad-group and a subgroup; - // all subgroups are quad-groups there. + if (msl_options.emulate_subgroups) + { + // In this mode, only the GroupNonUniform cap is supported. The only op + // we need to handle, then, is OpGroupNonUniformElect. + if (op != OpGroupNonUniformElect) + SPIRV_CROSS_THROW("Subgroup emulation does not support operations other than Elect."); + // In this mode, the subgroup size is assumed to be one, so every invocation + // is elected. + emit_op(ops[0], ops[1], "true", true); + return; + } + + // Metal 2.0 is required. iOS only supports quad ops on 11.0 (2.0), with + // full support in 13.0 (2.2). macOS only supports broadcast and shuffle on + // 10.13 (2.0), with full support in 10.14 (2.1). + // Note that Apple GPUs before A13 make no distinction between a quad-group + // and a SIMD-group; all SIMD-groups are quad-groups on those. if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up."); - if (msl_options.is_ios()) + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + if (msl_options.is_ios() && (!msl_options.supports_msl_version(2, 3) || !msl_options.ios_use_simdgroup_functions)) { switch (op) { default: - SPIRV_CROSS_THROW("iOS only supports quad-group operations."); + SPIRV_CROSS_THROW("Subgroup ops beyond broadcast, ballot, and shuffle on iOS require Metal 2.3 and up."); + case OpGroupNonUniformBroadcastFirst: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("BroadcastFirst on iOS requires Metal 2.2 and up."); + break; + case OpGroupNonUniformElect: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Elect on iOS requires Metal 2.2 and up."); + break; + case OpGroupNonUniformAny: + case OpGroupNonUniformAll: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformBallotBitCount: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Ballot ops on iOS requires Metal 2.2 and up."); + break; case OpGroupNonUniformBroadcast: case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: @@ -7841,7 +15081,7 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) switch (op) { default: - SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.0 and up."); + SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.1 and up."); case OpGroupNonUniformBroadcast: case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: @@ -7854,23 +15094,25 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto scope = static_cast(get(ops[2]).scalar()); + auto scope = 
static_cast(evaluate_constant_u32(ops[2])); if (scope != ScopeSubgroup) SPIRV_CROSS_THROW("Only subgroup scope is supported."); switch (op) { case OpGroupNonUniformElect: - emit_op(result_type, id, "simd_is_first()", true); + if (msl_options.use_quadgroup_operation()) + emit_op(result_type, id, "quad_is_first()", false); + else + emit_op(result_type, id, "simd_is_first()", false); break; case OpGroupNonUniformBroadcast: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_broadcast" : "simd_broadcast"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBroadcast"); break; case OpGroupNonUniformBroadcastFirst: - emit_unary_func_op(result_type, id, ops[3], "simd_broadcast_first"); + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBroadcastFirst"); break; case OpGroupNonUniformBallot: @@ -7886,54 +15128,63 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) break; case OpGroupNonUniformBallotFindLSB: - emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindLSB"); + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindLSB"); break; case OpGroupNonUniformBallotFindMSB: - emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallotFindMSB"); + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindMSB"); break; case OpGroupNonUniformBallotBitCount: { auto operation = static_cast(ops[3]); - if (operation == GroupOperationReduce) - emit_unary_func_op(result_type, id, ops[4], "spvSubgroupBallotBitCount"); - else if (operation == GroupOperationInclusiveScan) + switch (operation) + { + case GroupOperationReduce: + emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_size_id, "spvSubgroupBallotBitCount"); + break; + case GroupOperationInclusiveScan: emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, "spvSubgroupBallotInclusiveBitCount"); - else if (operation == 
GroupOperationExclusiveScan) + break; + case GroupOperationExclusiveScan: emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, "spvSubgroupBallotExclusiveBitCount"); - else + break; + default: SPIRV_CROSS_THROW("Invalid BitCount operation."); + } break; } case OpGroupNonUniformShuffle: - emit_binary_func_op(result_type, id, ops[3], ops[4], msl_options.is_ios() ? "quad_shuffle" : "simd_shuffle"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffle"); break; case OpGroupNonUniformShuffleXor: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_shuffle_xor" : "simd_shuffle_xor"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleXor"); break; case OpGroupNonUniformShuffleUp: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_shuffle_up" : "simd_shuffle_up"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleUp"); break; case OpGroupNonUniformShuffleDown: - emit_binary_func_op(result_type, id, ops[3], ops[4], - msl_options.is_ios() ? "quad_shuffle_down" : "simd_shuffle_down"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleDown"); break; case OpGroupNonUniformAll: - emit_unary_func_op(result_type, id, ops[3], "simd_all"); + if (msl_options.use_quadgroup_operation()) + emit_unary_func_op(result_type, id, ops[3], "quad_all"); + else + emit_unary_func_op(result_type, id, ops[3], "simd_all"); break; case OpGroupNonUniformAny: - emit_unary_func_op(result_type, id, ops[3], "simd_any"); + if (msl_options.use_quadgroup_operation()) + emit_unary_func_op(result_type, id, ops[3], "quad_any"); + else + emit_unary_func_op(result_type, id, ops[3], "simd_any"); break; case OpGroupNonUniformAllEqual: @@ -7954,7 +15205,7 @@ case OpGroupNonUniform##op: \ else if (operation == GroupOperationClusteredReduce) \ { \ /* Only cluster sizes of 4 are supported. 
*/ \ - uint32_t cluster_size = get(ops[5]).scalar(); \ + uint32_t cluster_size = evaluate_constant_u32(ops[5]); \ if (cluster_size != 4) \ SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \ @@ -7969,6 +15220,7 @@ case OpGroupNonUniform##op: \ MSL_GROUP_OP(IMul, product) #undef MSL_GROUP_OP // The others, unfortunately, don't support InclusiveScan or ExclusiveScan. + #define MSL_GROUP_OP(op, msl_op) \ case OpGroupNonUniform##op: \ { \ @@ -7982,7 +15234,7 @@ case OpGroupNonUniform##op: \ else if (operation == GroupOperationClusteredReduce) \ { \ /* Only cluster sizes of 4 are supported. */ \ - uint32_t cluster_size = get(ops[5]).scalar(); \ + uint32_t cluster_size = evaluate_constant_u32(ops[5]); \ if (cluster_size != 4) \ SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \ @@ -7991,12 +15243,36 @@ case OpGroupNonUniform##op: \ SPIRV_CROSS_THROW("Invalid group operation."); \ break; \ } + +#define MSL_GROUP_OP_CAST(op, msl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "simd_" #msl_op, type, type); \ + else if (operation == GroupOperationInclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationExclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + /* Only cluster sizes of 4 are supported. 
*/ \ + uint32_t cluster_size = evaluate_constant_u32(ops[5]); \ + if (cluster_size != 4) \ + SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ + emit_unary_func_op_cast(result_type, id, ops[4], "quad_" #msl_op, type, type); \ + } \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + MSL_GROUP_OP(FMin, min) MSL_GROUP_OP(FMax, max) - MSL_GROUP_OP(SMin, min) - MSL_GROUP_OP(SMax, max) - MSL_GROUP_OP(UMin, min) - MSL_GROUP_OP(UMax, max) + MSL_GROUP_OP_CAST(SMin, min, int_type) + MSL_GROUP_OP_CAST(SMax, max, int_type) + MSL_GROUP_OP_CAST(UMin, min, uint_type) + MSL_GROUP_OP_CAST(UMax, max, uint_type) MSL_GROUP_OP(BitwiseAnd, and) MSL_GROUP_OP(BitwiseOr, or) MSL_GROUP_OP(BitwiseXor, xor) @@ -8004,28 +15280,15 @@ case OpGroupNonUniform##op: \ MSL_GROUP_OP(LogicalOr, or) MSL_GROUP_OP(LogicalXor, xor) // clang-format on +#undef MSL_GROUP_OP +#undef MSL_GROUP_OP_CAST case OpGroupNonUniformQuadSwap: - { - // We can implement this easily based on the following table giving - // the target lane ID from the direction and current lane ID: - // Direction - // | 0 | 1 | 2 | - // ---+---+---+---+ - // L 0 | 1 2 3 - // a 1 | 0 3 2 - // n 2 | 3 0 1 - // e 3 | 2 1 0 - // Notice that target = source ^ (direction + 1). 
- uint32_t mask = get(ops[4]).scalar() + 1; - uint32_t mask_id = ir.increase_bound_by(1); - set(mask_id, expression_type_id(ops[4]), mask, false); - emit_binary_func_op(result_type, id, ops[3], mask_id, "quad_shuffle_xor"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadSwap"); break; - } case OpGroupNonUniformQuadBroadcast: - emit_binary_func_op(result_type, id, ops[3], ops[4], "quad_broadcast"); + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadBroadcast"); break; default: @@ -8043,19 +15306,24 @@ string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in assert(out_type.basetype != SPIRType::Boolean); assert(in_type.basetype != SPIRType::Boolean); - bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); - bool same_size_cast = out_type.width == in_type.width; + bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type) && (out_type.vecsize == in_type.vecsize); + bool same_size_cast = (out_type.width * out_type.vecsize) == (in_type.width * in_type.vecsize); - if (integral_cast && same_size_cast) - { - // Trivial bitcast case, casts between integers. - return type_to_glsl(out_type); - } - else - { - // Fall back to the catch-all bitcast in MSL. + // Bitcasting can only be used between types of the same overall size. + // And always formally cast between integers, because it's trivial, and also + // because Metal can internally cast the results of some integer ops to a larger + // size (eg. short shift right becomes int), which means chaining integer ops + // together may introduce size variations that SPIR-V doesn't know about. + if (same_size_cast && !integral_cast) return "as_type<" + type_to_glsl(out_type) + ">"; - } + else + return type_to_glsl(out_type); +} + +bool CompilerMSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t) +{ + // This is handled from the outside where we deal with PtrToU/UToPtr and friends. 
+ return false; } // Returns an MSL string identifying the name of a SPIR-V builtin. @@ -8064,20 +15332,114 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) { switch (builtin) { - + // Handle HLSL-style 0-based vertex/instance index. // Override GLSL compiler strictness case BuiltInVertexId: - return "gl_VertexID"; + ensure_builtin(StorageClassInput, BuiltInVertexId); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_vertex_arg != TriState::No) + needs_base_vertex_arg = TriState::Yes; + return "gl_VertexID"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseVertex); + return "(gl_VertexID - gl_BaseVertex)"; + } + } + else + { + return "gl_VertexID"; + } case BuiltInInstanceId: - return "gl_InstanceID"; + ensure_builtin(StorageClassInput, BuiltInInstanceId); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_instance_arg != TriState::No) + needs_base_instance_arg = TriState::Yes; + return "gl_InstanceID"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseInstance); + return "(gl_InstanceID - gl_BaseInstance)"; + } + } + else + { + return "gl_InstanceID"; + } case BuiltInVertexIndex: - return "gl_VertexIndex"; + ensure_builtin(StorageClassInput, BuiltInVertexIndex); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_vertex_arg != TriState::No) + needs_base_vertex_arg = TriState::Yes; + return "gl_VertexIndex"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseVertex); + return "(gl_VertexIndex - gl_BaseVertex)"; + } + } + 
else + { + return "gl_VertexIndex"; + } case BuiltInInstanceIndex: - return "gl_InstanceIndex"; + ensure_builtin(StorageClassInput, BuiltInInstanceIndex); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_instance_arg != TriState::No) + needs_base_instance_arg = TriState::Yes; + return "gl_InstanceIndex"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseInstance); + return "(gl_InstanceIndex - gl_BaseInstance)"; + } + } + else + { + return "gl_InstanceIndex"; + } case BuiltInBaseVertex: - return "gl_BaseVertex"; + if (msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + needs_base_vertex_arg = TriState::No; + return "gl_BaseVertex"; + } + else + { + SPIRV_CROSS_THROW("BaseVertex requires Metal 1.1 and Mac or Apple A9+ hardware."); + } case BuiltInBaseInstance: - return "gl_BaseInstance"; + if (msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + needs_base_instance_arg = TriState::No; + return "gl_BaseInstance"; + } + else + { + SPIRV_CROSS_THROW("BaseInstance requires Metal 1.1 and Mac or Apple A9+ hardware."); + } case BuiltInDrawIndex: SPIRV_CROSS_THROW("DrawIndex is not supported in MSL."); @@ -8088,55 +15450,76 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) if (!msl_options.supports_msl_version(2, 0)) SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); /* fallthrough */ + case BuiltInFragDepth: + case BuiltInFragStencilRefEXT: + if ((builtin == BuiltInFragDepth && !msl_options.enable_frag_depth_builtin) || + (builtin == BuiltInFragStencilRefEXT && !msl_options.enable_frag_stencil_ref_builtin)) + break; + /* fallthrough */ case BuiltInPosition: case BuiltInPointSize: case BuiltInClipDistance: case 
BuiltInCullDistance: case BuiltInLayer: - case BuiltInFragDepth: - case BuiltInFragStencilRefEXT: - case BuiltInSampleMask: - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) break; - if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && + !is_stage_output_builtin_masked(builtin)) return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); + break; + case BuiltInSampleMask: + if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && + (has_additional_fixed_sample_mask() || needs_sample_id)) + { + string samp_mask_in; + samp_mask_in += "(" + CompilerGLSL::builtin_to_glsl(builtin, storage); + if (has_additional_fixed_sample_mask()) + samp_mask_in += " & " + additional_fixed_sample_mask_str(); + if (needs_sample_id) + samp_mask_in += " & (1 << gl_SampleID)"; + samp_mask_in += ")"; + return samp_mask_in; + } + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point) && + !is_stage_output_builtin_masked(builtin)) + return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); break; - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return stage_in_var_name + "." 
+ CompilerGLSL::builtin_to_glsl(builtin, storage); break; case BuiltInTessLevelOuter: - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tesc_shader() && storage != StorageClassInput && current_function && + (current_function->self == ir.default_entry_point)) { - if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && - current_function && (current_function->self == ir.default_entry_point)) - return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); - else - break; - } - if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "].edgeTessellationFactor"); + } break; case BuiltInTessLevelInner: - if (get_execution_model() == ExecutionModelTessellationEvaluation) + if (is_tesc_shader() && storage != StorageClassInput && current_function && + (current_function->self == ir.default_entry_point)) { - if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && - current_function && (current_function->self == ir.default_entry_point)) - return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); - else - break; - } - if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "].insideTessellationFactor"); + } break; + case BuiltInHelperInvocation: + if (needs_manual_helper_invocation_updates()) + break; + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.3 on iOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); + // In SPIR-V 1.6 with Volatile 
HelperInvocation, we cannot emit a fixup early. + return "simd_is_helper_thread()"; + default: break; } @@ -8190,6 +15573,11 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Tess. control function in case BuiltInInvocationId: + if (msl_options.multi_patch_workgroup) + { + // Shouldn't be reached. + SPIRV_CROSS_THROW("InvocationId is computed manually with multi-patch workgroups in MSL."); + } return "thread_index_in_threadgroup"; case BuiltInPatchVertices: // Shouldn't be reached. @@ -8198,12 +15586,17 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) switch (execution.model) { case ExecutionModelTessellationControl: + if (msl_options.multi_patch_workgroup) + { + // Shouldn't be reached. + SPIRV_CROSS_THROW("PrimitiveId is computed manually with multi-patch workgroups in MSL."); + } return "threadgroup_position_in_grid"; case ExecutionModelTessellationEvaluation: return "patch_id"; case ExecutionModelFragment: - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("PrimitiveId is not supported in fragment on iOS."); + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("PrimitiveId on iOS requires MSL 2.3."); else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2."); return "primitive_id"; @@ -8271,6 +15664,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) return "thread_index_in_threadgroup"; case BuiltInSubgroupSize: + if (msl_options.emulate_subgroups || msl_options.fixed_subgroup_size != 0) + // Shouldn't be reached. + SPIRV_CROSS_THROW("Emitting threads_per_simdgroup attribute with fixed subgroup size??"); if (execution.model == ExecutionModelFragment) { if (!msl_options.supports_msl_version(2, 2)) @@ -8285,28 +15681,42 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) } case BuiltInNumSubgroups: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. 
+ SPIRV_CROSS_THROW("NumSubgroups is handled specially with emulation."); if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); - return msl_options.is_ios() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup"; + return msl_options.use_quadgroup_operation() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup"; case BuiltInSubgroupId: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("SubgroupId is handled specially with emulation."); if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); - return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup"; + return msl_options.use_quadgroup_operation() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup"; case BuiltInSubgroupLocalInvocationId: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("SubgroupLocalInvocationId is handled specially with emulation."); if (execution.model == ExecutionModelFragment) { if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders."); return "thread_index_in_simdgroup"; } - else + else if (execution.model == ExecutionModelKernel || execution.model == ExecutionModelGLCompute || + execution.model == ExecutionModelTessellationControl || + (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation)) { + // We are generating a Metal kernel function. if (!msl_options.supports_msl_version(2)) - SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); - return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup"; + SPIRV_CROSS_THROW("Subgroup builtins in kernel functions require Metal 2.0."); + return msl_options.use_quadgroup_operation() ? 
"thread_index_in_quadgroup" : "thread_index_in_simdgroup"; } + else + SPIRV_CROSS_THROW("Subgroup builtins are not available in this type of function."); case BuiltInSubgroupEqMask: case BuiltInSubgroupGeMask: @@ -8316,18 +15726,16 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Shouldn't be reached. SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL."); - case BuiltInBaryCoordNV: - // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. - if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Barycentrics not supported on iOS."); + case BuiltInBaryCoordKHR: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); return "barycentric_coord, center_perspective"; - case BuiltInBaryCoordNoPerspNV: - // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. 
- if (msl_options.is_ios()) - SPIRV_CROSS_THROW("Barycentrics not supported on iOS."); + case BuiltInBaryCoordNoPerspKHR: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); else if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); return "barycentric_coord, center_no_perspective"; @@ -8340,7 +15748,6 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) // Returns an MSL string type declaration for a SPIR-V builtin string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) { - const SPIREntryPoint &execution = get_entry_point(); switch (builtin) { // Vertex function in @@ -8361,6 +15768,7 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) // Vertex function out case BuiltInClipDistance: + case BuiltInCullDistance: return "float"; case BuiltInPointSize: return "float"; @@ -8383,17 +15791,17 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) // Tess. control function out case BuiltInTessLevelInner: - if (execution.model == ExecutionModelTessellationEvaluation) - return !execution.flags.get(ExecutionModeTriangles) ? "float2" : "float"; + if (is_tese_shader()) + return (msl_options.raw_buffer_tese_input || is_tessellating_triangles()) ? "float" : "float2"; return "half"; case BuiltInTessLevelOuter: - if (execution.model == ExecutionModelTessellationEvaluation) - return !execution.flags.get(ExecutionModeTriangles) ? "float4" : "float"; + if (is_tese_shader()) + return (msl_options.raw_buffer_tese_input || is_tessellating_triangles()) ? "float" : "float4"; return "half"; // Tess. evaluation function in case BuiltInTessCoord: - return execution.flags.get(ExecutionModeTriangles) ? 
"float3" : "float2"; + return "float3"; // Fragment function in case BuiltInFrontFacing: @@ -8411,6 +15819,14 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) case BuiltInViewIndex: return "uint"; + case BuiltInHelperInvocation: + return "bool"; + + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: + // Use the type as declared, can be 1, 2 or 3 components. + return type_to_glsl(get_variable_data_type(get(id))); + // Fragment function out case BuiltInFragDepth: return "float"; @@ -8437,13 +15853,8 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) case BuiltInSubgroupLtMask: return "uint4"; - case BuiltInHelperInvocation: - return "bool"; - - case BuiltInBaryCoordNV: - case BuiltInBaryCoordNoPerspNV: - // Use the type as declared, can be 1, 2 or 3 components. - return type_to_glsl(get_variable_data_type(get(id))); + case BuiltInDeviceIndex: + return "int"; default: return "unsupported-built-in-type"; @@ -8457,17 +15868,153 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma) if (prefix_comma) bi_arg += ", "; + // Handle HLSL-style 0-based vertex/instance index. 
+ builtin_declaration = true; bi_arg += builtin_type_decl(builtin); bi_arg += " " + builtin_to_glsl(builtin, StorageClassInput); bi_arg += " [[" + builtin_qualifier(builtin) + "]]"; + builtin_declaration = false; return bi_arg; } +const SPIRType &CompilerMSL::get_physical_member_type(const SPIRType &type, uint32_t index) const +{ + if (member_is_remapped_physical_type(type, index)) + return get(get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID)); + else + return get(type.member_types[index]); +} + +SPIRType CompilerMSL::get_presumed_input_type(const SPIRType &ib_type, uint32_t index) const +{ + SPIRType type = get_physical_member_type(ib_type, index); + uint32_t loc = get_member_decoration(ib_type.self, index, DecorationLocation); + uint32_t cmp = get_member_decoration(ib_type.self, index, DecorationComponent); + auto p_va = inputs_by_location.find({loc, cmp}); + if (p_va != end(inputs_by_location) && p_va->second.vecsize > type.vecsize) + type.vecsize = p_va->second.vecsize; + + return type; +} + +uint32_t CompilerMSL::get_declared_type_array_stride_msl(const SPIRType &type, bool is_packed, bool row_major) const +{ + // Array stride in MSL is always size * array_size. sizeof(float3) == 16, + // unlike GLSL and HLSL where array stride would be 16 and size 12. + + // We could use parent type here and recurse, but that makes creating physical type remappings + // far more complicated. We'd rather just create the final type, and ignore having to create the entire type + // hierarchy in order to compute this value, so make a temporary type on the stack. + + auto basic_type = type; + basic_type.array.clear(); + basic_type.array_size_literal.clear(); + uint32_t value_size = get_declared_type_size_msl(basic_type, is_packed, row_major); + + uint32_t dimensions = uint32_t(type.array.size()); + assert(dimensions > 0); + dimensions--; + + // Multiply together every dimension, except the last one. 
+ for (uint32_t dim = 0; dim < dimensions; dim++) + { + uint32_t array_size = to_array_size_literal(type, dim); + value_size *= max(array_size, 1u); + } + + return value_size; +} + +uint32_t CompilerMSL::get_declared_struct_member_array_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_array_stride_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_array_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_array_stride_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const +{ + // For packed matrices, we just use the size of the vector type. + // Otherwise, MatrixStride == alignment, which is the size of the underlying vector type. + if (packed) + return (type.width / 8) * ((row_major && type.columns > 1) ? 
type.columns : type.vecsize); + else + return get_declared_type_alignment_msl(type, false, row_major); +} + +uint32_t CompilerMSL::get_declared_struct_member_matrix_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_matrix_stride_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_matrix_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_matrix_stride_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment, + bool ignore_padding) const +{ + // If we have a target size, that is the declared size as well. + if (!ignore_padding && has_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget)) + return get_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget); + + if (struct_type.member_types.empty()) + return 0; + + uint32_t mbr_cnt = uint32_t(struct_type.member_types.size()); + + // In MSL, a struct's alignment is equal to the maximum alignment of any of its members. + uint32_t alignment = 1; + + if (!ignore_alignment) + { + for (uint32_t i = 0; i < mbr_cnt; i++) + { + uint32_t mbr_alignment = get_declared_struct_member_alignment_msl(struct_type, i); + alignment = max(alignment, mbr_alignment); + } + } + + // Last member will always be matched to the final Offset decoration, but size of struct in MSL now depends + // on physical size in MSL, and the size of the struct itself is then aligned to struct alignment. 
+ uint32_t spirv_offset = type_struct_member_offset(struct_type, mbr_cnt - 1); + uint32_t msl_size = spirv_offset + get_declared_struct_member_size_msl(struct_type, mbr_cnt - 1); + msl_size = (msl_size + alignment - 1) & ~(alignment - 1); + return msl_size; +} + // Returns the byte size of a struct member. -size_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const +uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const { - auto &type = get(struct_type.member_types[index]); + // Pointers take 8 bytes each + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + { + uint32_t type_size = 8 * (type.vecsize == 3 ? 4 : type.vecsize); + + // Work our way through potentially layered arrays, + // stopping when we hit a pointer that is not also an array. + int32_t dim_idx = (int32_t)type.array.size() - 1; + auto *p_type = &type; + while (!type_is_pointer(*p_type) && dim_idx >= 0) + { + type_size *= to_array_size_literal(*p_type, dim_idx); + p_type = &get(p_type->parent_type); + dim_idx--; + } + + return type_size; + } switch (type.basetype) { @@ -8481,39 +16028,56 @@ size_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &struct_t default: { - // For arrays, we can use ArrayStride to get an easy check. - // Runtime arrays will have zero size so force to min of one. if (!type.array.empty()) { uint32_t array_size = to_array_size_literal(type); - return type_struct_member_array_stride(struct_type, index) * max(array_size, 1u); + return get_declared_type_array_stride_msl(type, is_packed, row_major) * max(array_size, 1u); } if (type.basetype == SPIRType::Struct) + return get_declared_struct_size_msl(type); + + if (is_packed) { - // The size of a struct in Metal is aligned up to its natural alignment. 
- auto size = get_declared_struct_size(type); - auto alignment = get_declared_struct_member_alignment(struct_type, index); - return (size + alignment - 1) & ~(alignment - 1); + return type.vecsize * type.columns * (type.width / 8); } + else + { + // An unpacked 3-element vector or matrix column is the same memory size as a 4-element. + uint32_t vecsize = type.vecsize; + uint32_t columns = type.columns; - uint32_t component_size = type.width / 8; - uint32_t vecsize = type.vecsize; - uint32_t columns = type.columns; + if (row_major && columns > 1) + swap(vecsize, columns); - // An unpacked 3-element vector or matrix column is the same memory size as a 4-element. - if (vecsize == 3 && !has_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPacked)) - vecsize = 4; + if (vecsize == 3) + vecsize = 4; - return component_size * vecsize * columns; + return vecsize * columns * (type.width / 8); + } } } } -// Returns the byte alignment of a struct member. -size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const +uint32_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_size_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_size_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_size_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +// Returns the byte alignment of a type. +uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const { - auto &type = get(struct_type.member_types[index]); + // Pointers aligns on multiples of 8 bytes + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + return 8 * (type.vecsize == 3 ? 
4 : type.vecsize); switch (type.basetype) { @@ -8525,10 +16089,6 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_ case SPIRType::Sampler: SPIRV_CROSS_THROW("Querying alignment of opaque object."); - case SPIRType::Int64: - SPIRV_CROSS_THROW("long types are not supported in buffers in MSL."); - case SPIRType::UInt64: - SPIRV_CROSS_THROW("ulong types are not supported in buffers in MSL."); case SPIRType::Double: SPIRV_CROSS_THROW("double types are not supported in buffers in MSL."); @@ -8537,40 +16097,47 @@ size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_ // In MSL, a struct's alignment is equal to the maximum alignment of any of its members. uint32_t alignment = 1; for (uint32_t i = 0; i < type.member_types.size(); i++) - alignment = max(alignment, uint32_t(get_declared_struct_member_alignment(type, i))); + alignment = max(alignment, uint32_t(get_declared_struct_member_alignment_msl(type, i))); return alignment; } default: { + if (type.basetype == SPIRType::Int64 && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("long types in buffers are only supported in MSL 2.3 and above."); + if (type.basetype == SPIRType::UInt64 && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("ulong types in buffers are only supported in MSL 2.3 and above."); // Alignment of packed type is the same as the underlying component or column size. // Alignment of unpacked type is the same as the vector size. // Alignment of 3-elements vector is the same as 4-elements (including packed using column). - if (member_is_packed_type(struct_type, index)) - { - // This is getting pretty complicated. - // The special case of array of float/float2 needs to be handled here. - uint32_t packed_type_id = - get_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPackedType); - const SPIRType *packed_type = packed_type_id != 0 ? 
&get(packed_type_id) : nullptr; - if (packed_type && is_array(*packed_type) && !is_matrix(*packed_type) && - packed_type->basetype != SPIRType::Struct) - { - uint32_t stride = type_struct_member_array_stride(struct_type, index); - if (stride == (packed_type->width / 8) * 4) - return stride; - else - return packed_type->width / 8; - } - else - return type.width / 8; + if (is_packed) + { + // If we have packed_T and friends, the alignment is always scalar. + return type.width / 8; } else - return (type.width / 8) * (type.vecsize == 3 ? 4 : type.vecsize); + { + // This is the general rule for MSL. Size == alignment. + uint32_t vecsize = (row_major && type.columns > 1) ? type.columns : type.vecsize; + return (type.width / 8) * (vecsize == 3 ? 4 : vecsize); + } } } } +uint32_t CompilerMSL::get_declared_struct_member_alignment_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_alignment_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_alignment_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_alignment_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + bool CompilerMSL::skip_argument(uint32_t) const { return false; @@ -8626,6 +16193,17 @@ bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *ar return true; } +// If a needed custom function wasn't added before, add it and force a recompile. 
+void CompilerMSL::add_spv_func_and_recompile(SPVFuncImpl spv_func) +{ + if (spv_function_implementations.count(spv_func) == 0) + { + spv_function_implementations.insert(spv_func); + suppress_missing_prototypes = true; + force_recompile(); + } +} + bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length) { // Since MSL exists in a single execution scope, function prototype declarations are not @@ -8648,14 +16226,27 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui suppress_missing_prototypes = true; break; + case OpDemoteToHelperInvocationEXT: + uses_discard = true; + break; + + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + auto *var = compiler.maybe_get_backing_variable(args[2]); + image_pointers[args[1]] = var ? var->self : ID(0); + break; + } + case OpImageWrite: - uses_resource_write = true; + uses_image_write = true; break; case OpStore: check_resource_write(args[0]); break; + // Emulate texture2D atomic operations case OpAtomicExchange: case OpAtomicCompareExchange: case OpAtomicCompareExchangeWeak: @@ -8670,20 +16261,57 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui case OpAtomicAnd: case OpAtomicOr: case OpAtomicXor: + { + uses_atomics = true; + auto it = image_pointers.find(args[2]); + if (it != image_pointers.end()) + { + uses_image_write = true; + compiler.atomic_image_vars.insert(it->second); + } + else + check_resource_write(args[2]); + break; + } + + case OpAtomicStore: + { uses_atomics = true; - check_resource_write(args[2]); + auto it = image_pointers.find(args[0]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + uses_image_write = true; + } + else + check_resource_write(args[0]); break; + } case OpAtomicLoad: + { uses_atomics = true; + auto it = image_pointers.find(args[2]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + } break; + } 
case OpGroupNonUniformInverseBallot: needs_subgroup_invocation_id = true; break; + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + needs_subgroup_size = true; + break; + case OpGroupNonUniformBallotBitCount: - if (args[3] != GroupOperationReduce) + if (args[3] == GroupOperationReduce) + needs_subgroup_size = true; + else needs_subgroup_invocation_id = true; break; @@ -8703,11 +16331,66 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui uint32_t result_type = args[0]; uint32_t id = args[1]; uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); compiler.register_read(id, ptr, true); compiler.ir.ids[id].set_allow_type_rewrite(); break; - } + } + + case OpExtInst: + { + uint32_t extension_set = args[2]; + if (compiler.get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(args[3]); + switch (op_450) + { + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + { + if (!compiler.msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Pull-model interpolation requires MSL 2.3."); + // Fragment varyings used with pull-model interpolation need special handling, + // due to the way pull-model interpolation works in Metal. + auto *var = compiler.maybe_get_backing_variable(args[4]); + if (var) + { + compiler.pull_model_inputs.insert(var->self); + auto &var_type = compiler.get_variable_element_type(*var); + // In addition, if this variable has a 'Sample' decoration, we need the sample ID + // in order to do default interpolation. + if (compiler.has_decoration(var->self, DecorationSample)) + { + needs_sample_id = true; + } + else if (var_type.basetype == SPIRType::Struct) + { + // Now we need to check each member and see if it has this decoration. 
+ for (uint32_t i = 0; i < var_type.member_types.size(); ++i) + { + if (compiler.has_member_decoration(var_type.self, i, DecorationSample)) + { + needs_sample_id = true; + break; + } + } + } + } + break; + } + default: + break; + } + } + break; + } + + case OpIsHelperInvocationEXT: + if (compiler.needs_manual_helper_invocation_updates()) + needs_helper_invocation = true; + break; default: break; @@ -8727,7 +16410,7 @@ void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id) auto *p_var = compiler.maybe_get_backing_variable(var_id); StorageClass sc = p_var ? p_var->storage : StorageClassMax; if (sc == StorageClassUniform || sc == StorageClassStorageBuffer) - uses_resource_write = true; + uses_buffer_write = true; } // Returns an enumeration of a SPIR-V function that needs to be output for certain Op codes. @@ -8738,60 +16421,61 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpFMod: return SPVFuncImplMod; - case OpFunctionCall: - { - auto &return_type = compiler.get(args[0]); - if (return_type.array.size() > 1) + case OpFAdd: + case OpFSub: + if (compiler.msl_options.invariant_float_math || + compiler.has_decoration(args[1], DecorationNoContraction)) { - if (return_type.array.size() > SPVFuncImplArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - return static_cast(SPVFuncImplArrayCopyMultidimBase + return_type.array.size()); + return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub; } - else if (return_type.array.size() > 0) - return SPVFuncImplArrayCopy; - break; - } - - case OpStore: - { - // Get the result type of the RHS. Since this is run as a pre-processing stage, - // we must extract the result type directly from the Instruction, rather than the ID. - uint32_t id_lhs = args[0]; - uint32_t id_rhs = args[1]; - const SPIRType *type = nullptr; - if (compiler.ir.ids[id_rhs].get_type() != TypeNone) - { - // Could be a constant, or similar. 
- type = &compiler.expression_type(id_rhs); - } - else + case OpFMul: + case OpOuterProduct: + case OpMatrixTimesVector: + case OpVectorTimesMatrix: + case OpMatrixTimesMatrix: + if (compiler.msl_options.invariant_float_math || + compiler.has_decoration(args[1], DecorationNoContraction)) { - // Or ... an expression. - uint32_t tid = result_types[id_rhs]; - if (tid) - type = &compiler.get(tid); + return SPVFuncImplFMul; } + break; - auto *var = compiler.maybe_get(id_lhs); + case OpQuantizeToF16: + return SPVFuncImplQuantizeToF16; + + case OpTypeArray: + { + // Allow Metal to use the array template to make arrays a value type + return SPVFuncImplUnsafeArray; + } - // Are we simply assigning to a statically assigned variable which takes a constant? - // Don't bother emitting this function. - bool static_expression_lhs = - var && var->storage == StorageClassFunction && var->statically_assigned && var->remapped_variable; - if (type && compiler.is_array(*type) && !static_expression_lhs) + // Emulate texture2D atomic operations + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpAtomicLoad: + case OpAtomicStore: + { + auto it = image_pointers.find(args[opcode == OpAtomicStore ? 
0 : 2]); + if (it != image_pointers.end()) { - if (type->array.size() > 1) - { - if (type->array.size() > SPVFuncImplArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - return static_cast(SPVFuncImplArrayCopyMultidimBase + type->array.size()); - } - else - return SPVFuncImplArrayCopy; + uint32_t tid = compiler.get(it->second).basetype; + if (tid && compiler.get(tid).image.dim == Dim2D) + return SPVFuncImplImage2DAtomicCoords; } - break; } @@ -8803,27 +16487,9 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o uint32_t tid = result_types[args[opcode == OpImageWrite ? 0 : 2]]; if (tid && compiler.get(tid).image.dim == DimBuffer && !compiler.msl_options.texture_buffer_native) return SPVFuncImplTexelBufferCoords; - - if (opcode == OpImageFetch && compiler.msl_options.swizzle_texture_samples) - return SPVFuncImplTextureSwizzle; - break; } - case OpImageSampleExplicitLod: - case OpImageSampleProjExplicitLod: - case OpImageSampleDrefExplicitLod: - case OpImageSampleProjDrefExplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleDrefImplicitLod: - case OpImageSampleProjDrefImplicitLod: - case OpImageGather: - case OpImageDrefGather: - if (compiler.msl_options.swizzle_texture_samples) - return SPVFuncImplTextureSwizzle; - break; - case OpExtInst: { uint32_t extension_set = args[2]; @@ -8849,16 +16515,21 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o auto &type = compiler.get(args[0]); if (type.vecsize == 1) return SPVFuncImplReflectScalar; - else - return SPVFuncImplNone; + break; } case GLSLstd450Refract: { auto &type = compiler.get(args[0]); if (type.vecsize == 1) return SPVFuncImplRefractScalar; - else - return SPVFuncImplNone; + break; + } + case GLSLstd450FaceForward: + { + auto &type = compiler.get(args[0]); + if (type.vecsize == 1) + return SPVFuncImplFaceForwardScalar; + break; } case 
GLSLstd450MatrixInverse: { @@ -8883,6 +16554,12 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o break; } + case OpGroupNonUniformBroadcast: + return SPVFuncImplSubgroupBroadcast; + + case OpGroupNonUniformBroadcastFirst: + return SPVFuncImplSubgroupBroadcastFirst; + case OpGroupNonUniformBallot: return SPVFuncImplSubgroupBallot; @@ -8902,6 +16579,24 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpGroupNonUniformAllEqual: return SPVFuncImplSubgroupAllEqual; + case OpGroupNonUniformShuffle: + return SPVFuncImplSubgroupShuffle; + + case OpGroupNonUniformShuffleXor: + return SPVFuncImplSubgroupShuffleXor; + + case OpGroupNonUniformShuffleUp: + return SPVFuncImplSubgroupShuffleUp; + + case OpGroupNonUniformShuffleDown: + return SPVFuncImplSubgroupShuffleDown; + + case OpGroupNonUniformQuadBroadcast: + return SPVFuncImplQuadBroadcast; + + case OpGroupNonUniformQuadSwap: + return SPVFuncImplQuadSwap; + default: break; } @@ -8916,8 +16611,27 @@ void CompilerMSL::MemberSorter::sort() // the members should be reordered, based on builtin and sorting aspect meta info. size_t mbr_cnt = type.member_types.size(); SmallVector mbr_idxs(mbr_cnt); - iota(mbr_idxs.begin(), mbr_idxs.end(), 0); // Fill with consecutive indices - std::sort(mbr_idxs.begin(), mbr_idxs.end(), *this); // Sort member indices based on sorting aspect + std::iota(mbr_idxs.begin(), mbr_idxs.end(), 0); // Fill with consecutive indices + std::stable_sort(mbr_idxs.begin(), mbr_idxs.end(), *this); // Sort member indices based on sorting aspect + + bool sort_is_identity = true; + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + if (mbr_idx != mbr_idxs[mbr_idx]) + { + sort_is_identity = false; + break; + } + } + + if (sort_is_identity) + return; + + if (meta.members.size() < type.member_types.size()) + { + // This should never trigger in normal circumstances, but to be safe. 
+ meta.members.resize(type.member_types.size()); + } // Move type and meta member info to the order defined by the sorted member indices. // This is done by creating temporary copies of both member types and meta, and then @@ -8929,32 +16643,36 @@ void CompilerMSL::MemberSorter::sort() type.member_types[mbr_idx] = mbr_types_cpy[mbr_idxs[mbr_idx]]; meta.members[mbr_idx] = mbr_meta_cpy[mbr_idxs[mbr_idx]]; } + + // If we're sorting by Offset, this might affect user code which accesses a buffer block. + // We will need to redirect member indices from defined index to sorted index using reverse lookup. + if (sort_aspect == SortAspect::Offset) + { + type.member_type_index_redirection.resize(mbr_cnt); + for (uint32_t map_idx = 0; map_idx < mbr_cnt; map_idx++) + type.member_type_index_redirection[mbr_idxs[map_idx]] = map_idx; + } } -// Sort first by builtin status (put builtins at end), then by the sorting aspect. bool CompilerMSL::MemberSorter::operator()(uint32_t mbr_idx1, uint32_t mbr_idx2) { auto &mbr_meta1 = meta.members[mbr_idx1]; auto &mbr_meta2 = meta.members[mbr_idx2]; - if (mbr_meta1.builtin != mbr_meta2.builtin) - return mbr_meta2.builtin; - else - switch (sort_aspect) - { - case Location: + + if (sort_aspect == LocationThenBuiltInType) + { + // Sort first by builtin status (put builtins at end), then by the sorting aspect. 
+ if (mbr_meta1.builtin != mbr_meta2.builtin) + return mbr_meta2.builtin; + else if (mbr_meta1.builtin) + return mbr_meta1.builtin_type < mbr_meta2.builtin_type; + else if (mbr_meta1.location == mbr_meta2.location) + return mbr_meta1.component < mbr_meta2.component; + else return mbr_meta1.location < mbr_meta2.location; - case LocationReverse: - return mbr_meta1.location > mbr_meta2.location; - case Offset: - return mbr_meta1.offset < mbr_meta2.offset; - case OffsetThenLocationReverse: - return (mbr_meta1.offset < mbr_meta2.offset) || - ((mbr_meta1.offset == mbr_meta2.offset) && (mbr_meta1.location > mbr_meta2.location)); - case Alphabetical: - return mbr_meta1.alias < mbr_meta2.alias; - default: - return false; - } + } + else + return mbr_meta1.offset < mbr_meta2.offset; } CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) @@ -8966,7 +16684,7 @@ CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) meta.members.resize(max(type.member_types.size(), meta.members.size())); } -void CompilerMSL::remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler) +void CompilerMSL::remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler) { auto &type = get(get(id).basetype); if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) @@ -8982,18 +16700,60 @@ void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t constexpr_samplers_by_binding[{ desc_set, binding }] = sampler; } -void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) +void CompilerMSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) { + bool is_packed = has_extended_decoration(source_id, SPIRVCrossDecorationPhysicalTypePacked); + auto *source_expr = maybe_get(source_id); auto *var = maybe_get_backing_variable(source_id); + const SPIRType *var_type, *phys_type; + if (uint32_t phys_id = 
get_extended_decoration(source_id, SPIRVCrossDecorationPhysicalTypeID)) + phys_type = &get(phys_id); + else + phys_type = &expr_type; if (var) + { source_id = var->self; + var_type = &get_variable_data_type(*var); + } - // Only interested in standalone builtin variables. + // Type fixups for workgroup variables if they are booleans. + if (var && (var->storage == StorageClassWorkgroup || var_type->basetype == SPIRType::Struct) && + expr_type.basetype == SPIRType::Boolean) + expr = join(type_to_glsl(expr_type), "(", expr, ")"); + // Type fixups for workgroup variables if they are matrices. + // Don't do fixup for packed types; those are handled specially. + // FIXME: Maybe use a type like spvStorageMatrix for packed matrices? + if (!msl_options.supports_msl_version(3, 0) && var && + (var->storage == StorageClassWorkgroup || + (var_type->basetype == SPIRType::Struct && + has_extended_decoration(var_type->self, SPIRVCrossDecorationWorkgroupStruct) && !is_packed)) && + expr_type.columns > 1) + { + SPIRType matrix_type = *phys_type; + if (source_expr && source_expr->need_transpose) + swap(matrix_type.vecsize, matrix_type.columns); + matrix_type.array.clear(); + matrix_type.array_size_literal.clear(); + expr = join(type_to_glsl(matrix_type), "(", expr, ")"); + } + + // Only interested in standalone builtin variables in the switch below. if (!has_decoration(source_id, DecorationBuiltIn)) + { + // If the backing variable does not match our expected sign, we can fix it up here. + // See ensure_correct_input_type(). 
+ if (var && var->storage == StorageClassInput) + { + auto &base_type = get(var->basetype); + if (base_type.basetype != SPIRType::Struct && expr_type.basetype != base_type.basetype) + expr = join(type_to_glsl(expr_type), "(", expr, ")"); + } return; + } auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); auto expected_type = expr_type.basetype; + auto expected_width = expr_type.width; switch (builtin) { case BuiltInGlobalInvocationId: @@ -9009,13 +16769,21 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp case BuiltInSubgroupSize: case BuiltInSubgroupLocalInvocationId: case BuiltInViewIndex: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + case BuiltInBaseInstance: + case BuiltInBaseVertex: expected_type = SPIRType::UInt; + expected_width = 32; break; case BuiltInTessLevelInner: case BuiltInTessLevelOuter: - if (get_execution_model() == ExecutionModelTessellationControl) + if (is_tesc_shader()) + { expected_type = SPIRType::Half; + expected_width = 16; + } break; default: @@ -9023,21 +16791,79 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp } if (expected_type != expr_type.basetype) - expr = bitcast_expression(expr_type, expected_type, expr); - - if (builtin == BuiltInTessCoord && get_entry_point().flags.get(ExecutionModeQuads) && expr_type.vecsize == 3) { - // In SPIR-V, this is always a vec3, even for quads. In Metal, though, it's a float2 for quads. - // The code is expecting a float3, so we need to widen this. - expr = join("float3(", expr, ", 0)"); + if (!expr_type.array.empty() && (builtin == BuiltInTessLevelInner || builtin == BuiltInTessLevelOuter)) + { + // Triggers when loading TessLevel directly as an array. + // Need explicit padding + cast. 
+ auto wrap_expr = join(type_to_glsl(expr_type), "({ "); + + uint32_t array_size = get_physical_tess_level_array_size(builtin); + for (uint32_t i = 0; i < array_size; i++) + { + if (array_size > 1) + wrap_expr += join("float(", expr, "[", i, "])"); + else + wrap_expr += join("float(", expr, ")"); + if (i + 1 < array_size) + wrap_expr += ", "; + } + + if (is_tessellating_triangles()) + wrap_expr += ", 0.0"; + + wrap_expr += " })"; + expr = std::move(wrap_expr); + } + else + { + // These are of different widths, so we cannot do a straight bitcast. + if (expected_width != expr_type.width) + expr = join(type_to_glsl(expr_type), "(", expr, ")"); + else + expr = bitcast_expression(expr_type, expected_type, expr); + } } } -void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) +void CompilerMSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) { + bool is_packed = has_extended_decoration(target_id, SPIRVCrossDecorationPhysicalTypePacked); + auto *target_expr = maybe_get(target_id); auto *var = maybe_get_backing_variable(target_id); + const SPIRType *var_type, *phys_type; + if (uint32_t phys_id = get_extended_decoration(target_id, SPIRVCrossDecorationPhysicalTypeID)) + phys_type = &get(phys_id); + else + phys_type = &expr_type; if (var) + { target_id = var->self; + var_type = &get_variable_data_type(*var); + } + + // Type fixups for workgroup variables if they are booleans. + if (var && (var->storage == StorageClassWorkgroup || var_type->basetype == SPIRType::Struct) && + expr_type.basetype == SPIRType::Boolean) + { + auto short_type = expr_type; + short_type.basetype = SPIRType::Short; + expr = join(type_to_glsl(short_type), "(", expr, ")"); + } + // Type fixups for workgroup variables if they are matrices. + // Don't do fixup for packed types; those are handled specially. + // FIXME: Maybe use a type like spvStorageMatrix for packed matrices? 
+ if (!msl_options.supports_msl_version(3, 0) && var && + (var->storage == StorageClassWorkgroup || + (var_type->basetype == SPIRType::Struct && + has_extended_decoration(var_type->self, SPIRVCrossDecorationWorkgroupStruct) && !is_packed)) && + expr_type.columns > 1) + { + SPIRType matrix_type = *phys_type; + if (target_expr && target_expr->need_transpose) + swap(matrix_type.vecsize, matrix_type.columns); + expr = join("spvStorage_", type_to_glsl(matrix_type), "(", expr, ")"); + } // Only interested in standalone builtin variables. if (!has_decoration(target_id, DecorationBuiltIn)) @@ -9045,6 +16871,7 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); auto expected_type = expr_type.basetype; + auto expected_width = expr_type.width; switch (builtin) { case BuiltInLayer: @@ -9053,11 +16880,13 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr case BuiltInPrimitiveId: case BuiltInViewIndex: expected_type = SPIRType::UInt; + expected_width = 32; break; case BuiltInTessLevelInner: case BuiltInTessLevelOuter: expected_type = SPIRType::Half; + expected_width = 16; break; default: @@ -9066,10 +16895,13 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr if (expected_type != expr_type.basetype) { - if (expected_type == SPIRType::Half && expr_type.basetype == SPIRType::Float) + if (expected_width != expr_type.width) { // These are of different widths, so we cannot do a straight bitcast. 
- expr = join("half(", expr, ")"); + auto type = expr_type; + type.basetype = expected_type; + type.width = expected_width; + expr = join(type_to_glsl(type), "(", expr, ")"); } else { @@ -9080,17 +16912,29 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr } } -std::string CompilerMSL::to_initializer_expression(const SPIRVariable &var) +string CompilerMSL::to_initializer_expression(const SPIRVariable &var) { // We risk getting an array initializer here with MSL. If we have an array. // FIXME: We cannot handle non-constant arrays being initialized. // We will need to inject spvArrayCopy here somehow ... auto &type = get(var.basetype); + string expr; if (ir.ids[var.initializer].get_type() == TypeConstant && (!type.array.empty() || type.basetype == SPIRType::Struct)) - return constant_expression(get(var.initializer)); + expr = constant_expression(get(var.initializer)); else - return CompilerGLSL::to_initializer_expression(var); + expr = CompilerGLSL::to_initializer_expression(var); + // If the initializer has more vector components than the variable, add a swizzle. + // FIXME: This can't handle arrays or structs. + auto &init_type = expression_type(var.initializer); + if (type.array.empty() && type.basetype != SPIRType::Struct && init_type.vecsize > type.vecsize) + expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0)); + return expr; +} + +string CompilerMSL::to_zero_initialized_expression(uint32_t) +{ + return "{}"; } bool CompilerMSL::descriptor_set_is_argument_buffer(uint32_t desc_set) const @@ -9103,6 +16947,18 @@ bool CompilerMSL::descriptor_set_is_argument_buffer(uint32_t desc_set) const return (argument_buffer_discrete_mask & (1u << desc_set)) == 0; } +bool CompilerMSL::is_supported_argument_buffer_type(const SPIRType &type) const +{ + // iOS Tier 1 argument buffers do not support writable images. 
+ // When the argument buffer is encoded, we don't know whether this image will have a + // NonWritable decoration, so just use discrete arguments for all storage images on iOS. + bool is_supported_type = !(type.basetype == SPIRType::Image && + type.image.sampled == 2 && + msl_options.is_ios() && + msl_options.argument_buffers_tier <= Options::ArgumentBuffersTier::Tier1); + return is_supported_type && !type_is_msl_framebuffer_fetch(type); +} + void CompilerMSL::analyze_argument_buffers() { // Gather all used resources and sort them out into argument buffers. @@ -9119,11 +16975,14 @@ void CompilerMSL::analyze_argument_buffers() struct Resource { SPIRVariable *var; + SPIRVariable *descriptor_alias; string name; SPIRType::BaseType basetype; uint32_t index; + uint32_t plane; }; SmallVector resources_in_set[kMaxArgumentBuffers]; + SmallVector inline_block_vars; bool set_needs_swizzle_buffer[kMaxArgumentBuffers] = {}; bool set_needs_buffer_sizes[kMaxArgumentBuffers] = {}; @@ -9156,33 +17015,85 @@ void CompilerMSL::analyze_argument_buffers() } } + // Handle descriptor aliasing as well as we can. + // We can handle aliasing of buffers by casting pointers, but not for typed resources. + // Inline UBOs cannot be handled since it's not a pointer, but inline data. + SPIRVariable *descriptor_alias = nullptr; + if (var.storage == StorageClassUniform || var.storage == StorageClassStorageBuffer) + { + for (auto &resource : resources_in_set[desc_set]) + { + if (get_decoration(resource.var->self, DecorationBinding) == + get_decoration(var_id, DecorationBinding) && + resource.basetype == SPIRType::Struct && type.basetype == SPIRType::Struct && + (resource.var->storage == StorageClassUniform || + resource.var->storage == StorageClassStorageBuffer)) + { + descriptor_alias = resource.var; + // Self-reference marks that we should declare the resource, + // and it's being used as an alias (so we can emit void* instead). 
+ resource.descriptor_alias = resource.var; + // Need to promote interlocked usage so that the primary declaration is correct. + if (interlocked_resources.count(var_id)) + interlocked_resources.insert(resource.var->self); + break; + } + } + } + + uint32_t binding = get_decoration(var_id, DecorationBinding); if (type.basetype == SPIRType::SampledImage) { add_resource_name(var_id); - uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image); - uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; - resources_in_set[desc_set].push_back({ &var, to_name(var_id), SPIRType::Image, image_resource_index }); + for (uint32_t i = 0; i < plane_count; i++) + { + uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i); + resources_in_set[desc_set].push_back( + { &var, descriptor_alias, to_name(var_id), SPIRType::Image, image_resource_index, i }); + } if (type.image.dim != DimBuffer && !constexpr_sampler) { + uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); resources_in_set[desc_set].push_back( - { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index }); + { &var, descriptor_alias, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 }); } } - else if (!constexpr_sampler) + else if (inline_uniform_blocks.count(SetBindingPair{ desc_set, binding })) + { + inline_block_vars.push_back(var_id); + } + else if (!constexpr_sampler && is_supported_argument_buffer_type(type)) { // constexpr samplers are not declared as resources. + // Inline uniform blocks are always emitted at the end. 
add_resource_name(var_id); + + uint32_t resource_index = ~0u; + if (!descriptor_alias) + resource_index = get_metal_resource_index(var, type.basetype); + resources_in_set[desc_set].push_back( - { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + { &var, descriptor_alias, to_name(var_id), type.basetype, resource_index, 0 }); + + // Emulate texture2D atomic operations + if (atomic_image_vars.count(var.self)) + { + uint32_t buffer_resource_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0); + resources_in_set[desc_set].push_back( + { &var, descriptor_alias, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0 }); + } } // Check if this descriptor set needs a swizzle buffer. if (needs_swizzle_buffer_def && is_sampled_image_type(type)) set_needs_swizzle_buffer[desc_set] = true; - else if (buffers_requiring_array_length.count(var_id) != 0) + else if (buffer_requires_array_length(var_id)) { set_needs_buffer_sizes[desc_set] = true; needs_buffer_sizes = true; @@ -9202,20 +17113,13 @@ void CompilerMSL::analyze_argument_buffers() if (uint_ptr_type_id == 0) { - uint32_t offset = ir.increase_bound_by(2); - uint32_t type_id = offset; - uint_ptr_type_id = offset + 1; + uint_ptr_type_id = ir.increase_bound_by(1); // Create a buffer to hold extra data, including the swizzle constants. 
- SPIRType uint_type; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(type_id, uint_type); - - SPIRType uint_type_pointer = uint_type; + SPIRType uint_type_pointer = get_uint_type(); uint_type_pointer.pointer = true; - uint_type_pointer.pointer_depth = 1; - uint_type_pointer.parent_type = type_id; + uint_type_pointer.pointer_depth++; + uint_type_pointer.parent_type = get_uint_type_id(); uint_type_pointer.storage = StorageClassUniform; set(uint_ptr_type_id, uint_type_pointer); set_decoration(uint_ptr_type_id, DecorationArrayStride, 4); @@ -9229,7 +17133,7 @@ void CompilerMSL::analyze_argument_buffers() set_decoration(var_id, DecorationDescriptorSet, desc_set); set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding); resources_in_set[desc_set].push_back( - { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt) }); + { &var, nullptr, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); } if (set_needs_buffer_sizes[desc_set]) @@ -9240,11 +17144,21 @@ void CompilerMSL::analyze_argument_buffers() set_decoration(var_id, DecorationDescriptorSet, desc_set); set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding); resources_in_set[desc_set].push_back( - { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt) }); + { &var, nullptr, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); } } } + // Now add inline uniform blocks. 
+ for (uint32_t var_id : inline_block_vars) + { + auto &var = get(var_id); + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + add_resource_name(var_id); + resources_in_set[desc_set].push_back( + { &var, nullptr, to_name(var_id), SPIRType::Struct, get_metal_resource_index(var, SPIRType::Struct), 0 }); + } + for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++) { auto &resources = resources_in_set[desc_set]; @@ -9259,14 +17173,26 @@ void CompilerMSL::analyze_argument_buffers() argument_buffer_ids[desc_set] = next_id; auto &buffer_type = set(type_id); - buffer_type.storage = StorageClassUniform; + buffer_type.basetype = SPIRType::Struct; + + if ((argument_buffer_device_storage_mask & (1u << desc_set)) != 0) + { + buffer_type.storage = StorageClassStorageBuffer; + // Make sure the argument buffer gets marked as const device. + set_decoration(next_id, DecorationNonWritable); + // Need to mark the type as a Block to enable this. + set_decoration(type_id, DecorationBlock); + } + else + buffer_type.storage = StorageClassUniform; + set_name(type_id, join("spvDescriptorSetBuffer", desc_set)); auto &ptr_type = set(ptr_type_id); ptr_type = buffer_type; ptr_type.pointer = true; - ptr_type.pointer_depth = 1; + ptr_type.pointer_depth++; ptr_type.parent_type = type_id; uint32_t buffer_variable_id = next_id; @@ -9274,16 +17200,71 @@ void CompilerMSL::analyze_argument_buffers() set_name(buffer_variable_id, join("spvDescriptorSet", desc_set)); // Ids must be emitted in ID order. 
- sort(begin(resources), end(resources), [&](const Resource &lhs, const Resource &rhs) -> bool { + stable_sort(begin(resources), end(resources), [&](const Resource &lhs, const Resource &rhs) -> bool { return tie(lhs.index, lhs.basetype) < tie(rhs.index, rhs.basetype); }); uint32_t member_index = 0; + uint32_t next_arg_buff_index = 0; for (auto &resource : resources) { auto &var = *resource.var; auto &type = get_variable_data_type(var); + + // If needed, synthesize and add padding members. + // member_index and next_arg_buff_index are incremented when padding members are added. + if (msl_options.pad_argument_buffer_resources) + { + while (resource.index > next_arg_buff_index) + { + auto &rez_bind = get_argument_buffer_resource(desc_set, next_arg_buff_index); + switch (rez_bind.basetype) + { + case SPIRType::Void: + case SPIRType::Boolean: + case SPIRType::SByte: + case SPIRType::UByte: + case SPIRType::Short: + case SPIRType::UShort: + case SPIRType::Int: + case SPIRType::UInt: + case SPIRType::Int64: + case SPIRType::UInt64: + case SPIRType::AtomicCounter: + case SPIRType::Half: + case SPIRType::Float: + case SPIRType::Double: + add_argument_buffer_padding_buffer_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::Image: + add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::Sampler: + add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + case SPIRType::SampledImage: + if (next_arg_buff_index == rez_bind.msl_sampler) + add_argument_buffer_padding_sampler_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + else + add_argument_buffer_padding_image_type(buffer_type, member_index, next_arg_buff_index, rez_bind); + break; + default: + break; + } + } + + // Adjust the number of slots consumed by current member itself. + // If actual member is an array, allow runtime array resolution as well. 
+ uint32_t elem_cnt = type.array.empty() ? 1 : to_array_size_literal(type); + if (elem_cnt == 0) + elem_cnt = get_resource_array_size(var.self); + + next_arg_buff_index += elem_cnt; + } + string mbr_name = ensure_valid_name(resource.name, "m"); + if (resource.plane > 0) + mbr_name += join(plane_name_suffix, resource.plane); set_member_name(buffer_type.self, member_index, mbr_name); if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler) @@ -9311,18 +17292,68 @@ void CompilerMSL::analyze_argument_buffers() } else { + uint32_t binding = get_decoration(var.self, DecorationBinding); + SetBindingPair pair = { desc_set, binding }; + if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler || resource.basetype == SPIRType::SampledImage) { // Drop pointer information when we emit the resources into a struct. buffer_type.member_types.push_back(get_variable_data_type_id(var)); + if (resource.plane == 0) + set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } + else if (buffers_requiring_dynamic_offset.count(pair)) + { + if (resource.descriptor_alias) + SPIRV_CROSS_THROW("Descriptor aliasing is currently not supported with dynamic offsets."); + + // Don't set the qualified name here; we'll define a variable holding the corrected buffer address later. + buffer_type.member_types.push_back(var.basetype); + buffers_requiring_dynamic_offset[pair].second = var.self; + } + else if (inline_uniform_blocks.count(pair)) + { + if (resource.descriptor_alias) + SPIRV_CROSS_THROW("Descriptor aliasing is currently not supported with inline UBOs."); + + // Put the buffer block itself into the argument buffer. + buffer_type.member_types.push_back(get_variable_data_type_id(var)); set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); } + else if (atomic_image_vars.count(var.self)) + { + // Emulate texture2D atomic operations. 
+ // Don't set the qualified name: it's already set for this variable, + // and the code that references the buffer manually appends "_atomic" + // to the name. + uint32_t offset = ir.increase_bound_by(2); + uint32_t atomic_type_id = offset; + uint32_t type_ptr_id = offset + 1; + + SPIRType atomic_type; + atomic_type.basetype = SPIRType::AtomicCounter; + atomic_type.width = 32; + atomic_type.vecsize = 1; + set(atomic_type_id, atomic_type); + + atomic_type.pointer = true; + atomic_type.pointer_depth++; + atomic_type.parent_type = atomic_type_id; + atomic_type.storage = StorageClassStorageBuffer; + auto &atomic_ptr_type = set(type_ptr_id, atomic_type); + atomic_ptr_type.self = atomic_type_id; + + buffer_type.member_types.push_back(type_ptr_id); + } else { - // Resources will be declared as pointers not references, so automatically dereference as appropriate. - buffer_type.member_types.push_back(var.basetype); - if (type.array.empty()) + if (!resource.descriptor_alias || resource.descriptor_alias == resource.var) + buffer_type.member_types.push_back(var.basetype); + + if (resource.descriptor_alias && resource.descriptor_alias != resource.var) + buffer_aliases_argument.push_back({ var.self, resource.descriptor_alias->self }); + else if (type.array.empty()) set_qualified_name(var.self, join("(*", to_name(buffer_variable_id), ".", mbr_name, ")")); else set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); @@ -9338,29 +17369,174 @@ void CompilerMSL::analyze_argument_buffers() } } -bool CompilerMSL::SetBindingPair::operator==(const SetBindingPair &other) const +// Return the resource type of the app-provided resources for the descriptor set, +// that matches the resource index of the argument buffer index. +// This is a two-step lookup, first lookup the resource binding number from the argument buffer index, +// then lookup the resource binding using the binding number. 
+MSLResourceBinding &CompilerMSL::get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx) +{ + auto stage = get_entry_point().model; + StageSetBinding arg_idx_tuple = { stage, desc_set, arg_idx }; + auto arg_itr = resource_arg_buff_idx_to_binding_number.find(arg_idx_tuple); + if (arg_itr != end(resource_arg_buff_idx_to_binding_number)) + { + StageSetBinding bind_tuple = { stage, desc_set, arg_itr->second }; + auto bind_itr = resource_bindings.find(bind_tuple); + if (bind_itr != end(resource_bindings)) + return bind_itr->second.first; + } + SPIRV_CROSS_THROW("Argument buffer resource base type could not be determined. When padding argument buffer " + "elements, all descriptor set resources must be supplied with a base type by the app."); +} + +// Adds an argument buffer padding argument buffer type as one or more members of the struct type at the member index. +// Metal does not support arrays of buffers, so these are emitted as multiple struct members. +void CompilerMSL::add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, + uint32_t &arg_buff_index, MSLResourceBinding &rez_bind) +{ + if (!argument_buffer_padding_buffer_type_id) + { + uint32_t buff_type_id = ir.increase_bound_by(2); + auto &buff_type = set(buff_type_id); + buff_type.basetype = rez_bind.basetype; + buff_type.storage = StorageClassUniformConstant; + + uint32_t ptr_type_id = buff_type_id + 1; + auto &ptr_type = set(ptr_type_id); + ptr_type = buff_type; + ptr_type.pointer = true; + ptr_type.pointer_depth++; + ptr_type.parent_type = buff_type_id; + + argument_buffer_padding_buffer_type_id = ptr_type_id; + } + + for (uint32_t rez_idx = 0; rez_idx < rez_bind.count; rez_idx++) + add_argument_buffer_padding_type(argument_buffer_padding_buffer_type_id, struct_type, mbr_idx, arg_buff_index, 1); +} + +// Adds an argument buffer padding argument image type as a member of the struct type at the member index. 
+void CompilerMSL::add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, + uint32_t &arg_buff_index, MSLResourceBinding &rez_bind) +{ + if (!argument_buffer_padding_image_type_id) + { + uint32_t base_type_id = ir.increase_bound_by(2); + auto &base_type = set(base_type_id); + base_type.basetype = SPIRType::Float; + base_type.width = 32; + + uint32_t img_type_id = base_type_id + 1; + auto &img_type = set(img_type_id); + img_type.basetype = SPIRType::Image; + img_type.storage = StorageClassUniformConstant; + + img_type.image.type = base_type_id; + img_type.image.dim = Dim2D; + img_type.image.depth = false; + img_type.image.arrayed = false; + img_type.image.ms = false; + img_type.image.sampled = 1; + img_type.image.format = ImageFormatUnknown; + img_type.image.access = AccessQualifierMax; + + argument_buffer_padding_image_type_id = img_type_id; + } + + add_argument_buffer_padding_type(argument_buffer_padding_image_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count); +} + +// Adds an argument buffer padding argument sampler type as a member of the struct type at the member index. +void CompilerMSL::add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, + uint32_t &arg_buff_index, MSLResourceBinding &rez_bind) +{ + if (!argument_buffer_padding_sampler_type_id) + { + uint32_t samp_type_id = ir.increase_bound_by(1); + auto &samp_type = set(samp_type_id); + samp_type.basetype = SPIRType::Sampler; + samp_type.storage = StorageClassUniformConstant; + + argument_buffer_padding_sampler_type_id = samp_type_id; + } + + add_argument_buffer_padding_type(argument_buffer_padding_sampler_type_id, struct_type, mbr_idx, arg_buff_index, rez_bind.count); +} + +// Adds the argument buffer padding argument type as a member of the struct type at the member index. +// Advances both arg_buff_index and mbr_idx to next argument slots. 
+void CompilerMSL::add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, + uint32_t &arg_buff_index, uint32_t count) +{ + uint32_t type_id = mbr_type_id; + if (count > 1) + { + uint32_t ary_type_id = ir.increase_bound_by(1); + auto &ary_type = set(ary_type_id); + ary_type = get(type_id); + ary_type.array.push_back(count); + ary_type.array_size_literal.push_back(true); + ary_type.parent_type = type_id; + type_id = ary_type_id; + } + + set_member_name(struct_type.self, mbr_idx, join("_m", arg_buff_index, "_pad")); + set_extended_member_decoration(struct_type.self, mbr_idx, SPIRVCrossDecorationResourceIndexPrimary, arg_buff_index); + struct_type.member_types.push_back(type_id); + + arg_buff_index += count; + mbr_idx++; +} + +void CompilerMSL::activate_argument_buffer_resources() +{ + // For ABI compatibility, force-enable all resources which are part of argument buffers. + ir.for_each_typed_id([&](uint32_t self, const SPIRVariable &) { + if (!has_decoration(self, DecorationDescriptorSet)) + return; + + uint32_t desc_set = get_decoration(self, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + add_active_interface_variable(self); + }); +} + +bool CompilerMSL::using_builtin_array() const +{ + return msl_options.force_native_arrays || is_using_builtin_array; +} + +void CompilerMSL::set_combined_sampler_suffix(const char *suffix) { - return desc_set == other.desc_set && binding == other.binding; + sampler_name_suffix = suffix; } -bool CompilerMSL::StageSetBinding::operator==(const StageSetBinding &other) const +const char *CompilerMSL::get_combined_sampler_suffix() const { - return model == other.model && desc_set == other.desc_set && binding == other.binding; + return sampler_name_suffix.c_str(); } -size_t CompilerMSL::InternalHasher::operator()(const SetBindingPair &value) const +void CompilerMSL::emit_block_hints(const SPIRBlock &) { - // Quality of hash doesn't really matter here. 
- auto hash_set = std::hash()(value.desc_set); - auto hash_binding = std::hash()(value.binding); - return (hash_set * 0x10001b31) ^ hash_binding; } -size_t CompilerMSL::InternalHasher::operator()(const StageSetBinding &value) const +string CompilerMSL::additional_fixed_sample_mask_str() const { - // Quality of hash doesn't really matter here. - auto hash_model = std::hash()(value.model); - auto hash_set = std::hash()(value.desc_set); - auto tmp_hash = (hash_model * 0x10001b31) ^ hash_set; - return (tmp_hash * 0x10001b31) ^ value.binding; + char print_buffer[32]; +#ifdef _MSC_VER + // snprintf does not exist or is buggy on older MSVC versions, some of + // them being used by MinGW. Use sprintf instead and disable + // corresponding warning. +#pragma warning(push) +#pragma warning(disable : 4996) +#endif +#if _WIN32 + sprintf(print_buffer, "0x%x", msl_options.additional_fixed_sample_mask); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%x", msl_options.additional_fixed_sample_mask); +#endif +#ifdef _MSC_VER +#pragma warning(pop) +#endif + return print_buffer; } diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 9f2bab49d4a..737575d49b8 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2016-2019 The Brenwill Workshop Ltd. + * Copyright 2016-2021 The Brenwill Workshop Ltd. + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_MSL_HPP #define SPIRV_CROSS_MSL_HPP @@ -27,43 +34,81 @@ namespace SPIRV_CROSS_NAMESPACE { -// Indicates the format of the vertex attribute. 
Currently limited to specifying -// if the attribute is an 8-bit unsigned integer, 16-bit unsigned integer, or +// Indicates the format of a shader interface variable. Currently limited to specifying +// if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or // some other format. -enum MSLVertexFormat +enum MSLShaderVariableFormat +{ + MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, + MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, + MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, + MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, + MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, + + // Deprecated aliases. + MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, + MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, + MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, + MSL_SHADER_INPUT_FORMAT_OTHER = MSL_SHADER_VARIABLE_FORMAT_OTHER, + MSL_SHADER_INPUT_FORMAT_UINT8 = MSL_SHADER_VARIABLE_FORMAT_UINT8, + MSL_SHADER_INPUT_FORMAT_UINT16 = MSL_SHADER_VARIABLE_FORMAT_UINT16, + MSL_SHADER_INPUT_FORMAT_ANY16 = MSL_SHADER_VARIABLE_FORMAT_ANY16, + MSL_SHADER_INPUT_FORMAT_ANY32 = MSL_SHADER_VARIABLE_FORMAT_ANY32, + + MSL_SHADER_VARIABLE_FORMAT_INT_MAX = 0x7fffffff +}; + +// Indicates the rate at which a variable changes value, one of: per-vertex, +// per-primitive, or per-patch. +enum MSLShaderVariableRate { - MSL_VERTEX_FORMAT_OTHER = 0, - MSL_VERTEX_FORMAT_UINT8 = 1, - MSL_VERTEX_FORMAT_UINT16 = 2, - MSL_VERTEX_FORMAT_INT_MAX = 0x7fffffff + MSL_SHADER_VARIABLE_RATE_PER_VERTEX = 0, + MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE = 1, + MSL_SHADER_VARIABLE_RATE_PER_PATCH = 2, + + MSL_SHADER_VARIABLE_RATE_INT_MAX = 0x7fffffff, }; -// Defines MSL characteristics of a vertex attribute at a particular location. +// Defines MSL characteristics of a shader interface variable at a particular location. // After compilation, it is possible to query whether or not this location was used. 
-struct MSLVertexAttr +// If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader, +// or behavior is undefined. +struct MSLShaderInterfaceVariable { uint32_t location = 0; - uint32_t msl_buffer = 0; - uint32_t msl_offset = 0; - uint32_t msl_stride = 0; - bool per_instance = false; - MSLVertexFormat format = MSL_VERTEX_FORMAT_OTHER; + uint32_t component = 0; + MSLShaderVariableFormat format = MSL_SHADER_VARIABLE_FORMAT_OTHER; spv::BuiltIn builtin = spv::BuiltInMax; + uint32_t vecsize = 0; + MSLShaderVariableRate rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; }; // Matches the binding index of a MSL resource for a binding within a descriptor set. // Taken together, the stage, desc_set and binding combine to form a reference to a resource -// descriptor used in a particular shading stage. -// If using MSL 2.0 argument buffers, and the descriptor set is not marked as a discrete descriptor set, -// the binding reference we remap to will become an [[id(N)]] attribute within -// the "descriptor set" argument buffer structure. -// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a -// [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. +// descriptor used in a particular shading stage. The count field indicates the number of +// resources consumed by this binding, if the binding represents an array of resources. +// If the resource array is a run-time-sized array, which are legal in GLSL or SPIR-V, this value +// will be used to declare the array size in MSL, which does not support run-time-sized arrays. +// If pad_argument_buffer_resources is enabled, the base_type and count values are used to +// specify the base type and array size of the resource in the argument buffer, if that resource +// is not defined and used by the shader. 
With pad_argument_buffer_resources enabled, this +// information will be used to pad the argument buffer structure, in order to align that +// structure consistently for all uses, across all shaders, of the descriptor set represented +// by the arugment buffer. If pad_argument_buffer_resources is disabled, base_type does not +// need to be populated, and if the resource is also not a run-time sized array, the count +// field does not need to be populated. +// If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, +// and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we +// remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. +// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will +// become a [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. struct MSLResourceBinding { spv::ExecutionModel stage = spv::ExecutionModelMax; + SPIRType::BaseType basetype = SPIRType::Unknown; uint32_t desc_set = 0; uint32_t binding = 0; + uint32_t count = 0; uint32_t msl_buffer = 0; uint32_t msl_texture = 0; uint32_t msl_sampler = 0; @@ -122,6 +167,50 @@ enum MSLSamplerBorderColor MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff }; +enum MSLFormatResolution +{ + MSL_FORMAT_RESOLUTION_444 = 0, + MSL_FORMAT_RESOLUTION_422, + MSL_FORMAT_RESOLUTION_420, + MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +}; + +enum MSLChromaLocation +{ + MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + MSL_CHROMA_LOCATION_MIDPOINT, + MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +}; + +enum MSLComponentSwizzle +{ + MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + MSL_COMPONENT_SWIZZLE_ZERO, + MSL_COMPONENT_SWIZZLE_ONE, + MSL_COMPONENT_SWIZZLE_R, + MSL_COMPONENT_SWIZZLE_G, + MSL_COMPONENT_SWIZZLE_B, + MSL_COMPONENT_SWIZZLE_A, + MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrModelConversion +{ + 
MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrRange +{ + MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +}; + struct MSLConstexprSampler { MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; @@ -137,21 +226,50 @@ struct MSLConstexprSampler float lod_clamp_max = 1000.0f; int max_anisotropy = 1; + // Sampler Y'CbCr conversion parameters + uint32_t planes = 0; + MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444; + MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST; + MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY + MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY; + MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL; + uint32_t bpc = 8; + bool compare_enable = false; bool lod_clamp_enable = false; bool anisotropy_enable = false; -}; + bool ycbcr_conversion_enable = false; -// Tracks the type ID and member index of a struct member -using MSLStructMemberKey = uint64_t; + MSLConstexprSampler() + { + for (uint32_t i = 0; i < 4; i++) + swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY; + } + bool swizzle_is_identity() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY && + swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY); + } + bool swizzle_has_one_or_zero() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == 
MSL_COMPONENT_SWIZZLE_ONE || + swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE); + } +}; // Special constant used in a MSLResourceBinding desc_set // element to indicate the bindings for the push constants. -static const uint32_t kPushConstDescSet = ~(0u); +// Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. +static const uint32_t kPushConstDescSet = ResourceBindingPushConstantDescriptorSet; // Special constant used in a MSLResourceBinding binding // element to indicate the bindings for the push constants. -static const uint32_t kPushConstBinding = 0; +// Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. +static const uint32_t kPushConstBinding = ResourceBindingPushConstantBinding; // Special constant used in a MSLResourceBinding binding // element to indicate the buffer binding for swizzle buffers. @@ -169,6 +287,9 @@ static const uint32_t kArgumentBufferBinding = ~(3u); static const uint32_t kMaxArgumentBuffers = 8; +// The arbitrary maximum for the nesting of array of array copies. 
+static const uint32_t kArrayCopyMultidimMax = 6; + // Decompiles SPIR-V to Metal Shading Language class CompilerMSL : public CompilerGLSL { @@ -185,6 +306,8 @@ class CompilerMSL : public CompilerGLSL Platform platform = macOS; uint32_t msl_version = make_msl_version(1, 2); uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers + uint32_t r32ui_linear_texture_alignment = 4; + uint32_t r32ui_alignment_constant_id = 65535; uint32_t swizzle_buffer_index = 30; uint32_t indirect_params_buffer_index = 29; uint32_t shader_output_buffer_index = 28; @@ -192,41 +315,199 @@ class CompilerMSL : public CompilerGLSL uint32_t shader_tess_factor_buffer_index = 26; uint32_t buffer_size_buffer_index = 25; uint32_t view_mask_buffer_index = 24; + uint32_t dynamic_offsets_buffer_index = 23; + uint32_t shader_input_buffer_index = 22; + uint32_t shader_index_buffer_index = 21; + uint32_t shader_patch_input_buffer_index = 20; uint32_t shader_input_wg_index = 0; + uint32_t device_index = 0; + uint32_t enable_frag_output_mask = 0xffffffff; + // Metal doesn't allow setting a fixed sample mask directly in the pipeline. + // We can evade this restriction by ANDing the internal sample_mask output + // of the shader with the additional fixed sample mask. + uint32_t additional_fixed_sample_mask = 0xffffffff; bool enable_point_size_builtin = true; + bool enable_frag_depth_builtin = true; + bool enable_frag_stencil_ref_builtin = true; bool disable_rasterization = false; bool capture_output_to_buffer = false; bool swizzle_texture_samples = false; bool tess_domain_origin_lower_left = false; bool multiview = false; + bool multiview_layered_rendering = true; + bool view_index_from_device_index = false; + bool dispatch_base = false; + bool texture_1D_as_2D = false; - // Enable use of MSL 2.0 indirect argument buffers. + // Enable use of Metal argument buffers. // MSL 2.0 must also be enabled. 
bool argument_buffers = false; + // Defines Metal argument buffer tier levels. + // Uses same values as Metal MTLArgumentBuffersTier enumeration. + enum class ArgumentBuffersTier + { + Tier1 = 0, + Tier2 = 1, + }; + + // When using Metal argument buffers, indicates the Metal argument buffer tier level supported by the Metal platform. + // Ignored when Options::argument_buffers is disabled. + // - Tier1 supports writable images on macOS, but not on iOS. + // - Tier2 supports writable images on macOS and iOS, and higher resource count limits. + // Tier capabilities based on recommendations from Apple engineering. + ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1; + + // Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID. + bool enable_base_index_zero = false; + // Fragment output in MSL must have at least as many components as the render pass. // Add support to explicit pad out components. bool pad_fragment_output_components = false; + // Specifies whether the iOS target version supports the [[base_vertex]] and [[base_instance]] attributes. + bool ios_support_base_vertex_instance = false; + + // Use Metal's native frame-buffer fetch API for subpass inputs. + bool use_framebuffer_fetch_subpasses = false; + + // Enables use of "fma" intrinsic for invariant float math + bool invariant_float_math = false; + + // Emulate texturecube_array with texture2d_array for iOS where this type is not available + bool emulate_cube_array = false; + + // Allow user to enable decoration binding + bool enable_decoration_binding = false; + // Requires MSL 2.1, use the native support for texel buffers. bool texture_buffer_native = false; - bool is_ios() + // Forces all resources which are part of an argument buffer to be considered active. + // This ensures ABI compatibility between shaders where some resources might be unused, + // and would otherwise declare a different IAB. 
+ bool force_active_argument_buffer_resources = false; + + // Aligns each resource in an argument buffer to its assigned index value, id(N), + // by adding synthetic padding members in the argument buffer struct for any resources + // in the argument buffer that are not defined and used by the shader. This allows + // the shader to index into the correct argument in a descriptor set argument buffer + // that is shared across shaders, where not all resources in the argument buffer are + // defined in each shader. For this to work, an MSLResourceBinding must be provided for + // all descriptors in any descriptor set held in an argument buffer in the shader, and + // that MSLResourceBinding must have the basetype and count members populated correctly. + // The implementation here assumes any inline blocks in the argument buffer is provided + // in a Metal buffer, and doesn't take into consideration inline blocks that are + // optionally embedded directly into the argument buffer via add_inline_uniform_block(). + bool pad_argument_buffer_resources = false; + + // Forces the use of plain arrays, which works around certain driver bugs on certain versions + // of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210. + // May reduce performance in scenarios where arrays are copied around as value-types. + bool force_native_arrays = false; + + // If a shader writes clip distance, also emit user varyings which + // can be read in subsequent stages. + bool enable_clip_distance_user_varying = true; + + // In a tessellation control shader, assume that more than one patch can be processed in a + // single workgroup. This requires changes to the way the InvocationId and PrimitiveId + // builtins are processed, but should result in more efficient usage of the GPU. + bool multi_patch_workgroup = false; + + // Use storage buffers instead of vertex-style attributes for tessellation evaluation + // input. 
This may require conversion of inputs in the generated post-tessellation + // vertex shader, but allows the use of nested arrays. + bool raw_buffer_tese_input = false; + + // If set, a vertex shader will be compiled as part of a tessellation pipeline. + // It will be translated as a compute kernel, so it can use the global invocation ID + // to index the output buffer. + bool vertex_for_tessellation = false; + + // Assume that SubpassData images have multiple layers. Layered input attachments + // are addressed relative to the Layer output from the vertex pipeline. This option + // has no effect with multiview, since all input attachments are assumed to be layered + // and will be addressed using the current ViewIndex. + bool arrayed_subpass_input = false; + + // Whether to use SIMD-group or quadgroup functions to implement group non-uniform + // operations. Some GPUs on iOS do not support the SIMD-group functions, only the + // quadgroup functions. + bool ios_use_simdgroup_functions = false; + + // If set, the subgroup size will be assumed to be one, and subgroup-related + // builtins and operations will be emitted accordingly. This mode is intended to + // be used by MoltenVK on hardware/software configurations which do not provide + // sufficient support for subgroups. + bool emulate_subgroups = false; + + // If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control, + // allows the SIMD-group size (aka thread execution width) to vary depending on + // register usage and requirements. In certain circumstances--for example, a pipeline + // in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT-- + // this is undesirable. This fixes the value of the SubgroupSize builtin, instead of + // mapping it to the Metal builtin [[thread_execution_width]]. If the thread + // execution width is reduced, the extra invocations will appear to be inactive. 
+ // If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped + // to the Metal [[thread_execution_width]] builtin. + uint32_t fixed_subgroup_size = 0; + + enum class IndexType + { + None = 0, + UInt16 = 1, + UInt32 = 2 + }; + + // The type of index in the index buffer, if present. For a compute shader, Metal + // requires specifying the indexing at pipeline creation, rather than at draw time + // as with graphics pipelines. This means we must create three different pipelines, + // for no indexing, 16-bit indices, and 32-bit indices. Each requires different + // handling for the gl_VertexIndex builtin. We may as well, then, create three + // different shaders for these three scenarios. + IndexType vertex_index_type = IndexType::None; + + // If set, a dummy [[sample_id]] input is added to a fragment shader if none is present. + // This will force the shader to run at sample rate, assuming Metal does not optimize + // the extra threads away. + bool force_sample_rate_shading = false; + + // If set, gl_HelperInvocation will be set manually whenever a fragment is discarded. + // Some Metal devices have a bug where simd_is_helper_thread() does not return true + // after a fragment has been discarded. This is a workaround that is only expected to be needed + // until the bug is fixed in Metal; it is provided as an option to allow disabling it when that occurs. + bool manual_helper_invocation_updates = true; + + // If set, extra checks will be emitted in fragment shaders to prevent writes + // from discarded fragments. Some Metal devices have a bug where writes to storage resources + // from discarded fragment threads continue to occur, despite the fragment being + // discarded. This is a workaround that is only expected to be needed until the + // bug is fixed in Metal; it is provided as an option so it can be enabled + // only when the bug is present. 
+ bool check_discarded_frag_stores = false; + + bool is_ios() const { return platform == iOS; } - bool is_macos() + bool is_macos() const { return platform == macOS; } + bool use_quadgroup_operation() const + { + return is_ios() && !ios_use_simdgroup_functions; + } + void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) { msl_version = make_msl_version(major, minor, patch); } - bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const { return msl_version >= make_msl_version(major, minor, patch); } @@ -270,32 +551,44 @@ class CompilerMSL : public CompilerGLSL return !buffers_requiring_array_length.empty(); } + bool buffer_requires_array_length(VariableID id) const + { + return buffers_requiring_array_length.count(id) != 0; + } + // Provide feedback to calling API to allow it to pass a buffer // containing the view mask for the current multiview subpass. bool needs_view_mask_buffer() const { - return msl_options.multiview; + return msl_options.multiview && !msl_options.view_index_from_device_index; + } + + // Provide feedback to calling API to allow it to pass a buffer + // containing the dispatch base workgroup ID. + bool needs_dispatch_base_buffer() const + { + return msl_options.dispatch_base && !msl_options.supports_msl_version(1, 2); } // Provide feedback to calling API to allow it to pass an output // buffer if the shader needs it. bool needs_output_buffer() const { - return capture_output_to_buffer && stage_out_var_id != 0; + return capture_output_to_buffer && stage_out_var_id != ID(0); } // Provide feedback to calling API to allow it to pass a patch output // buffer if the shader needs it. 
bool needs_patch_output_buffer() const { - return capture_output_to_buffer && patch_stage_out_var_id != 0; + return capture_output_to_buffer && patch_stage_out_var_id != ID(0); } // Provide feedback to calling API to allow it to pass an input threadgroup // buffer if the shader needs it. bool needs_input_threadgroup_mem() const { - return capture_output_to_buffer && stage_in_var_id != 0; + return capture_output_to_buffer && stage_in_var_id != ID(0); } explicit CompilerMSL(std::vector spirv); @@ -303,11 +596,15 @@ class CompilerMSL : public CompilerGLSL explicit CompilerMSL(const ParsedIR &ir); explicit CompilerMSL(ParsedIR &&ir); - // attr is a vertex attribute binding used to match - // vertex content locations to MSL attributes. If vertex attributes are provided, - // is_msl_vertex_attribute_used() will return true after calling ::compile() if - // the location was used by the MSL code. - void add_msl_vertex_attribute(const MSLVertexAttr &attr); + // input is a shader interface variable description used to fix up shader input variables. + // If shader inputs are provided, is_msl_shader_input_used() will return true after + // calling ::compile() if the location were used by the MSL code. + void add_msl_shader_input(const MSLShaderInterfaceVariable &input); + + // output is a shader interface variable description used to fix up shader output variables. + // If shader outputs are provided, is_msl_shader_output_used() will return true after + // calling ::compile() if the location were used by the MSL code. + void add_msl_shader_output(const MSLShaderInterfaceVariable &output); // resource is a resource binding to indicate the MSL buffer, // texture or sampler index to use for a particular SPIR-V description set @@ -316,18 +613,54 @@ class CompilerMSL : public CompilerGLSL // the set/binding combination was used by the MSL code. 
void add_msl_resource_binding(const MSLResourceBinding &resource); + // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource + // in this shader. index is the index within the dynamic offset buffer to use. This + // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC + // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers + // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with + // an offset taken from the dynamic offset buffer. + void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index); + + // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource + // in this shader. This function marks that resource as an inline uniform block + // (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers + // are enabled. If so, the buffer block will be directly embedded into the argument + // buffer, instead of being referenced indirectly via pointer. + void add_inline_uniform_block(uint32_t desc_set, uint32_t binding); + // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. // This corresponds to VK_KHR_push_descriptor in Vulkan. void add_discrete_descriptor_set(uint32_t desc_set); - // Query after compilation is done. This allows you to check if a location or set/binding combination was used by the shader. - bool is_msl_vertex_attribute_used(uint32_t location); + // If an argument buffer is large enough, it may need to be in the device storage space rather than + // constant. Opt-in to this behavior here on a per set basis. + void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage); + + // Query after compilation is done. This allows you to check if an input location was used by the shader. 
+ bool is_msl_shader_input_used(uint32_t location); + + // Query after compilation is done. This allows you to check if an output location were used by the shader. + bool is_msl_shader_output_used(uint32_t location); + + // If not using add_msl_shader_input, it's possible + // that certain builtin attributes need to be automatically assigned locations. + // This is typical for tessellation builtin inputs such as tess levels, gl_Position, etc. + // This returns k_unknown_location if the location was explicitly assigned with + // add_msl_shader_input or the builtin is not used, otherwise returns N in [[attribute(N)]]. + uint32_t get_automatic_builtin_input_location(spv::BuiltIn builtin) const; + + // If not using add_msl_shader_output, it's possible + // that certain builtin attributes need to be automatically assigned locations. + // This is typical for tessellation builtin outputs such as tess levels, gl_Position, etc. + // This returns k_unknown_location if the location were explicitly assigned with + // add_msl_shader_output or the builtin were not used, otherwise returns N in [[attribute(N)]]. + uint32_t get_automatic_builtin_output_location(spv::BuiltIn builtin) const; // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. // Constexpr samplers are always assumed to be emitted. // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped // by remap_constexpr_sampler(_by_binding). - bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding); + bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const; // This must only be called after a successful call to CompilerMSL::compile(). // For a variable resource ID obtained through reflection API, report the automatically assigned resource index. 
@@ -340,8 +673,17 @@ class CompilerMSL : public CompilerGLSL // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the // sampler's binding is returned instead. For any other resource type, -1 is returned. + // Secondary bindings are also used for the auxillary image atomic buffer. uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const; + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images, + // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const; + + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images, + // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const; + // Compiles the SPIR-V code into Metal Shading Language. std::string compile() override; @@ -352,7 +694,7 @@ class CompilerMSL : public CompilerGLSL // This can be used on both combined image/samplers (sampler2D) or standalone samplers. // The remapped sampler must not be an array of samplers. // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyways. - void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler); + void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler); // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID. // Remaps based on ID take priority over set/binding remaps. @@ -362,10 +704,13 @@ class CompilerMSL : public CompilerGLSL // to use for a particular location. The default is 4 if number of components is not overridden. 
void set_fragment_output_components(uint32_t location, uint32_t components); + void set_combined_sampler_suffix(const char *suffix); + const char *get_combined_sampler_suffix() const; + protected: // An enum of SPIR-V functions that are implemented in additional // source code that is added to the shader if necessary. - enum SPVFuncImpl + enum SPVFuncImpl : uint8_t { SPVFuncImplNone, SPVFuncImplMod, @@ -385,73 +730,151 @@ class CompilerMSL : public CompilerGLSL SPVFuncImplArrayOfArrayCopy5Dim = SPVFuncImplArrayCopyMultidimBase + 5, SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6, SPVFuncImplTexelBufferCoords, + SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations + SPVFuncImplFMul, + SPVFuncImplFAdd, + SPVFuncImplFSub, + SPVFuncImplQuantizeToF16, + SPVFuncImplCubemapTo2DArrayFace, + SPVFuncImplUnsafeArray, // Allow Metal to use the array template to make arrays a value type + SPVFuncImplStorageMatrix, // Allow threadgroup construction of matrices SPVFuncImplInverse4x4, SPVFuncImplInverse3x3, SPVFuncImplInverse2x2, - SPVFuncImplRowMajor2x3, - SPVFuncImplRowMajor2x4, - SPVFuncImplRowMajor3x2, - SPVFuncImplRowMajor3x4, - SPVFuncImplRowMajor4x2, - SPVFuncImplRowMajor4x3, + // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's + // emitted before them. + SPVFuncImplForwardArgs, + // Likewise, this must come before *Swizzle. 
+ SPVFuncImplGetSwizzle, SPVFuncImplTextureSwizzle, + SPVFuncImplGatherSwizzle, + SPVFuncImplGatherCompareSwizzle, + SPVFuncImplSubgroupBroadcast, + SPVFuncImplSubgroupBroadcastFirst, SPVFuncImplSubgroupBallot, SPVFuncImplSubgroupBallotBitExtract, SPVFuncImplSubgroupBallotFindLSB, SPVFuncImplSubgroupBallotFindMSB, SPVFuncImplSubgroupBallotBitCount, SPVFuncImplSubgroupAllEqual, + SPVFuncImplSubgroupShuffle, + SPVFuncImplSubgroupShuffleXor, + SPVFuncImplSubgroupShuffleUp, + SPVFuncImplSubgroupShuffleDown, + SPVFuncImplQuadBroadcast, + SPVFuncImplQuadSwap, SPVFuncImplReflectScalar, SPVFuncImplRefractScalar, - SPVFuncImplArrayCopyMultidimMax = 6 + SPVFuncImplFaceForwardScalar, + SPVFuncImplChromaReconstructNearest2Plane, + SPVFuncImplChromaReconstructNearest3Plane, + SPVFuncImplChromaReconstructLinear422CositedEven2Plane, + SPVFuncImplChromaReconstructLinear422CositedEven3Plane, + SPVFuncImplChromaReconstructLinear422Midpoint2Plane, + SPVFuncImplChromaReconstructLinear422Midpoint3Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane, + SPVFuncImplExpandITUFullRange, + SPVFuncImplExpandITUNarrowRange, + SPVFuncImplConvertYCbCrBT709, + SPVFuncImplConvertYCbCrBT601, + SPVFuncImplConvertYCbCrBT2020, + SPVFuncImplDynamicImageSampler, }; + // If the underlying resource has been used for comparison then duplicate loads of that resource must be too + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ void emit_texture_op(const Instruction &i, bool sparse) override; + void emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + std::string to_ptr_expression(uint32_t id, bool register_expression_read = true); void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); void emit_instruction(const Instruction &instr) override; void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count) override; + void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count) override; void emit_header() override; void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; void emit_subgroup_op(const Instruction &i) override; + std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions) override; void emit_fixup() override; std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = ""); void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0) override; + void emit_struct_padding_target(const SPIRType &type) override; + std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member); std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; + void emit_block_hints(const SPIRBlock &block) override; + + // Allow Metal to use the array template to make arrays a value type + std::string type_to_array_glsl(const SPIRType &type) override; + std::string constant_op_expression(const SPIRConstantOp &cop) override; + + // Threadgroup arrays can't have a wrapper type + std::string 
variable_decl(const SPIRVariable &variable) override; + + bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override; + + // GCC workaround of lambdas calling protected functions (for older GCC versions) + std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; + std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; - std::string sampler_type(const SPIRType &type); + std::string sampler_type(const SPIRType &type, uint32_t id); std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; - size_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; - std::string to_func_call_arg(uint32_t id) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; std::string to_name(uint32_t id, bool allow_alias = true) const override; - std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, uint32_t lod, - uint32_t minlod) override; - std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, - uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, - uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) override; + std::string to_function_name(const TextureFunctionNameArguments &args) override; + std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward) override; std::string to_initializer_expression(const SPIRVariable &var) override; - std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id) override; + std::string to_zero_initialized_expression(uint32_t type_id) override; + + std::string 
unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool is_packed, bool row_major) override; + + // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal. + bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; + std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; + bool emit_complex_bitcast(uint32_t result_id, uint32_t id, uint32_t op0) override; bool skip_argument(uint32_t id) const override; - std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override; + std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved) override; std::string to_qualifiers_glsl(uint32_t id) override; void replace_illegal_names() override; - void declare_undefined_values() override; void declare_constant_arrays(); + + void replace_illegal_entry_point_names(); + void sync_entry_point_aliases_and_names(); + + static const std::unordered_set &get_reserved_keyword_set(); + static const std::unordered_set &get_illegal_func_names(); + + // Constant arrays of non-primitive types (i.e. 
matrices) won't link properly into Metal libraries + void declare_complex_constant_arrays(); + bool is_patch_block(const SPIRType &type); bool is_non_native_row_major_matrix(uint32_t id) override; bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; - std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed) override; + std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, + bool is_packed) override; + + bool is_tesc_shader() const; + bool is_tese_shader() const; void preprocess_op_codes(); void localize_global_variables(); void extract_global_variables_from_functions(); void mark_packable_structs(); void mark_as_packable(SPIRType &type); + void mark_as_workgroup_struct(SPIRType &type); std::unordered_map> function_global_vars; void extract_global_variables_from_function(uint32_t func_id, std::set &added_arg_ids, @@ -460,33 +883,66 @@ class CompilerMSL : public CompilerGLSL uint32_t add_interface_block(spv::StorageClass storage, bool patch = false); uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage); + struct InterfaceBlockMeta + { + struct LocationMeta + { + uint32_t base_type_id = 0; + uint32_t num_components = 0; + bool flat = false; + bool noperspective = false; + bool centroid = false; + bool sample = false; + }; + std::unordered_map location_meta; + bool strip_array = false; + bool allow_local_declaration = false; + }; + + std::string to_tesc_invocation_id(); + void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array); void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type, - SPIRVariable &var, bool strip_array); + SPIRVariable &var, InterfaceBlockMeta &meta); void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, bool 
strip_array); + SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, bool strip_array); - void add_plain_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, uint32_t index, - bool strip_array); - void add_composite_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, - SPIRType &ib_type, SPIRVariable &var, uint32_t index, - bool strip_array); - uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array); + SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); + bool add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRVariable &var, const SPIRType &type, + InterfaceBlockMeta &meta); + void add_plain_member_variable_to_interface_block(spv::StorageClass storage, + const std::string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, SPIRType &var_type, + uint32_t mbr_idx, InterfaceBlockMeta &meta, + const std::string &mbr_name_qual, + const std::string &var_chain_qual, + uint32_t &location, uint32_t &var_mbr_idx); + void add_composite_member_variable_to_interface_block(spv::StorageClass storage, + const std::string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, SPIRType &var_type, + uint32_t mbr_idx, InterfaceBlockMeta &meta, + const std::string &mbr_name_qual, + const std::string &var_chain_qual, + uint32_t &location, uint32_t &var_mbr_idx); void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); + void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var); void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); - void 
mark_location_as_used_by_shader(uint32_t location, spv::StorageClass storage); + void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, + spv::StorageClass storage, bool fallback = false); uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin); - uint32_t ensure_correct_attribute_type(uint32_t type_id, uint32_t location); + uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, + uint32_t num_components, bool strip_array); + void emit_custom_templates(); void emit_custom_functions(); void emit_resources(); void emit_specialization_constants_and_structs(); void emit_interface_block(uint32_t ib_var_id); bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs); - void add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows); + uint32_t get_resource_array_size(uint32_t id) const; + void fix_up_shader_inputs_outputs(); std::string func_type_decl(SPIRType &type); @@ -495,45 +951,89 @@ class CompilerMSL : public CompilerGLSL std::string entry_point_arg_stage_in(); void entry_point_args_builtin(std::string &args); void entry_point_args_discrete_descriptors(std::string &args); - std::string to_qualified_member_name(const SPIRType &type, uint32_t index); + std::string append_member_name(const std::string &qualifier, const SPIRType &type, uint32_t index); std::string ensure_valid_name(std::string name, std::string pfx); std::string to_sampler_expression(uint32_t id); std::string to_swizzle_expression(uint32_t id); std::string to_buffer_size_expression(uint32_t id); + bool is_sample_rate() const; + bool is_intersection_query() const; + bool is_direct_input_builtin(spv::BuiltIn builtin); std::string builtin_qualifier(spv::BuiltIn builtin); std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0); std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma); std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); + std::string 
member_location_attribute_qualifier(const SPIRType &type, uint32_t index); std::string argument_decl(const SPIRFunction::Parameter &arg); + const char *descriptor_address_space(uint32_t id, spv::StorageClass storage, const char *plain_address_space) const; std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); - uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype); - uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); - size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0); + uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const; + uint32_t get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); + uint32_t get_or_allocate_builtin_output_member_location(spv::BuiltIn builtin, + uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); + + uint32_t get_physical_tess_level_array_size(spv::BuiltIn builtin) const; + + // MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output. + // These values can change depending on various extended decorations which control packing rules. + // We need to make these rules match up with SPIR-V declared rules. 
+ uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const; + + uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const; + + uint32_t get_declared_input_size_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_alignment_msl(const SPIRType &struct_type, uint32_t index) const; + + const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const; + SPIRType get_presumed_input_type(const SPIRType &struct_type, uint32_t index) const; + + uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false, + bool ignore_padding = false) const; + std::string to_component_argument(uint32_t id); - void align_struct(SPIRType &ib_type); - bool is_member_packable(SPIRType &ib_type, uint32_t index, uint32_t base_offset = 0); - uint32_t get_member_packed_type(SPIRType &ib_type, uint32_t index); - MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index); + void align_struct(SPIRType &ib_type, std::unordered_set &aligned_structs); + void mark_scalar_layout_structs(const SPIRType 
&ib_type); + void mark_struct_members_packed(const SPIRType &type); + void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index); + bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const; std::string get_argument_address_space(const SPIRVariable &argument); - std::string get_type_address_space(const SPIRType &type, uint32_t id); + std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); + const char *to_restrict(uint32_t id, bool space); SPIRType &get_stage_in_struct_type(); SPIRType &get_stage_out_struct_type(); SPIRType &get_patch_stage_in_struct_type(); SPIRType &get_patch_stage_out_struct_type(); std::string get_tess_factor_struct_name(); - void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, - uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, + SPIRType &get_uint_type(); + uint32_t get_uint_type_id(); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, spv::Op opcode, + uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); const char *get_memory_order(uint32_t spv_mem_sem); void add_pragma_line(const std::string &line); void add_typedef_line(const std::string &line); void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); - void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override; + void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id, + spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override; void build_implicit_builtins(); uint32_t build_constant_uint_array_pointer(); void emit_entry_point_declarations() override; + bool uses_explicit_early_fragment_test(); + uint32_t builtin_frag_coord_id = 0; uint32_t builtin_sample_id_id = 0; + uint32_t builtin_sample_mask_id = 0; + 
uint32_t builtin_helper_invocation_id = 0; uint32_t builtin_vertex_idx_id = 0; uint32_t builtin_base_vertex_id = 0; uint32_t builtin_instance_idx_id = 0; @@ -544,56 +1044,64 @@ class CompilerMSL : public CompilerGLSL uint32_t builtin_primitive_id_id = 0; uint32_t builtin_subgroup_invocation_id_id = 0; uint32_t builtin_subgroup_size_id = 0; + uint32_t builtin_dispatch_base_id = 0; + uint32_t builtin_stage_input_size_id = 0; + uint32_t builtin_local_invocation_index_id = 0; + uint32_t builtin_workgroup_size_id = 0; uint32_t swizzle_buffer_id = 0; uint32_t buffer_size_buffer_id = 0; uint32_t view_mask_buffer_id = 0; + uint32_t dynamic_offsets_buffer_id = 0; + uint32_t uint_type_id = 0; + uint32_t argument_buffer_padding_buffer_type_id = 0; + uint32_t argument_buffer_padding_image_type_id = 0; + uint32_t argument_buffer_padding_sampler_type_id = 0; + + bool does_shader_write_sample_mask = false; + bool frag_shader_needs_discard_checks = false; - void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; - void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; + void cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; + void cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; void analyze_sampled_image_usage(); + bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override; + void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, + bool &is_packed) override; + void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length); + void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override; + bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); + bool 
emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr); bool is_out_of_bounds_tessellation_level(uint32_t id_lhs); + void ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin); + void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id); std::string convert_to_f32(const std::string &expr, uint32_t components); Options msl_options; std::set spv_function_implementations; - std::unordered_map vtx_attrs_by_location; - std::unordered_map vtx_attrs_by_builtin; - std::unordered_set vtx_attrs_in_use; + // Must be ordered to ensure declarations are in a specific order. + std::map inputs_by_location; + std::unordered_map inputs_by_builtin; + std::map outputs_by_location; + std::unordered_map outputs_by_builtin; + std::unordered_set location_inputs_in_use; + std::unordered_set location_inputs_in_use_fallback; + std::unordered_set location_outputs_in_use; + std::unordered_set location_outputs_in_use_fallback; std::unordered_map fragment_output_components; - std::unordered_map struct_member_padding; + std::unordered_map builtin_to_automatic_input_location; + std::unordered_map builtin_to_automatic_output_location; std::set pragma_lines; std::set typedef_lines; SmallVector vars_needing_early_declaration; - struct SetBindingPair - { - uint32_t desc_set; - uint32_t binding; - bool operator==(const SetBindingPair &other) const; - }; - - struct StageSetBinding - { - spv::ExecutionModel model; - uint32_t desc_set; - uint32_t binding; - bool operator==(const StageSetBinding &other) const; - }; - - struct InternalHasher - { - size_t operator()(const SetBindingPair &value) const; - size_t operator()(const StageSetBinding &value) const; - }; - std::unordered_map, InternalHasher> resource_bindings; + std::unordered_map resource_arg_buff_idx_to_binding_number; uint32_t next_metal_resource_index_buffer = 0; uint32_t next_metal_resource_index_texture = 0; @@ -601,21 +1109,41 @@ class CompilerMSL : public CompilerGLSL // 
Intentionally uninitialized, works around MSVC 2013 bug. uint32_t next_metal_resource_ids[kMaxArgumentBuffers]; - uint32_t stage_in_var_id = 0; - uint32_t stage_out_var_id = 0; - uint32_t patch_stage_in_var_id = 0; - uint32_t patch_stage_out_var_id = 0; - uint32_t stage_in_ptr_var_id = 0; - uint32_t stage_out_ptr_var_id = 0; + VariableID stage_in_var_id = 0; + VariableID stage_out_var_id = 0; + VariableID patch_stage_in_var_id = 0; + VariableID patch_stage_out_var_id = 0; + VariableID stage_in_ptr_var_id = 0; + VariableID stage_out_ptr_var_id = 0; + VariableID tess_level_inner_var_id = 0; + VariableID tess_level_outer_var_id = 0; + VariableID stage_out_masked_builtin_type_id = 0; + + // Handle HLSL-style 0-based vertex/instance index. + enum class TriState + { + Neutral, + No, + Yes + }; + TriState needs_base_vertex_arg = TriState::Neutral; + TriState needs_base_instance_arg = TriState::Neutral; + bool has_sampled_images = false; - bool needs_vertex_idx_arg = false; - bool needs_instance_idx_arg = false; + bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index. + + bool is_using_builtin_array = false; // Force the use of C style array declaration. 
+ bool using_builtin_array() const; + bool is_rasterization_disabled = false; bool capture_output_to_buffer = false; bool needs_swizzle_buffer_def = false; bool used_swizzle_buffer = false; bool added_builtin_tess_level = false; bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; + bool needs_sample_id = false; + bool needs_helper_invocation = false; std::string qual_pos_var_name; std::string stage_in_var_name = "in"; std::string stage_out_var_name = "out"; @@ -624,10 +1152,14 @@ class CompilerMSL : public CompilerGLSL std::string sampler_name_suffix = "Smplr"; std::string swizzle_name_suffix = "Swzl"; std::string buffer_size_name_suffix = "BufferSize"; + std::string plane_name_suffix = "Plane"; std::string input_wg_var_name = "gl_in"; + std::string input_buffer_var_name = "spvIn"; std::string output_buffer_var_name = "spvOut"; + std::string patch_input_buffer_var_name = "spvPatchIn"; std::string patch_output_buffer_var_name = "spvPatchOut"; std::string tess_factor_buffer_var_name = "spvTessLevel"; + std::string index_buffer_var_name = "spvIndices"; spv::Op previous_instruction_opcode = spv::OpNop; // Must be ordered since declaration is in a specific order. @@ -636,18 +1168,62 @@ class CompilerMSL : public CompilerGLSL const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const; std::unordered_set buffers_requiring_array_length; - SmallVector buffer_arrays; + SmallVector buffer_arrays_discrete; + SmallVector> buffer_aliases_argument; + SmallVector buffer_aliases_discrete; + std::unordered_set atomic_image_vars; // Emulate texture2D atomic operations + std::unordered_set pull_model_inputs; + + // Must be ordered since array is in a specific order. 
+ std::map> buffers_requiring_dynamic_offset; + + SmallVector disabled_frag_outputs; + + std::unordered_set inline_uniform_blocks; uint32_t argument_buffer_ids[kMaxArgumentBuffers]; uint32_t argument_buffer_discrete_mask = 0; + uint32_t argument_buffer_device_storage_mask = 0; + void analyze_argument_buffers(); bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; + MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx); + void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); + void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); + void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); + void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count); uint32_t get_target_components_for_fragment_location(uint32_t location) const; - uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components); + uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components, + SPIRType::BaseType basetype = SPIRType::Unknown); + uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective); bool suppress_missing_prototypes = false; + void add_spv_func_and_recompile(SPVFuncImpl spv_func); + + void activate_argument_buffer_resources(); + + bool type_is_msl_framebuffer_fetch(const SPIRType &type) const; + bool type_is_pointer(const SPIRType &type) const; + bool type_is_pointer_to_pointer(const SPIRType &type) const; + bool is_supported_argument_buffer_type(const SPIRType &type) const; + + bool variable_storage_requires_stage_io(spv::StorageClass storage) const; + + bool needs_manual_helper_invocation_updates() const + { + return 
msl_options.manual_helper_invocation_updates && msl_options.supports_msl_version(2, 3); + } + bool needs_frag_discard_checks() const + { + return get_execution_model() == spv::ExecutionModelFragment && msl_options.supports_msl_version(2, 3) && + msl_options.check_discarded_frag_stores && frag_shader_needs_discard_checks; + } + + bool has_additional_fixed_sample_mask() const { return msl_options.additional_fixed_sample_mask != 0xffffffff; } + std::string additional_fixed_sample_mask_str() const; + // OpcodeHandler that handles several MSL preprocessing operations. struct OpCodePreprocessor : OpcodeHandler { @@ -662,10 +1238,16 @@ class CompilerMSL : public CompilerGLSL CompilerMSL &compiler; std::unordered_map result_types; + std::unordered_map image_pointers; // Emulate texture2D atomic operations bool suppress_missing_prototypes = false; bool uses_atomics = false; - bool uses_resource_write = false; + bool uses_image_write = false; + bool uses_buffer_write = false; + bool uses_discard = false; bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; + bool needs_sample_id = false; + bool needs_helper_invocation = false; }; // OpcodeHandler that scans for uses of sampled images @@ -688,11 +1270,8 @@ class CompilerMSL : public CompilerGLSL { enum SortAspect { - Location, - LocationReverse, - Offset, - OffsetThenLocationReverse, - Alphabetical + LocationThenBuiltInType, + Offset }; void sort(); diff --git a/spirv_parser.cpp b/spirv_parser.cpp index d5a16337d1c..01c2e381241 100644 --- a/spirv_parser.cpp +++ b/spirv_parser.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Arm Limited + * Copyright 2018-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. 
The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #include "spirv_parser.hpp" #include @@ -24,7 +31,7 @@ namespace SPIRV_CROSS_NAMESPACE { Parser::Parser(vector spirv) { - ir.spirv = move(spirv); + ir.spirv = std::move(spirv); } Parser::Parser(const uint32_t *spirv_data, size_t word_count) @@ -60,6 +67,8 @@ static bool is_valid_spirv_version(uint32_t version) case 0x10200: // SPIR-V 1.2 case 0x10300: // SPIR-V 1.3 case 0x10400: // SPIR-V 1.4 + case 0x10500: // SPIR-V 1.5 + case 0x10600: // SPIR-V 1.6 return true; default: @@ -85,6 +94,11 @@ void Parser::parse() SPIRV_CROSS_THROW("Invalid SPIRV format."); uint32_t bound = s[3]; + + const uint32_t MaximumNumberOfIDs = 0x3fffff; + if (bound > MaximumNumberOfIDs) + SPIRV_CROSS_THROW("ID bound exceeds limit of 0x3fffff.\n"); + ir.set_id_bounds(bound); uint32_t offset = 5; @@ -113,10 +127,22 @@ void Parser::parse() for (auto &i : instructions) parse(i); + for (auto &fixup : forward_pointer_fixups) + { + auto &target = get(fixup.first); + auto &source = get(fixup.second); + target.member_types = source.member_types; + target.basetype = source.basetype; + target.self = source.self; + } + forward_pointer_fixups.clear(); + if (current_function) SPIRV_CROSS_THROW("Function was not terminated."); if (current_block) SPIRV_CROSS_THROW("Block was not terminated."); + if (ir.default_entry_point == 0) + SPIRV_CROSS_THROW("There is no entry point in the SPIR-V module."); } const uint32_t *Parser::stream(const Instruction &instr) const @@ -157,6 +183,15 @@ void Parser::parse(const Instruction &instruction) auto op = static_cast(instruction.op); uint32_t length = instruction.length; + // HACK for glslang that might emit OpEmitMeshTasksEXT followed by return / branch. + // Instead of failing hard, just ignore it. 
+ if (ignore_trailing_block_opcodes) + { + ignore_trailing_block_opcodes = false; + if (op == OpReturn || op == OpBranch || op == OpUnreachable) + return; + } + switch (op) { case OpSourceContinued: @@ -233,29 +268,37 @@ void Parser::parse(const Instruction &instruction) case OpExtension: { auto ext = extract_string(ir.spirv, instruction.offset); - ir.declared_extensions.push_back(move(ext)); + ir.declared_extensions.push_back(std::move(ext)); break; } case OpExtInstImport: { uint32_t id = ops[0]; + + SPIRExtension::Extension spirv_ext = SPIRExtension::Unsupported; + auto ext = extract_string(ir.spirv, instruction.offset + 1); if (ext == "GLSL.std.450") - set(id, SPIRExtension::GLSL); + spirv_ext = SPIRExtension::GLSL; else if (ext == "DebugInfo") - set(id, SPIRExtension::SPV_debug_info); + spirv_ext = SPIRExtension::SPV_debug_info; else if (ext == "SPV_AMD_shader_ballot") - set(id, SPIRExtension::SPV_AMD_shader_ballot); + spirv_ext = SPIRExtension::SPV_AMD_shader_ballot; else if (ext == "SPV_AMD_shader_explicit_vertex_parameter") - set(id, SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter); + spirv_ext = SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter; else if (ext == "SPV_AMD_shader_trinary_minmax") - set(id, SPIRExtension::SPV_AMD_shader_trinary_minmax); + spirv_ext = SPIRExtension::SPV_AMD_shader_trinary_minmax; else if (ext == "SPV_AMD_gcn_shader") - set(id, SPIRExtension::SPV_AMD_gcn_shader); - else - set(id, SPIRExtension::Unsupported); - + spirv_ext = SPIRExtension::SPV_AMD_gcn_shader; + else if (ext == "NonSemantic.DebugPrintf") + spirv_ext = SPIRExtension::NonSemanticDebugPrintf; + else if (ext == "NonSemantic.Shader.DebugInfo.100") + spirv_ext = SPIRExtension::NonSemanticShaderDebugInfo; + else if (ext.find("NonSemantic.") == 0) + spirv_ext = SPIRExtension::NonSemanticGeneric; + + set(id, spirv_ext); // Other SPIR-V extensions which have ExtInstrs are currently not supported. 
break; @@ -265,7 +308,15 @@ void Parser::parse(const Instruction &instruction) { // The SPIR-V debug information extended instructions might come at global scope. if (current_block) + { current_block->ops.push_back(instruction); + if (length >= 2) + { + const auto *type = maybe_get(ops[0]); + if (type) + ir.load_type_width.insert({ ops[1], type->width }); + } + } break; } @@ -278,7 +329,9 @@ void Parser::parse(const Instruction &instruction) // Strings need nul-terminator and consume the whole word. uint32_t strlen_words = uint32_t((e.name.size() + 1 + 3) >> 2); - e.interface_variables.insert(end(e.interface_variables), ops + strlen_words + 2, ops + instruction.length); + + for (uint32_t i = strlen_words + 2; i < instruction.length; i++) + e.interface_variables.push_back(ops[i]); // Set the name of the entry point in case OpName is not provided later. ir.set_name(ops[1], e.name); @@ -311,12 +364,32 @@ void Parser::parse(const Instruction &instruction) execution.output_vertices = ops[2]; break; + case ExecutionModeOutputPrimitivesEXT: + execution.output_primitives = ops[2]; + break; + default: break; } break; } + case OpExecutionModeId: + { + auto &execution = ir.entry_points[ops[0]]; + auto mode = static_cast(ops[1]); + execution.flags.set(mode); + + if (mode == ExecutionModeLocalSizeId) + { + execution.workgroup_size.id_x = ops[2]; + execution.workgroup_size.id_y = ops[3]; + execution.workgroup_size.id_z = ops[4]; + } + + break; + } + case OpName: { uint32_t id = ops[0]; @@ -535,6 +608,11 @@ void Parser::parse(const Instruction &instruction) auto *c = maybe_get(cid); bool literal = c && !c->specialization; + // We're copying type information into Array types, so we'll need a fixup for any physical pointer + // references. + if (base.forward_pointer) + forward_pointer_fixups.push_back({ id, tid }); + arraybase.array_size_literal.push_back(literal); arraybase.array.push_back(literal ? c->scalar() : cid); // Do NOT set arraybase.self! 
@@ -548,6 +626,11 @@ void Parser::parse(const Instruction &instruction) auto &base = get(ops[1]); auto &arraybase = set(id); + // We're copying type information into Array types, so we'll need a fixup for any physical pointer + // references. + if (base.forward_pointer) + forward_pointer_fixups.push_back({ id, ops[1] }); + arraybase = base; arraybase.array.push_back(0); arraybase.array_size_literal.push_back(true); @@ -595,10 +678,15 @@ void Parser::parse(const Instruction &instruction) { uint32_t id = ops[0]; - auto &base = get(ops[2]); + // Very rarely, we might receive a FunctionPrototype here. + // We won't be able to compile it, but we shouldn't crash when parsing. + // We should be able to reflect. + auto *base = maybe_get(ops[2]); auto &ptrbase = set(id); - ptrbase = base; + if (base) + ptrbase = *base; + ptrbase.pointer = true; ptrbase.pointer_depth++; ptrbase.storage = static_cast(ops[1]); @@ -606,6 +694,9 @@ void Parser::parse(const Instruction &instruction) if (ptrbase.storage == StorageClassAtomicCounter) ptrbase.basetype = SPIRType::AtomicCounter; + if (base && base->forward_pointer) + forward_pointer_fixups.push_back({ id, ops[2] }); + ptrbase.parent_type = ops[2]; // Do NOT set ptrbase.self! 
@@ -619,6 +710,7 @@ void Parser::parse(const Instruction &instruction) ptrbase.pointer = true; ptrbase.pointer_depth++; ptrbase.storage = static_cast(ops[1]); + ptrbase.forward_pointer = true; if (ptrbase.storage == StorageClassAtomicCounter) ptrbase.basetype = SPIRType::AtomicCounter; @@ -658,7 +750,7 @@ void Parser::parse(const Instruction &instruction) } } - if (type.type_alias == 0) + if (type.type_alias == TypeID(0)) global_struct_cache.push_back(id); } break; @@ -675,11 +767,19 @@ void Parser::parse(const Instruction &instruction) break; } - case OpTypeAccelerationStructureNV: + case OpTypeAccelerationStructureKHR: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::AccelerationStructure; + break; + } + + case OpTypeRayQueryKHR: { uint32_t id = ops[0]; auto &type = set(id); - type.basetype = SPIRType::AccelerationStructureNV; + type.basetype = SPIRType::RayQuery; break; } @@ -700,15 +800,6 @@ void Parser::parse(const Instruction &instruction) } set(id, type, storage, initializer); - - // hlsl based shaders don't have those decorations. force them and then reset when reading/writing images - auto &ttype = get(type); - if (ttype.basetype == SPIRType::BaseType::Image) - { - ir.set_decoration(id, DecorationNonWritable); - ir.set_decoration(id, DecorationNonReadable); - } - break; } @@ -772,7 +863,7 @@ void Parser::parse(const Instruction &instruction) { uint32_t id = ops[1]; uint32_t type = ops[0]; - make_constant_null(id, type); + ir.make_constant_null(id, type, true); break; } @@ -916,6 +1007,58 @@ void Parser::parse(const Instruction &instruction) current_block->false_block = ops[2]; current_block->terminator = SPIRBlock::Select; + + if (current_block->true_block == current_block->false_block) + { + // Bogus conditional, translate to a direct branch. + // Avoids some ugly edge cases later when analyzing CFGs. 
+ + // There are some super jank cases where the merge block is different from the true/false, + // and later branches can "break" out of the selection construct this way. + // This is complete nonsense, but CTS hits this case. + // In this scenario, we should see the selection construct as more of a Switch with one default case. + // The problem here is that this breaks any attempt to break out of outer switch statements, + // but it's theoretically solvable if this ever comes up using the ladder breaking system ... + + if (current_block->true_block != current_block->next_block && + current_block->merge == SPIRBlock::MergeSelection) + { + uint32_t ids = ir.increase_bound_by(2); + + SPIRType type; + type.basetype = SPIRType::Int; + type.width = 32; + set(ids, type); + auto &c = set(ids + 1, ids); + + current_block->condition = c.self; + current_block->default_block = current_block->true_block; + current_block->terminator = SPIRBlock::MultiSelect; + ir.block_meta[current_block->next_block] &= ~ParsedIR::BLOCK_META_SELECTION_MERGE_BIT; + ir.block_meta[current_block->next_block] |= ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT; + } + else + { + // Collapse loops if we have to. 
+ bool collapsed_loop = current_block->true_block == current_block->merge_block && + current_block->merge == SPIRBlock::MergeLoop; + + if (collapsed_loop) + { + ir.block_meta[current_block->merge_block] &= ~ParsedIR::BLOCK_META_LOOP_MERGE_BIT; + ir.block_meta[current_block->continue_block] &= ~ParsedIR::BLOCK_META_CONTINUE_BIT; + } + + current_block->next_block = current_block->true_block; + current_block->condition = 0; + current_block->true_block = 0; + current_block->false_block = 0; + current_block->merge_block = 0; + current_block->merge = SPIRBlock::MergeNone; + current_block->terminator = SPIRBlock::Direct; + } + } + current_block = nullptr; break; } @@ -930,8 +1073,21 @@ void Parser::parse(const Instruction &instruction) current_block->condition = ops[0]; current_block->default_block = ops[1]; - for (uint32_t i = 2; i + 2 <= length; i += 2) - current_block->cases.push_back({ ops[i], ops[i + 1] }); + uint32_t remaining_ops = length - 2; + if ((remaining_ops % 2) == 0) + { + for (uint32_t i = 2; i + 2 <= length; i += 2) + current_block->cases_32bit.push_back({ ops[i], ops[i + 1] }); + } + + if ((remaining_ops % 3) == 0) + { + for (uint32_t i = 2; i + 3 <= length; i += 3) + { + uint64_t value = (static_cast(ops[i + 1]) << 32) | ops[i]; + current_block->cases_64bit.push_back({ value, ops[i + 2] }); + } + } // If we jump to next block, make it break instead since we're inside a switch case block at that point. ir.block_meta[current_block->next_block] |= ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT; @@ -941,6 +1097,7 @@ void Parser::parse(const Instruction &instruction) } case OpKill: + case OpTerminateInvocation: { if (!current_block) SPIRV_CROSS_THROW("Trying to end a non-existing block."); @@ -949,6 +1106,34 @@ void Parser::parse(const Instruction &instruction) break; } + case OpTerminateRayKHR: + // NV variant is not a terminator. 
+ if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::TerminateRay; + current_block = nullptr; + break; + + case OpIgnoreIntersectionKHR: + // NV variant is not a terminator. + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::IgnoreIntersection; + current_block = nullptr; + break; + + case OpEmitMeshTasksEXT: + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::EmitMeshTasks; + for (uint32_t i = 0; i < 3; i++) + current_block->mesh.groups[i] = ops[i]; + current_block->mesh.payload = length >= 4 ? ops[3] : 0; + current_block = nullptr; + // Currently glslang is bugged and does not treat EmitMeshTasksEXT as a terminator. + ignore_trailing_block_opcodes = true; + break; + case OpReturn: { if (!current_block) @@ -1008,12 +1193,12 @@ void Parser::parse(const Instruction &instruction) ir.block_meta[current_block->self] |= ParsedIR::BLOCK_META_LOOP_HEADER_BIT; ir.block_meta[current_block->merge_block] |= ParsedIR::BLOCK_META_LOOP_MERGE_BIT; - ir.continue_block_to_loop_header[current_block->continue_block] = current_block->self; + ir.continue_block_to_loop_header[current_block->continue_block] = BlockID(current_block->self); // Don't add loop headers to continue blocks, // which would make it impossible branch into the loop header since // they are treated as continues. - if (current_block->continue_block != current_block->self) + if (current_block->continue_block != BlockID(current_block->self)) ir.block_meta[current_block->continue_block] |= ParsedIR::BLOCK_META_CONTINUE_BIT; if (length >= 3) @@ -1073,6 +1258,13 @@ void Parser::parse(const Instruction &instruction) // Actual opcodes. 
default: { + if (length >= 2) + { + const auto *type = maybe_get(ops[0]); + if (type) + ir.load_type_width.insert({ ops[1], type->width }); + } + if (!current_block) SPIRV_CROSS_THROW("Currently no block to insert opcode."); @@ -1137,46 +1329,4 @@ bool Parser::variable_storage_is_aliased(const SPIRVariable &v) const return !is_restrict && (ssbo || image || counter); } - -void Parser::make_constant_null(uint32_t id, uint32_t type) -{ - auto &constant_type = get(type); - - if (constant_type.pointer) - { - auto &constant = set(id, type); - constant.make_null(constant_type); - } - else if (!constant_type.array.empty()) - { - assert(constant_type.parent_type); - uint32_t parent_id = ir.increase_bound_by(1); - make_constant_null(parent_id, constant_type.parent_type); - - if (!constant_type.array_size_literal.back()) - SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal."); - - SmallVector elements(constant_type.array.back()); - for (uint32_t i = 0; i < constant_type.array.back(); i++) - elements[i] = parent_id; - set(id, type, elements.data(), uint32_t(elements.size()), false); - } - else if (!constant_type.member_types.empty()) - { - uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size())); - SmallVector elements(constant_type.member_types.size()); - for (uint32_t i = 0; i < constant_type.member_types.size(); i++) - { - make_constant_null(member_ids + i, constant_type.member_types[i]); - elements[i] = member_ids + i; - } - set(id, type, elements.data(), uint32_t(elements.size()), false); - } - else - { - auto &constant = set(id, type); - constant.make_null(constant_type); - } -} - } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_parser.hpp b/spirv_parser.hpp index ef2c1b9869e..dabc0e22446 100644 --- a/spirv_parser.hpp +++ b/spirv_parser.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Arm Limited + * Copyright 2018-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, 
Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + */ + #ifndef SPIRV_CROSS_PARSER_HPP #define SPIRV_CROSS_PARSER_HPP @@ -39,6 +46,8 @@ class Parser ParsedIR ir; SPIRFunction *current_function = nullptr; SPIRBlock *current_block = nullptr; + // For workarounds. + bool ignore_trailing_block_opcodes = false; void parse(const Instruction &instr); const uint32_t *stream(const Instruction &instr) const; @@ -84,10 +93,10 @@ class Parser // This must be an ordered data structure so we always pick the same type aliases. SmallVector global_struct_cache; + SmallVector> forward_pointer_fixups; bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const; bool variable_storage_is_aliased(const SPIRVariable &v) const; - void make_constant_null(uint32_t id, uint32_t type); }; } // namespace SPIRV_CROSS_NAMESPACE diff --git a/spirv_reflect.cpp b/spirv_reflect.cpp index b187a7fa611..0bd224e6c2b 100644 --- a/spirv_reflect.cpp +++ b/spirv_reflect.cpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Bradley Austin Davis + * Copyright 2018-2021 Bradley Austin Davis + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #include "spirv_reflect.hpp" #include "spirv_glsl.hpp" #include @@ -61,6 +68,7 @@ class Stream void end_json_array(); void emit_json_array_value(const std::string &value); void emit_json_array_value(uint32_t value); + void emit_json_array_value(bool value); std::string str() const { @@ -158,6 +166,16 @@ void Stream::emit_json_array_value(uint32_t value) stack.top().second = true; } +void Stream::emit_json_array_value(bool value) +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + statement_inner(",\n"); + statement_no_return(value ? "true" : "false"); + stack.top().second = true; +} + void Stream::begin_json_object() { if (!stack.empty() && stack.top().second) @@ -256,7 +274,6 @@ string CompilerReflection::compile() json_stream = std::make_shared(); json_stream->set_current_locale_radix_character(current_locale_radix_character); json_stream->begin_json_object(); - fixup_type_alias(); reorder_type_alias(); emit_entry_points(); emit_types(); @@ -266,53 +283,97 @@ string CompilerReflection::compile() return json_stream->str(); } +static bool naturally_emit_type(const SPIRType &type) +{ + return type.basetype == SPIRType::Struct && !type.pointer && type.array.empty(); +} + +bool CompilerReflection::type_is_reference(const SPIRType &type) const +{ + // Physical pointers and arrays of physical pointers need to refer to the pointee's type. 
+ return type_is_top_level_physical_pointer(type) || + (!type.array.empty() && type_is_top_level_physical_pointer(get(type.parent_type))); +} + void CompilerReflection::emit_types() { bool emitted_open_tag = false; - ir.for_each_typed_id([&](uint32_t, SPIRType &type) { - if (type.basetype == SPIRType::Struct && !type.pointer && type.array.empty()) - emit_type(type, emitted_open_tag); + SmallVector physical_pointee_types; + + // If we have physical pointers or arrays of physical pointers, it's also helpful to emit the pointee type + // and chain the type hierarchy. For POD, arrays can emit the entire type in-place. + ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (naturally_emit_type(type)) + { + emit_type(self, emitted_open_tag); + } + else if (type_is_reference(type)) + { + if (!naturally_emit_type(this->get(type.parent_type)) && + find(physical_pointee_types.begin(), physical_pointee_types.end(), type.parent_type) == + physical_pointee_types.end()) + { + physical_pointee_types.push_back(type.parent_type); + } + } }); + for (uint32_t pointee_type : physical_pointee_types) + emit_type(pointee_type, emitted_open_tag); + if (emitted_open_tag) { json_stream->end_json_object(); } } -void CompilerReflection::emit_type(const SPIRType &type, bool &emitted_open_tag) +void CompilerReflection::emit_type(uint32_t type_id, bool &emitted_open_tag) { + auto &type = get(type_id); auto name = type_to_glsl(type); - if (type.type_alias != 0) - return; - if (!emitted_open_tag) { json_stream->emit_json_key_object("types"); emitted_open_tag = true; } - json_stream->emit_json_key_object("_" + std::to_string(type.self)); + json_stream->emit_json_key_object("_" + std::to_string(type_id)); json_stream->emit_json_key_value("name", name); - json_stream->emit_json_key_array("members"); - // FIXME ideally we'd like to emit the size of a structure as a - // convenience to people parsing the reflected JSON. The problem - // is that there's no implicit size for a type. 
It's final size - // will be determined by the top level declaration in which it's - // included. So there might be one size for the struct if it's - // included in a std140 uniform block and another if it's included - // in a std430 uniform block. - // The solution is to include *all* potential sizes as a map of - // layout type name to integer, but that will probably require - // some additional logic being written in this class, or in the - // parent CompilerGLSL class. - auto size = type.member_types.size(); - for (uint32_t i = 0; i < size; ++i) + + if (type_is_top_level_physical_pointer(type)) { - emit_type_member(type, i); + json_stream->emit_json_key_value("type", "_" + std::to_string(type.parent_type)); + json_stream->emit_json_key_value("physical_pointer", true); } - json_stream->end_json_array(); + else if (!type.array.empty()) + { + emit_type_array(type); + json_stream->emit_json_key_value("type", "_" + std::to_string(type.parent_type)); + json_stream->emit_json_key_value("array_stride", get_decoration(type_id, DecorationArrayStride)); + } + else + { + json_stream->emit_json_key_array("members"); + // FIXME ideally we'd like to emit the size of a structure as a + // convenience to people parsing the reflected JSON. The problem + // is that there's no implicit size for a type. It's final size + // will be determined by the top level declaration in which it's + // included. So there might be one size for the struct if it's + // included in a std140 uniform block and another if it's included + // in a std430 uniform block. + // The solution is to include *all* potential sizes as a map of + // layout type name to integer, but that will probably require + // some additional logic being written in this class, or in the + // parent CompilerGLSL class. 
+ auto size = type.member_types.size(); + for (uint32_t i = 0; i < size; ++i) + { + emit_type_member(type, i); + } + json_stream->end_json_array(); + } + json_stream->end_json_object(); } @@ -324,7 +385,12 @@ void CompilerReflection::emit_type_member(const SPIRType &type, uint32_t index) // FIXME we'd like to emit the offset of each member, but such offsets are // context dependent. See the comment above regarding structure sizes json_stream->emit_json_key_value("name", name); - if (membertype.basetype == SPIRType::Struct) + + if (type_is_reference(membertype)) + { + json_stream->emit_json_key_value("type", "_" + std::to_string(membertype.parent_type)); + } + else if (membertype.basetype == SPIRType::Struct) { json_stream->emit_json_key_value("type", "_" + std::to_string(membertype.self)); } @@ -338,7 +404,7 @@ void CompilerReflection::emit_type_member(const SPIRType &type, uint32_t index) void CompilerReflection::emit_type_array(const SPIRType &type) { - if (!type.array.empty()) + if (!type_is_top_level_physical_pointer(type) && !type.array.empty()) { json_stream->emit_json_key_array("array"); // Note that we emit the zeros here as a means of identifying @@ -347,15 +413,16 @@ void CompilerReflection::emit_type_array(const SPIRType &type) for (const auto &value : type.array) json_stream->emit_json_array_value(value); json_stream->end_json_array(); + + json_stream->emit_json_key_array("array_size_is_literal"); + for (const auto &value : type.array_size_literal) + json_stream->emit_json_array_value(value); + json_stream->end_json_array(); } } void CompilerReflection::emit_type_member_qualifiers(const SPIRType &type, uint32_t index) { - auto flags = combined_decoration_for_member(type, index); - if (flags.get(DecorationRowMajor)) - json_stream->emit_json_key_value("row_major", true); - auto &membertype = get(type.member_types[index]); emit_type_array(membertype); auto &memb = ir.meta[type.self].members; @@ -366,6 +433,19 @@ void 
CompilerReflection::emit_type_member_qualifiers(const SPIRType &type, uint3 json_stream->emit_json_key_value("location", dec.location); if (dec.decoration_flags.get(DecorationOffset)) json_stream->emit_json_key_value("offset", dec.offset); + + // Array stride is a property of the array type, not the struct. + if (has_decoration(type.member_types[index], DecorationArrayStride)) + json_stream->emit_json_key_value("array_stride", + get_decoration(type.member_types[index], DecorationArrayStride)); + + if (dec.decoration_flags.get(DecorationMatrixStride)) + json_stream->emit_json_key_value("matrix_stride", dec.matrix_stride); + if (dec.decoration_flags.get(DecorationRowMajor)) + json_stream->emit_json_key_value("row_major", true); + + if (type_is_top_level_physical_pointer(membertype)) + json_stream->emit_json_key_value("physical_pointer", true); } } @@ -424,6 +504,28 @@ void CompilerReflection::emit_entry_points() json_stream->begin_json_object(); json_stream->emit_json_key_value("name", e.name); json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model)); + if (e.execution_model == ExecutionModelGLCompute) + { + const auto &spv_entry = get_entry_point(e.name, e.execution_model); + + SpecializationConstant spec_x, spec_y, spec_z; + get_work_group_size_specialization_constants(spec_x, spec_y, spec_z); + + json_stream->emit_json_key_array("workgroup_size"); + json_stream->emit_json_array_value(spec_x.id != ID(0) ? spec_x.constant_id : + spv_entry.workgroup_size.x); + json_stream->emit_json_array_value(spec_y.id != ID(0) ? spec_y.constant_id : + spv_entry.workgroup_size.y); + json_stream->emit_json_array_value(spec_z.id != ID(0) ? 
spec_z.constant_id : + spv_entry.workgroup_size.z); + json_stream->end_json_array(); + + json_stream->emit_json_key_array("workgroup_size_is_spec_constant_id"); + json_stream->emit_json_array_value(spec_x.id != ID(0)); + json_stream->emit_json_array_value(spec_y.id != ID(0)); + json_stream->emit_json_array_value(spec_z.id != ID(0)); + json_stream->end_json_array(); + } json_stream->end_json_object(); } json_stream->end_json_array(); @@ -468,7 +570,7 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVectorbegin_json_object(); @@ -485,18 +587,18 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVectoremit_json_key_value("writeonly", true); - if (buffer_flags.get(DecorationNonWritable)) - json_stream->emit_json_key_value("readonly", true); - if (buffer_flags.get(DecorationRestrict)) - json_stream->emit_json_key_value("restrict", true); - if (buffer_flags.get(DecorationCoherent)) - json_stream->emit_json_key_value("coherent", true); - } + Bitset qualifier_mask = ssbo_block ? 
get_buffer_block_flags(res.id) : mask; + + if (qualifier_mask.get(DecorationNonReadable)) + json_stream->emit_json_key_value("writeonly", true); + if (qualifier_mask.get(DecorationNonWritable)) + json_stream->emit_json_key_value("readonly", true); + if (qualifier_mask.get(DecorationRestrict)) + json_stream->emit_json_key_value("restrict", true); + if (qualifier_mask.get(DecorationCoherent)) + json_stream->emit_json_key_value("coherent", true); + if (qualifier_mask.get(DecorationVolatile)) + json_stream->emit_json_key_value("volatile", true); } emit_type_array(type); @@ -552,13 +654,15 @@ void CompilerReflection::emit_specialization_constants() return; json_stream->emit_json_key_array("specialization_constants"); - for (const auto spec_const : specialization_constants) + for (const auto &spec_const : specialization_constants) { auto &c = get(spec_const.id); auto type = get(c.constant_type); json_stream->begin_json_object(); + json_stream->emit_json_key_value("name", get_name(spec_const.id)); json_stream->emit_json_key_value("id", spec_const.constant_id); json_stream->emit_json_key_value("type", type_to_glsl(type)); + json_stream->emit_json_key_value("variable_id", spec_const.id); switch (type.basetype) { case SPIRType::UInt: diff --git a/spirv_reflect.hpp b/spirv_reflect.hpp index 5a228a68376..a129ba54da5 100644 --- a/spirv_reflect.hpp +++ b/spirv_reflect.hpp @@ -1,5 +1,6 @@ /* - * Copyright 2018-2019 Bradley Austin Davis + * Copyright 2018-2021 Bradley Austin Davis + * SPDX-License-Identifier: Apache-2.0 OR MIT * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +15,12 @@ * limitations under the License. */ +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ */ + #ifndef SPIRV_CROSS_REFLECT_HPP #define SPIRV_CROSS_REFLECT_HPP @@ -67,11 +74,12 @@ class CompilerReflection : public CompilerGLSL void emit_resources(); void emit_specialization_constants(); - void emit_type(const SPIRType &type, bool &emitted_open_tag); + void emit_type(uint32_t type_id, bool &emitted_open_tag); void emit_type_member(const SPIRType &type, uint32_t index); void emit_type_member_qualifiers(const SPIRType &type, uint32_t index); void emit_type_array(const SPIRType &type); void emit_resources(const char *tag, const SmallVector &resources); + bool type_is_reference(const SPIRType &type) const; std::string to_member_name(const SPIRType &type, uint32_t index) const; diff --git a/test_shaders.py b/test_shaders.py index d2f75e80150..cf329e5e4c1 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -1,5 +1,20 @@ #!/usr/bin/env python3 +# Copyright 2015-2021 Arm Limited +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import os import os.path @@ -86,24 +101,25 @@ def get_shader_stats(shader): def print_msl_compiler_version(): try: subprocess.check_call(['xcrun', '--sdk', 'iphoneos', 'metal', '--version']) - print('...are the Metal compiler characteristics.\n') # display after so xcrun FNF is silent + print('... 
are the Metal compiler characteristics.\n') # display after so xcrun FNF is silent except OSError as e: if (e.errno != errno.ENOENT): # Ignore xcrun not found error raise + print('Metal SDK is not present.\n') except subprocess.CalledProcessError: pass -def msl_compiler_supports_22(): +def msl_compiler_supports_version(version): try: - subprocess.check_call(['xcrun', '--sdk', 'macosx', 'metal', '-x', 'metal', '-std=macos-metal2.2', '-'], + subprocess.check_call(['xcrun', '--sdk', 'macosx', 'metal', '-x', 'metal', '-std=macos-metal' + version, '-'], stdin = subprocess.DEVNULL, stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) - print('Current SDK supports MSL 2.2. Enabling validation for MSL 2.2 shaders.') + print('Current SDK supports MSL {0}. Enabling validation for MSL {0} shaders.'.format(version)) return True except OSError as e: - print('Failed to check if MSL 2.2 is not supported. It probably is not.') + print('Failed to check if MSL {} is not supported. It probably is not.'.format(version)) return False except subprocess.CalledProcessError: - print('Current SDK does NOT support MSL 2.2. Disabling validation for MSL 2.2 shaders.') + print('Current SDK does NOT support MSL {0}. Disabling validation for MSL {0} shaders.'.format(version)) return False def path_to_msl_standard(shader): @@ -114,6 +130,10 @@ def path_to_msl_standard(shader): return '-std=ios-metal2.1' elif '.msl22.' in shader: return '-std=ios-metal2.2' + elif '.msl23.' in shader: + return '-std=ios-metal2.3' + elif '.msl24.' in shader: + return '-std=ios-metal2.4' elif '.msl11.' in shader: return '-std=ios-metal1.1' elif '.msl10.' in shader: @@ -127,6 +147,10 @@ def path_to_msl_standard(shader): return '-std=macos-metal2.1' elif '.msl22.' in shader: return '-std=macos-metal2.2' + elif '.msl23.' in shader: + return '-std=macos-metal2.3' + elif '.msl24.' in shader: + return '-std=macos-metal2.4' elif '.msl11.' 
in shader: return '-std=macos-metal1.1' else: @@ -139,6 +163,10 @@ def path_to_msl_standard_cli(shader): return '20100' elif '.msl22.' in shader: return '20200' + elif '.msl23.' in shader: + return '20300' + elif '.msl24.' in shader: + return '20400' elif '.msl11.' in shader: return '10100' else: @@ -164,23 +192,42 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): spirv_path = create_temporary() msl_path = create_temporary(os.path.basename(shader)) - spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader] + spirv_16 = '.spv16.' in shader + spirv_14 = '.spv14.' in shader + + if spirv_16: + spirv_env = 'spv1.6' + glslang_env = 'spirv1.6' + elif spirv_14: + spirv_env = 'vulkan1.1spv1.4' + glslang_env = 'spirv1.4' + else: + spirv_env = 'vulkan1.1' + glslang_env = 'vulkan1.1' + + spirv_cmd = [paths.spirv_as, '--target-env', spirv_env, '-o', spirv_path, shader] if '.preserve.' in shader: spirv_cmd.append('--preserve-numeric-ids') if spirv: subprocess.check_call(spirv_cmd) else: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) + subprocess.check_call([paths.glslang, '--amb' ,'--target-env', glslang_env, '-V', '-o', spirv_path, shader]) - if opt: - subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) + if opt and (not shader_is_invalid_spirv(shader)): + if '.graphics-robust-access.' 
in shader: + subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '--graphics-robust-access', '-o', spirv_path, spirv_path]) + else: + subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) spirv_cross_path = paths.spirv_cross - msl_args = [spirv_cross_path, '--entry', 'main', '--output', msl_path, spirv_path, '--msl', '--iterations', str(iterations)] + msl_args = [spirv_cross_path, '--output', msl_path, spirv_path, '--msl', '--iterations', str(iterations)] msl_args.append('--msl-version') msl_args.append(path_to_msl_standard_cli(shader)) + if not '.nomain.' in shader: + msl_args.append('--entry') + msl_args.append('main') if '.swizzle.' in shader: msl_args.append('--msl-swizzle-texture-samples') if '.ios.' in shader: @@ -195,21 +242,134 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): msl_args.append('--msl-argument-buffers') if '.texture-buffer-native.' in shader: msl_args.append('--msl-texture-buffer-native') + if '.framebuffer-fetch.' in shader: + msl_args.append('--msl-framebuffer-fetch') + if '.invariant-float-math.' in shader: + msl_args.append('--msl-invariant-float-math') + if '.emulate-cube-array.' in shader: + msl_args.append('--msl-emulate-cube-array') if '.discrete.' in shader: # Arbitrary for testing purposes. msl_args.append('--msl-discrete-descriptor-set') msl_args.append('2') msl_args.append('--msl-discrete-descriptor-set') msl_args.append('3') + if '.force-active.' in shader: + msl_args.append('--msl-force-active-argument-buffer-resources') if '.line.' in shader: msl_args.append('--emit-line-directives') if '.multiview.' in shader: msl_args.append('--msl-multiview') + if '.no-layered.' in shader: + msl_args.append('--msl-multiview-no-layered-rendering') + if '.viewfromdev.' in shader: + msl_args.append('--msl-view-index-from-device-index') + if '.dispatchbase.' in shader: + msl_args.append('--msl-dispatch-base') + if '.dynamic-buffer.' 
in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-dynamic-buffer') + msl_args.append('0') + msl_args.append('0') + msl_args.append('--msl-dynamic-buffer') + msl_args.append('1') + msl_args.append('2') + if '.inline-block.' in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-inline-uniform-block') + msl_args.append('0') + msl_args.append('0') + if '.device-argument-buffer.' in shader: + msl_args.append('--msl-device-argument-buffer') + msl_args.append('0') + msl_args.append('--msl-device-argument-buffer') + msl_args.append('1') + if '.force-native-array.' in shader: + msl_args.append('--msl-force-native-arrays') + if '.zero-initialize.' in shader: + msl_args.append('--force-zero-initialized-variables') + if '.frag-output.' in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-disable-frag-depth-builtin') + msl_args.append('--msl-disable-frag-stencil-ref-builtin') + msl_args.append('--msl-enable-frag-output-mask') + msl_args.append('0x000000ca') + if '.no-user-varying.' in shader: + msl_args.append('--msl-no-clip-distance-user-varying') + if '.shader-inputs.' in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-shader-input') + msl_args.append('0') + msl_args.append('u8') + msl_args.append('2') + msl_args.append('--msl-shader-input') + msl_args.append('1') + msl_args.append('u16') + msl_args.append('3') + msl_args.append('--msl-shader-input') + msl_args.append('6') + msl_args.append('other') + msl_args.append('4') + if '.multi-patch.' in shader: + msl_args.append('--msl-multi-patch-workgroup') + # Arbitrary for testing purposes. + msl_args.append('--msl-shader-input') + msl_args.append('0') + msl_args.append('any32') + msl_args.append('3') + msl_args.append('--msl-shader-input') + msl_args.append('1') + msl_args.append('any16') + msl_args.append('2') + if '.raw-tess-in.' in shader: + msl_args.append('--msl-raw-buffer-tese-input') + if '.for-tess.' 
in shader: + msl_args.append('--msl-vertex-for-tessellation') + if '.fixed-sample-mask.' in shader: + msl_args.append('--msl-additional-fixed-sample-mask') + msl_args.append('0x00000022') + if '.arrayed-subpass.' in shader: + msl_args.append('--msl-arrayed-subpass-input') + if '.1d-as-2d.' in shader: + msl_args.append('--msl-texture-1d-as-2d') + if '.simd.' in shader: + msl_args.append('--msl-ios-use-simdgroup-functions') + if '.emulate-subgroup.' in shader: + msl_args.append('--msl-emulate-subgroups') + if '.fixed-subgroup.' in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-fixed-subgroup-size') + msl_args.append('32') + if '.force-sample.' in shader: + msl_args.append('--msl-force-sample-rate-shading') + if '.discard-checks.' in shader: + msl_args.append('--msl-check-discarded-frag-stores') + if '.decoration-binding.' in shader: + msl_args.append('--msl-decoration-binding') + if '.mask-location-0.' in shader: + msl_args.append('--mask-stage-output-location') + msl_args.append('0') + msl_args.append('0') + if '.mask-location-1.' in shader: + msl_args.append('--mask-stage-output-location') + msl_args.append('1') + msl_args.append('0') + if '.mask-position.' in shader: + msl_args.append('--mask-stage-output-builtin') + msl_args.append('Position') + if '.mask-point-size.' in shader: + msl_args.append('--mask-stage-output-builtin') + msl_args.append('PointSize') + if '.mask-clip-distance.' in shader: + msl_args.append('--mask-stage-output-builtin') + msl_args.append('ClipDistance') + if '.relax-nan.' 
in shader: + msl_args.append('--relax-nan-checks') subprocess.check_call(msl_args) if not shader_is_invalid_spirv(msl_path): - subprocess.check_call([paths.spirv_val, '--target-env', 'vulkan1.1', spirv_path]) + subprocess.check_call([paths.spirv_val, '--allow-localsizeid', '--scalar-block-layout', '--target-env', spirv_env, spirv_path]) return (spirv_path, msl_path) @@ -226,6 +386,10 @@ def shader_model_hlsl(shader): return '-Tps_5_1' elif '.comp' in shader: return '-Tcs_5_1' + elif '.mesh' in shader: + return '-Tms_6_5' + elif '.task' in shader: + return '-Tas_6_5' else: return None @@ -245,9 +409,21 @@ def shader_to_win_path(shader): ignore_fxc = False def validate_shader_hlsl(shader, force_no_external_validation, paths): - if not '.nonuniformresource' in shader: - # glslang HLSL does not support this, so rely on fxc to test it. - subprocess.check_call([paths.glslang, '-e', 'main', '-D', '--target-env', 'vulkan1.1', '-V', shader]) + test_glslang = True + if '.nonuniformresource.' in shader: + test_glslang = False + if '.fxconly.' in shader: + test_glslang = False + if '.task' in shader or '.mesh' in shader: + test_glslang = False + + hlsl_args = [paths.glslang, '--amb', '-e', 'main', '-D', '--target-env', 'vulkan1.1', '-V', shader] + if '.sm30.' in shader: + hlsl_args.append('--hlsl-dx9-compatible') + + if test_glslang: + subprocess.check_call(hlsl_args) + is_no_fxc = '.nofxc.' in shader global ignore_fxc if (not ignore_fxc) and (not force_no_external_validation) and (not is_no_fxc): @@ -270,7 +446,9 @@ def validate_shader_hlsl(shader, force_no_external_validation, paths): raise RuntimeError('Failed compiling HLSL shader') def shader_to_sm(shader): - if '.sm60.' in shader: + if '.sm62.' in shader: + return '62' + elif '.sm60.' in shader: return '60' elif '.sm51.' 
in shader: return '51' @@ -283,16 +461,29 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati spirv_path = create_temporary() hlsl_path = create_temporary(os.path.basename(shader)) - spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader] + spirv_16 = '.spv16.' in shader + spirv_14 = '.spv14.' in shader + + if spirv_16: + spirv_env = 'spv1.6' + glslang_env = 'spirv1.6' + elif spirv_14: + spirv_env = 'vulkan1.1spv1.4' + glslang_env = 'spirv1.4' + else: + spirv_env = 'vulkan1.1' + glslang_env = 'vulkan1.1' + + spirv_cmd = [paths.spirv_as, '--target-env', spirv_env, '-o', spirv_path, shader] if '.preserve.' in shader: spirv_cmd.append('--preserve-numeric-ids') if spirv: subprocess.check_call(spirv_cmd) else: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) + subprocess.check_call([paths.glslang, '--amb', '--target-env', glslang_env, '-V', '-o', spirv_path, shader]) - if opt: + if opt and (not shader_is_invalid_spirv(hlsl_path)): subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) spirv_cross_path = paths.spirv_cross @@ -302,13 +493,28 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati hlsl_args = [spirv_cross_path, '--entry', 'main', '--output', hlsl_path, spirv_path, '--hlsl-enable-compat', '--hlsl', '--shader-model', sm, '--iterations', str(iterations)] if '.line.' in shader: hlsl_args.append('--emit-line-directives') + if '.flatten.' in shader: + hlsl_args.append('--flatten-ubo') + if '.force-uav.' in shader: + hlsl_args.append('--hlsl-force-storage-buffer-as-uav') + if '.zero-initialize.' in shader: + hlsl_args.append('--force-zero-initialized-variables') + if '.nonwritable-uav-texture.' in shader: + hlsl_args.append('--hlsl-nonwritable-uav-texture-as-srv') + if '.native-16bit.' in shader: + hlsl_args.append('--hlsl-enable-16bit-types') + if '.flatten-matrix-vertex-input.' 
in shader: + hlsl_args.append('--hlsl-flatten-matrix-vertex-input-semantics') + if '.relax-nan.' in shader: + hlsl_args.append('--relax-nan-checks') + subprocess.check_call(hlsl_args) if not shader_is_invalid_spirv(hlsl_path): - subprocess.check_call([paths.spirv_val, '--target-env', 'vulkan1.1', spirv_path]) + subprocess.check_call([paths.spirv_val, '--allow-localsizeid', '--scalar-block-layout', '--target-env', spirv_env, spirv_path]) validate_shader_hlsl(hlsl_path, force_no_external_validation, paths) - + return (spirv_path, hlsl_path) def cross_compile_reflect(shader, spirv, opt, iterations, paths): @@ -322,9 +528,9 @@ def cross_compile_reflect(shader, spirv, opt, iterations, paths): if spirv: subprocess.check_call(spirv_cmd) else: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) + subprocess.check_call([paths.glslang, '--amb', '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) - if opt: + if opt and (not shader_is_invalid_spirv(reflect_path)): subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) spirv_cross_path = paths.spirv_cross @@ -335,37 +541,58 @@ def cross_compile_reflect(shader, spirv, opt, iterations, paths): def validate_shader(shader, vulkan, paths): if vulkan: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', shader]) + spirv_14 = '.spv14.' 
in shader + glslang_env = 'spirv1.4' if spirv_14 else 'vulkan1.1' + subprocess.check_call([paths.glslang, '--amb', '--target-env', glslang_env, '-V', shader]) else: subprocess.check_call([paths.glslang, shader]) -def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, flatten_ubo, sso, flatten_dim, opt, push_ubo, iterations, paths): +def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, force_es, flatten_ubo, sso, flatten_dim, opt, push_ubo, iterations, paths): spirv_path = create_temporary() glsl_path = create_temporary(os.path.basename(shader)) + spirv_16 = '.spv16.' in shader + spirv_14 = '.spv14.' in shader + if spirv_16: + spirv_env = 'spv1.6' + glslang_env = 'spirv1.6' + elif spirv_14: + spirv_env = 'vulkan1.1spv1.4' + glslang_env = 'spirv1.4' + else: + spirv_env = 'vulkan1.1' + glslang_env = 'vulkan1.1' + if vulkan or spirv: vulkan_glsl_path = create_temporary('vk' + os.path.basename(shader)) - spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader] + spirv_cmd = [paths.spirv_as, '--target-env', spirv_env, '-o', spirv_path, shader] if '.preserve.' in shader: spirv_cmd.append('--preserve-numeric-ids') if spirv: subprocess.check_call(spirv_cmd) else: - subprocess.check_call([paths.glslang, '--target-env', 'vulkan1.1', '-V', '-o', spirv_path, shader]) + glslang_cmd = [paths.glslang, '--amb', '--target-env', glslang_env, '-V', '-o', spirv_path, shader] + if '.g.' in shader: + glslang_cmd.append('-g') + if '.gV.' 
in shader: + glslang_cmd.append('-gV') + subprocess.check_call(glslang_cmd) if opt and (not invalid_spirv): subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) if not invalid_spirv: - subprocess.check_call([paths.spirv_val, '--target-env', 'vulkan1.1', spirv_path]) + subprocess.check_call([paths.spirv_val, '--allow-localsizeid', '--scalar-block-layout', '--target-env', spirv_env, spirv_path]) extra_args = ['--iterations', str(iterations)] if eliminate: extra_args += ['--remove-unused-variables'] if is_legacy: extra_args += ['--version', '100', '--es'] + if force_es: + extra_args += ['--version', '310', '--es'] if flatten_ubo: extra_args += ['--flatten-ubo'] if sso: @@ -376,19 +603,37 @@ def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, fl extra_args += ['--glsl-emit-push-constant-as-ubo'] if '.line.' in shader: extra_args += ['--emit-line-directives'] + if '.no-samplerless.' in shader: + extra_args += ['--vulkan-glsl-disable-ext-samplerless-texture-functions'] + if '.no-qualifier-deduction.' in shader: + extra_args += ['--disable-storage-image-qualifier-deduction'] + if '.framebuffer-fetch.' in shader: + extra_args += ['--glsl-remap-ext-framebuffer-fetch', '0', '0'] + extra_args += ['--glsl-remap-ext-framebuffer-fetch', '1', '1'] + extra_args += ['--glsl-remap-ext-framebuffer-fetch', '2', '2'] + extra_args += ['--glsl-remap-ext-framebuffer-fetch', '3', '3'] + if '.framebuffer-fetch-noncoherent.' in shader: + extra_args += ['--glsl-ext-framebuffer-fetch-noncoherent'] + if '.zero-initialize.' in shader: + extra_args += ['--force-zero-initialized-variables'] + if '.force-flattened-io.' in shader: + extra_args += ['--glsl-force-flattened-io-blocks'] + if '.relax-nan.' in shader: + extra_args.append('--relax-nan-checks') spirv_cross_path = paths.spirv_cross # A shader might not be possible to make valid GLSL from, skip validation for this case. 
- if not ('nocompat' in glsl_path): + if (not ('nocompat' in glsl_path)) or (not vulkan): subprocess.check_call([spirv_cross_path, '--entry', 'main', '--output', glsl_path, spirv_path] + extra_args) - validate_shader(glsl_path, False, paths) + if not 'nocompat' in glsl_path: + validate_shader(glsl_path, False, paths) else: remove_file(glsl_path) glsl_path = None - if vulkan or spirv: - subprocess.check_call([spirv_cross_path, '--entry', 'main', '--vulkan-semantics', '--output', vulkan_glsl_path, spirv_path] + extra_args) + if (vulkan or spirv) and (not is_legacy): + subprocess.check_call([spirv_cross_path, '--entry', 'main', '-V', '--output', vulkan_glsl_path, spirv_path] + extra_args) validate_shader(vulkan_glsl_path, True, paths) # SPIR-V shaders might just want to validate Vulkan GLSL output, we don't always care about the output. if not vulkan: @@ -419,30 +664,14 @@ def reference_path(directory, relpath, opt): reference_dir = os.path.join(reference_dir, split_paths[1]) return os.path.join(reference_dir, relpath) -def json_ordered(obj): - if isinstance(obj, dict): - return sorted((k, json_ordered(v)) for k, v in obj.items()) - if isinstance(obj, list): - return sorted(json_ordered(x) for x in obj) - else: - return obj - -def json_compare(json_a, json_b): - return json_ordered(json_a) == json_ordered(json_b) - def regression_check_reflect(shader, json_file, args): reference = reference_path(shader[0], shader[1], args.opt) + '.json' joined_path = os.path.join(shader[0], shader[1]) print('Reference shader reflection path:', reference) if os.path.exists(reference): - actual = '' - expected = '' - with open(json_file) as f: - actual_json = f.read(); - actual = json.loads(actual_json) - with open(reference) as f: - expected = json.load(f) - if (json_compare(actual, expected) != True): + actual = md5_for_file(json_file) + expected = md5_for_file(reference) + if actual != expected: if args.update: print('Generated reflection json has changed for {}!'.format(reference)) 
# If we expect changes, update the reference file. @@ -452,13 +681,22 @@ def regression_check_reflect(shader, json_file, args): shutil.move(json_file, reference) else: print('Generated reflection json in {} does not match reference {}!'.format(json_file, reference)) - with open(json_file, 'r') as f: - print('') - print('Generated:') - print('======================') - print(f.read()) - print('======================') - print('') + if args.diff: + diff_path = generate_diff_file(reference, glsl) + with open(diff_path, 'r') as f: + print('') + print('Diff:') + print(f.read()) + print('') + remove_file(diff_path) + else: + with open(json_file, 'r') as f: + print('') + print('Generated:') + print('======================') + print(f.read()) + print('======================') + print('') # Otherwise, fail the test. Keep the shader file around so we can inspect. if not args.keep: @@ -471,7 +709,20 @@ def regression_check_reflect(shader, json_file, args): print('Found new shader {}. Placing generated source code in {}'.format(joined_path, reference)) make_reference_dir(reference) shutil.move(json_file, reference) - + +def generate_diff_file(origin, generated): + diff_destination = create_temporary() + with open(diff_destination, "w") as f: + try: + subprocess.check_call(["diff", origin, generated], stdout=f) + except subprocess.CalledProcessError as e: + # diff returns 1 when the files are different so we can safely + # ignore this case. 
+ if e.returncode != 1: + raise e + + return diff_destination + def regression_check(shader, glsl, args): reference = reference_path(shader[0], shader[1], args.opt) joined_path = os.path.join(shader[0], shader[1]) @@ -488,13 +739,22 @@ def regression_check(shader, glsl, args): shutil.move(glsl, reference) else: print('Generated source code in {} does not match reference {}!'.format(glsl, reference)) - with open(glsl, 'r') as f: - print('') - print('Generated:') - print('======================') - print(f.read()) - print('======================') - print('') + if args.diff: + diff_path = generate_diff_file(reference, glsl) + with open(diff_path, 'r') as f: + print('') + print('Diff:') + print(f.read()) + print('') + remove_file(diff_path) + else: + with open(glsl, 'r') as f: + print('') + print('Generated:') + print('======================') + print(f.read()) + print('======================') + print('') # Otherwise, fail the test. Keep the shader file around so we can inspect. if not args.keep: @@ -525,6 +785,9 @@ def shader_is_invalid_spirv(shader): def shader_is_legacy(shader): return '.legacy.' in shader +def shader_is_force_es(shader): + return '.es.' in shader + def shader_is_flatten_ubo(shader): return '.flatten.' 
in shader @@ -548,6 +811,7 @@ def test_shader(stats, shader, args, paths): is_spirv = shader_is_spirv(shader[1]) invalid_spirv = shader_is_invalid_spirv(shader[1]) is_legacy = shader_is_legacy(shader[1]) + force_es = shader_is_force_es(shader[1]) flatten_ubo = shader_is_flatten_ubo(shader[1]) sso = shader_is_sso(shader[1]) flatten_dim = shader_is_flatten_dimensions(shader[1]) @@ -555,7 +819,7 @@ def test_shader(stats, shader, args, paths): push_ubo = shader_is_push_ubo(shader[1]) print('Testing shader:', joined_path) - spirv, glsl, vulkan_glsl = cross_compile(joined_path, vulkan, is_spirv, invalid_spirv, eliminate, is_legacy, flatten_ubo, sso, flatten_dim, args.opt and (not noopt), push_ubo, args.iterations, paths) + spirv, glsl, vulkan_glsl = cross_compile(joined_path, vulkan, is_spirv, invalid_spirv, eliminate, is_legacy, force_es, flatten_ubo, sso, flatten_dim, args.opt and (not noopt), push_ubo, args.iterations, paths) # Only test GLSL stats if we have a shader following GL semantics. if stats and (not vulkan) and (not is_spirv) and (not desktop): @@ -596,7 +860,9 @@ def test_shader_msl(stats, shader, args, paths): # print('SPRIV shader: ' + spirv) shader_is_msl22 = 'msl22' in joined_path - skip_validation = shader_is_msl22 and (not args.msl22) + shader_is_msl23 = 'msl23' in joined_path + shader_is_msl24 = 'msl24' in joined_path + skip_validation = (shader_is_msl22 and (not args.msl22)) or (shader_is_msl23 and (not args.msl23)) or (shader_is_msl24 and (not args.msl24)) if '.invalid.' 
in joined_path: skip_validation = True @@ -647,23 +913,24 @@ def test_shaders_helper(stats, backend, args): relpath = os.path.relpath(path, args.folder) all_files.append(relpath) - # The child processes in parallel execution mode don't have the proper state for the global args variable, so + # The child processes in parallel execution mode don't have the proper state for the global args variable, so # at this point we need to switch to explicit arguments if args.parallel: - pool = multiprocessing.Pool(multiprocessing.cpu_count()) - - results = [] - for f in all_files: - results.append(pool.apply_async(test_shader_file, - args = (f, stats, args, backend))) - - for res in results: - error = res.get() - if error is not None: - pool.close() - pool.join() - print('Error:', error) - sys.exit(1) + with multiprocessing.Pool(multiprocessing.cpu_count()) as pool: + results = [] + for f in all_files: + results.append(pool.apply_async(test_shader_file, + args = (f, stats, args, backend))) + + pool.close() + pool.join() + results_completed = [res.get() for res in results] + + for error in results_completed: + if error is not None: + print('Error:', error) + sys.exit(1) + else: for i in all_files: e = test_shader_file(i, stats, args, backend) @@ -689,6 +956,9 @@ def main(): parser.add_argument('--keep', action = 'store_true', help = 'Leave failed GLSL shaders on disk if they fail regression. Useful for debugging.') + parser.add_argument('--diff', + action = 'store_true', + help = 'Displays a diff instead of the generated output on failure. 
Useful for debugging.') parser.add_argument('--malisc', action = 'store_true', help = 'Use malisc offline compiler to determine static cycle counts before and after spirv-cross.') @@ -732,7 +1002,7 @@ def main(): default = 1, type = int, help = 'Number of iterations to run SPIRV-Cross (benchmarking)') - + args = parser.parse_args() if not args.folder: sys.stderr.write('Need shader folder.\n') @@ -741,16 +1011,20 @@ def main(): if (args.parallel and (args.malisc or args.force_no_external_validation or args.update)): sys.stderr.write('Parallel execution is disabled when using the flags --update, --malisc or --force-no-external-validation\n') args.parallel = False - + args.msl22 = False + args.msl23 = False + args.msl24 = False if args.msl: print_msl_compiler_version() - args.msl22 = msl_compiler_supports_22() + args.msl22 = msl_compiler_supports_version('2.2') + args.msl23 = msl_compiler_supports_version('2.3') + args.msl24 = msl_compiler_supports_version('2.4') backend = 'glsl' - if (args.msl or args.metal): + if (args.msl or args.metal): backend = 'msl' - elif args.hlsl: + elif args.hlsl: backend = 'hlsl' elif args.reflect: backend = 'reflect' diff --git a/test_shaders.sh b/test_shaders.sh index 4498ac3f08c..c2ab23243ce 100755 --- a/test_shaders.sh +++ b/test_shaders.sh @@ -1,4 +1,8 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +OPTS=$@ if [ -z "$SPIRV_CROSS_PATH" ]; then echo "Building spirv-cross" @@ -11,14 +15,17 @@ echo "Using glslangValidation in: $(which glslangValidator)." echo "Using spirv-opt in: $(which spirv-opt)." echo "Using SPIRV-Cross in: \"$SPIRV_CROSS_PATH\"." 
-./test_shaders.py shaders --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-no-opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl --msl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl-no-opt --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl --hlsl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl-no-opt --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-reflection --reflect --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders ${OPTS} --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders ${OPTS} --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-no-opt ${OPTS} --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-msl ${OPTS} --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-msl ${OPTS} --msl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-msl-no-opt ${OPTS} --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-hlsl ${OPTS} --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-hlsl ${OPTS} --hlsl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-hlsl-no-opt ${OPTS} --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-reflection ${OPTS} --reflect --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-ue4 ${OPTS} --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-ue4 ${OPTS} --msl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-ue4-no-opt ${OPTS} --msl 
--spirv-cross "$SPIRV_CROSS_PATH" || exit 1 diff --git a/tests-other/hlsl_resource_binding.spv b/tests-other/hlsl_resource_binding.spv new file mode 100644 index 00000000000..c48dc49ea00 Binary files /dev/null and b/tests-other/hlsl_resource_binding.spv differ diff --git a/tests-other/hlsl_resource_bindings.cpp b/tests-other/hlsl_resource_bindings.cpp new file mode 100644 index 00000000000..1a938dac333 --- /dev/null +++ b/tests-other/hlsl_resource_bindings.cpp @@ -0,0 +1,89 @@ +// Testbench for HLSL resource binding APIs. +// It does not validate output at the moment, but it's useful for ad-hoc testing. + +#include +#include +#include +#include + +#define SPVC_CHECKED_CALL(x) do { \ + if ((x) != SPVC_SUCCESS) { \ + fprintf(stderr, "Failed at line %d.\n", __LINE__); \ + exit(1); \ + } \ +} while(0) + +static std::vector read_file(const char *path) +{ + long len; + FILE *file = fopen(path, "rb"); + + if (!file) + return {}; + + fseek(file, 0, SEEK_END); + len = ftell(file); + rewind(file); + + std::vector buffer(len / sizeof(SpvId)); + if (fread(buffer.data(), 1, len, file) != (size_t)len) + { + fclose(file); + return {}; + } + + fclose(file); + return buffer; +} + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + auto buffer = read_file(argv[1]); + if (buffer.empty()) + return EXIT_FAILURE; + + spvc_context ctx; + spvc_parsed_ir parsed_ir; + spvc_compiler compiler; + + SPVC_CHECKED_CALL(spvc_context_create(&ctx)); + SPVC_CHECKED_CALL(spvc_context_parse_spirv(ctx, buffer.data(), buffer.size(), &parsed_ir)); + SPVC_CHECKED_CALL(spvc_context_create_compiler(ctx, SPVC_BACKEND_HLSL, parsed_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler)); + + spvc_compiler_options opts; + SPVC_CHECKED_CALL(spvc_compiler_create_compiler_options(compiler, &opts)); + SPVC_CHECKED_CALL(spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, 51)); + SPVC_CHECKED_CALL(spvc_compiler_install_compiler_options(compiler, opts)); + + 
spvc_hlsl_resource_binding binding; + spvc_hlsl_resource_binding_init(&binding); + binding.stage = SpvExecutionModelFragment; + binding.desc_set = 1; + binding.binding = 4; + binding.srv.register_space = 2; + binding.srv.register_binding = 3; + binding.sampler.register_space = 4; + binding.sampler.register_binding = 5; + SPVC_CHECKED_CALL(spvc_compiler_hlsl_add_resource_binding(compiler, &binding)); + + binding.desc_set = SPVC_HLSL_PUSH_CONSTANT_DESC_SET; + binding.binding = SPVC_HLSL_PUSH_CONSTANT_BINDING; + binding.cbv.register_space = 0; + binding.cbv.register_binding = 4; + SPVC_CHECKED_CALL(spvc_compiler_hlsl_add_resource_binding(compiler, &binding)); + + const char *str; + SPVC_CHECKED_CALL(spvc_compiler_compile(compiler, &str)); + + fprintf(stderr, "Output:\n%s\n", str); + + if (!spvc_compiler_hlsl_is_resource_used(compiler, SpvExecutionModelFragment, 1, 4)) + return EXIT_FAILURE; + + if (!spvc_compiler_hlsl_is_resource_used(compiler, SpvExecutionModelFragment, SPVC_HLSL_PUSH_CONSTANT_DESC_SET, SPVC_HLSL_PUSH_CONSTANT_BINDING)) + return EXIT_FAILURE; +} + diff --git a/tests-other/msl_ycbcr_conversion_test.cpp b/tests-other/msl_ycbcr_conversion_test.cpp new file mode 100644 index 00000000000..deab27bec20 --- /dev/null +++ b/tests-other/msl_ycbcr_conversion_test.cpp @@ -0,0 +1,103 @@ +// Testbench for MSL constexpr samplers, with Y'CbCr conversion. +// It does not validate output, but it's useful for ad-hoc testing. 
+ +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include +#include + +#define SPVC_CHECKED_CALL(x) do { \ + if ((x) != SPVC_SUCCESS) { \ + fprintf(stderr, "Failed at line %d.\n", __LINE__); \ + exit(1); \ + } \ +} while(0) +#define SPVC_CHECKED_CALL_NEGATIVE(x) do { \ + g_fail_on_error = SPVC_FALSE; \ + if ((x) == SPVC_SUCCESS) { \ + fprintf(stderr, "Failed at line %d.\n", __LINE__); \ + exit(1); \ + } \ + g_fail_on_error = SPVC_TRUE; \ +} while(0) + +static std::vector read_file(const char *path) +{ + long len; + FILE *file = fopen(path, "rb"); + + if (!file) + return {}; + + fseek(file, 0, SEEK_END); + len = ftell(file); + rewind(file); + + std::vector buffer(len / sizeof(SpvId)); + if (fread(buffer.data(), 1, len, file) != (size_t)len) + { + fclose(file); + return {}; + } + + fclose(file); + return buffer; +} + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + auto buffer = read_file(argv[1]); + if (buffer.empty()) + return EXIT_FAILURE; + + spvc_context ctx; + spvc_parsed_ir parsed_ir; + spvc_compiler compiler; + spvc_compiler_options options; + + SPVC_CHECKED_CALL(spvc_context_create(&ctx)); + SPVC_CHECKED_CALL(spvc_context_parse_spirv(ctx, buffer.data(), buffer.size(), &parsed_ir)); + SPVC_CHECKED_CALL(spvc_context_create_compiler(ctx, SPVC_BACKEND_MSL, parsed_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler)); + SPVC_CHECKED_CALL(spvc_compiler_create_compiler_options(compiler, &options)); + SPVC_CHECKED_CALL(spvc_compiler_options_set_uint(options, SPVC_COMPILER_OPTION_MSL_VERSION, SPVC_MAKE_MSL_VERSION(2, 0, 0))); + SPVC_CHECKED_CALL(spvc_compiler_install_compiler_options(compiler, options)); + + spvc_msl_resource_binding binding; + spvc_msl_resource_binding_init(&binding); + binding.desc_set = 1; + binding.binding = 2; + binding.stage = SpvExecutionModelFragment; + binding.msl_texture = 0; + binding.msl_sampler = 0; + SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, 
&binding)); + + spvc_msl_constexpr_sampler samp; + spvc_msl_sampler_ycbcr_conversion conv; + spvc_msl_constexpr_sampler_init(&samp); + spvc_msl_sampler_ycbcr_conversion_init(&conv); + conv.planes = 3; + conv.resolution = SPVC_MSL_FORMAT_RESOLUTION_422; + conv.chroma_filter = SPVC_MSL_SAMPLER_FILTER_LINEAR; + conv.x_chroma_offset = SPVC_MSL_CHROMA_LOCATION_MIDPOINT; + conv.ycbcr_model = SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020; + conv.ycbcr_range = SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW; + conv.bpc = 8; + SPVC_CHECKED_CALL(spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(compiler, 1, 2, &samp, &conv)); + + const char *str; + SPVC_CHECKED_CALL(spvc_compiler_compile(compiler, &str)); + + // Should be marked, as a sanity check. + if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 1, 2)) + return EXIT_FAILURE; + + fprintf(stderr, "Output:\n%s\n", str); +} + diff --git a/tests-other/msl_ycbcr_conversion_test.spv b/tests-other/msl_ycbcr_conversion_test.spv new file mode 100644 index 00000000000..62372d5c652 Binary files /dev/null and b/tests-other/msl_ycbcr_conversion_test.spv differ diff --git a/tests-other/msl_ycbcr_conversion_test_2.spv b/tests-other/msl_ycbcr_conversion_test_2.spv new file mode 100644 index 00000000000..10fa7690d0d Binary files /dev/null and b/tests-other/msl_ycbcr_conversion_test_2.spv differ diff --git a/tests-other/small_vector.cpp b/tests-other/small_vector.cpp index 7b03d85c603..e9a3bb0bee4 100644 --- a/tests-other/small_vector.cpp +++ b/tests-other/small_vector.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2019 Hans-Kristian Arntzen + * Copyright 2019-2021 Hans-Kristian Arntzen * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/tests-other/typed_id_test.cpp b/tests-other/typed_id_test.cpp new file mode 100644 index 00000000000..e8ecb16cc6d --- /dev/null +++ b/tests-other/typed_id_test.cpp @@ -0,0 +1,49 @@ +#include "spirv_common.hpp" + +using namespace SPIRV_CROSS_NAMESPACE; + +int main() +{ + // Construct from uint32_t. + VariableID var_id = 10; + TypeID type_id = 20; + ConstantID constant_id = 30; + + // Assign from uint32_t. + var_id = 100; + type_id = 40; + constant_id = 60; + + // Construct generic ID. + ID generic_var_id = var_id; + ID generic_type_id = type_id; + ID generic_constant_id = constant_id; + + // Assign generic id. + generic_var_id = var_id; + generic_type_id = type_id; + generic_constant_id = constant_id; + + // Assign generic ID to typed ID + var_id = generic_var_id; + type_id = generic_type_id; + constant_id = generic_constant_id; + + // Implicit conversion to uint32_t. + uint32_t a; + a = var_id; + a = type_id; + a = constant_id; + a = generic_var_id; + a = generic_type_id; + a = generic_constant_id; + + // Copy assignment. + var_id = VariableID(10); + type_id = TypeID(10); + constant_id = ConstantID(10); + + // These operations are blocked, assign or construction from mismatched types. + //var_id = type_id; + //var_id = TypeID(100); +} \ No newline at end of file diff --git a/update_test_shaders.sh b/update_test_shaders.sh index c33afc5caaa..85f9f0b7a1d 100755 --- a/update_test_shaders.sh +++ b/update_test_shaders.sh @@ -1,25 +1,6 @@ #!/bin/bash +# Copyright 2016-2021 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 -if [ -z "$SPIRV_CROSS_PATH" ]; then - echo "Building spirv-cross" - make -j$(nproc) - SPIRV_CROSS_PATH="./spirv-cross" -fi - -export PATH="./external/glslang-build/output/bin:./external/spirv-tools-build/output/bin:.:$PATH" -echo "Using glslangValidation in: $(which glslangValidator)." -echo "Using spirv-opt in: $(which spirv-opt)." -echo "Using SPIRV-Cross in: \"$SPIRV_CROSS_PATH\"." 
- -./test_shaders.py shaders --update --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders --update --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-no-opt --update --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl --update --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl --update --msl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-msl-no-opt --update --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl --update --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl --update --hlsl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-hlsl-no-opt --update --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 -./test_shaders.py shaders-reflection --reflect --update --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 - +./test_shaders.sh --update